1 #include "Python.h"
2 #include "pycore_object.h"
3 #include "pycore_sysmodule.h" // _PySys_GetSizeOf()
4
5 #include <stddef.h> // offsetof()
6 #include "_iomodule.h"
7
8 /*[clinic input]
9 module _io
10 class _io.BytesIO "bytesio *" "clinic_state()->PyBytesIO_Type"
11 [clinic start generated code]*/
12 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=48ede2f330f847c3]*/
13
/* Instance layout of a BytesIO object. */
typedef struct {
    PyObject_HEAD
    PyObject *buf;            /* bytes object holding the data; NULL once closed */
    Py_ssize_t pos;           /* current stream position (may exceed string_size) */
    Py_ssize_t string_size;   /* number of valid data bytes stored in buf */
    PyObject *dict;           /* instance dictionary (see __dictoffset__) */
    PyObject *weakreflist;    /* weak reference list (see __weaklistoffset__) */
    Py_ssize_t exports;       /* number of buffer views currently exported */
} bytesio;
23
/* Intermediate object used to export the contents of a BytesIO through the
   buffer protocol. */
typedef struct {
    PyObject_HEAD
    bytesio *source;   /* owned reference to the BytesIO being exported */
} bytesiobuf;
28
/* The bytesio object can be in one of three states:
  * Py_REFCNT(buf) == 1, exports == 0:
    the internal buffer is neither shared nor exported.
  * Py_REFCNT(buf) > 1, exports == 0:
    the first modification or export causes the internal buffer to be copied.
  * exports > 0, Py_REFCNT(buf) == 1:
    any modifications are forbidden.
*/
35
36 static int
check_closed(bytesio * self)37 check_closed(bytesio *self)
38 {
39 if (self->buf == NULL) {
40 PyErr_SetString(PyExc_ValueError, "I/O operation on closed file.");
41 return 1;
42 }
43 return 0;
44 }
45
46 static int
check_exports(bytesio * self)47 check_exports(bytesio *self)
48 {
49 if (self->exports > 0) {
50 PyErr_SetString(PyExc_BufferError,
51 "Existing exports of data: object cannot be re-sized");
52 return 1;
53 }
54 return 0;
55 }
56
/* Return NULL from the enclosing function if the BytesIO is closed
   (check_closed() has then already set the exception).
   Wrapped in do { } while (0) so that the macro behaves like a single
   statement and cannot capture a following `else`. */
#define CHECK_CLOSED(self)                                  \
    do {                                                    \
        if (check_closed(self)) {                           \
            return NULL;                                    \
        }                                                   \
    } while (0)
61
/* Return NULL from the enclosing function if the BytesIO has exported
   buffers (check_exports() has then already set the exception).
   Wrapped in do { } while (0) so that the macro behaves like a single
   statement and cannot capture a following `else`. */
#define CHECK_EXPORTS(self)                                 \
    do {                                                    \
        if (check_exports(self)) {                          \
            return NULL;                                    \
        }                                                   \
    } while (0)
66
/* True when something other than this BytesIO also holds a reference to the
   internal bytes object.  Dereferences self->buf, so it must not be NULL. */
#define SHARED_BUF(self) (Py_REFCNT((self)->buf) > 1)
68
69
70 /* Internal routine to get a line from the buffer of a BytesIO
71 object. Returns the length between the current position to the
72 next newline character. */
73 static Py_ssize_t
scan_eol(bytesio * self,Py_ssize_t len)74 scan_eol(bytesio *self, Py_ssize_t len)
75 {
76 const char *start, *n;
77 Py_ssize_t maxlen;
78
79 assert(self->buf != NULL);
80 assert(self->pos >= 0);
81
82 if (self->pos >= self->string_size)
83 return 0;
84
85 /* Move to the end of the line, up to the end of the string, s. */
86 maxlen = self->string_size - self->pos;
87 if (len < 0 || len > maxlen)
88 len = maxlen;
89
90 if (len) {
91 start = PyBytes_AS_STRING(self->buf) + self->pos;
92 n = memchr(start, '\n', len);
93 if (n)
94 /* Get the length from the current position to the end of
95 the line. */
96 len = n - start + 1;
97 }
98 assert(len >= 0);
99 assert(self->pos < PY_SSIZE_T_MAX - len);
100
101 return len;
102 }
103
104 /* Internal routine for detaching the shared buffer of BytesIO objects.
105 The caller should ensure that the 'size' argument is non-negative and
106 not lesser than self->string_size. Returns 0 on success, -1 otherwise. */
107 static int
unshare_buffer(bytesio * self,size_t size)108 unshare_buffer(bytesio *self, size_t size)
109 {
110 PyObject *new_buf;
111 assert(SHARED_BUF(self));
112 assert(self->exports == 0);
113 assert(size >= (size_t)self->string_size);
114 new_buf = PyBytes_FromStringAndSize(NULL, size);
115 if (new_buf == NULL)
116 return -1;
117 memcpy(PyBytes_AS_STRING(new_buf), PyBytes_AS_STRING(self->buf),
118 self->string_size);
119 Py_SETREF(self->buf, new_buf);
120 return 0;
121 }
122
123 /* Internal routine for changing the size of the buffer of BytesIO objects.
124 The caller should ensure that the 'size' argument is non-negative. Returns
125 0 on success, -1 otherwise. */
126 static int
resize_buffer(bytesio * self,size_t size)127 resize_buffer(bytesio *self, size_t size)
128 {
129 assert(self->buf != NULL);
130 assert(self->exports == 0);
131
132 /* Here, unsigned types are used to avoid dealing with signed integer
133 overflow, which is undefined in C. */
134 size_t alloc = PyBytes_GET_SIZE(self->buf);
135
136 /* For simplicity, stay in the range of the signed type. Anyway, Python
137 doesn't allow strings to be longer than this. */
138 if (size > PY_SSIZE_T_MAX)
139 goto overflow;
140
141 if (size < alloc / 2) {
142 /* Major downsize; resize down to exact size. */
143 alloc = size + 1;
144 }
145 else if (size < alloc) {
146 /* Within allocated size; quick exit */
147 return 0;
148 }
149 else if (size <= alloc * 1.125) {
150 /* Moderate upsize; overallocate similar to list_resize() */
151 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
152 }
153 else {
154 /* Major upsize; resize up to exact size */
155 alloc = size + 1;
156 }
157
158 if (SHARED_BUF(self)) {
159 if (unshare_buffer(self, alloc) < 0)
160 return -1;
161 }
162 else {
163 if (_PyBytes_Resize(&self->buf, alloc) < 0)
164 return -1;
165 }
166
167 return 0;
168
169 overflow:
170 PyErr_SetString(PyExc_OverflowError,
171 "new buffer size too large");
172 return -1;
173 }
174
175 /* Internal routine for writing a string of bytes to the buffer of a BytesIO
176 object. Returns the number of bytes written, or -1 on error.
177 Inlining is disabled because it's significantly decreases performance
178 of writelines() in PGO build. */
179 Py_NO_INLINE static Py_ssize_t
write_bytes(bytesio * self,PyObject * b)180 write_bytes(bytesio *self, PyObject *b)
181 {
182 if (check_closed(self)) {
183 return -1;
184 }
185 if (check_exports(self)) {
186 return -1;
187 }
188
189 Py_buffer buf;
190 if (PyObject_GetBuffer(b, &buf, PyBUF_CONTIG_RO) < 0) {
191 return -1;
192 }
193 Py_ssize_t len = buf.len;
194 if (len == 0) {
195 goto done;
196 }
197
198 assert(self->pos >= 0);
199 size_t endpos = (size_t)self->pos + len;
200 if (endpos > (size_t)PyBytes_GET_SIZE(self->buf)) {
201 if (resize_buffer(self, endpos) < 0) {
202 len = -1;
203 goto done;
204 }
205 }
206 else if (SHARED_BUF(self)) {
207 if (unshare_buffer(self, Py_MAX(endpos, (size_t)self->string_size)) < 0) {
208 len = -1;
209 goto done;
210 }
211 }
212
213 if (self->pos > self->string_size) {
214 /* In case of overseek, pad with null bytes the buffer region between
215 the end of stream and the current position.
216
217 0 lo string_size hi
218 | |<---used--->|<----------available----------->|
219 | | <--to pad-->|<---to write---> |
220 0 buf position
221 */
222 memset(PyBytes_AS_STRING(self->buf) + self->string_size, '\0',
223 (self->pos - self->string_size) * sizeof(char));
224 }
225
226 /* Copy the data to the internal buffer, overwriting some of the existing
227 data if self->pos < self->string_size. */
228 memcpy(PyBytes_AS_STRING(self->buf) + self->pos, buf.buf, len);
229 self->pos = endpos;
230
231 /* Set the new length of the internal string if it has changed. */
232 if ((size_t)self->string_size < endpos) {
233 self->string_size = endpos;
234 }
235
236 done:
237 PyBuffer_Release(&buf);
238 return len;
239 }
240
241 static PyObject *
bytesio_get_closed(bytesio * self,void * Py_UNUSED (ignored))242 bytesio_get_closed(bytesio *self, void *Py_UNUSED(ignored))
243 {
244 if (self->buf == NULL) {
245 Py_RETURN_TRUE;
246 }
247 else {
248 Py_RETURN_FALSE;
249 }
250 }
251
252 /*[clinic input]
253 _io.BytesIO.readable
254
255 Returns True if the IO object can be read.
256 [clinic start generated code]*/
257
258 static PyObject *
_io_BytesIO_readable_impl(bytesio * self)259 _io_BytesIO_readable_impl(bytesio *self)
260 /*[clinic end generated code: output=4e93822ad5b62263 input=96c5d0cccfb29f5c]*/
261 {
262 CHECK_CLOSED(self);
263 Py_RETURN_TRUE;
264 }
265
266 /*[clinic input]
267 _io.BytesIO.writable
268
269 Returns True if the IO object can be written.
270 [clinic start generated code]*/
271
272 static PyObject *
_io_BytesIO_writable_impl(bytesio * self)273 _io_BytesIO_writable_impl(bytesio *self)
274 /*[clinic end generated code: output=64ff6a254b1150b8 input=700eed808277560a]*/
275 {
276 CHECK_CLOSED(self);
277 Py_RETURN_TRUE;
278 }
279
280 /*[clinic input]
281 _io.BytesIO.seekable
282
283 Returns True if the IO object can be seeked.
284 [clinic start generated code]*/
285
286 static PyObject *
_io_BytesIO_seekable_impl(bytesio * self)287 _io_BytesIO_seekable_impl(bytesio *self)
288 /*[clinic end generated code: output=6b417f46dcc09b56 input=9421f65627a344dd]*/
289 {
290 CHECK_CLOSED(self);
291 Py_RETURN_TRUE;
292 }
293
294 /*[clinic input]
295 _io.BytesIO.flush
296
297 Does nothing.
298 [clinic start generated code]*/
299
300 static PyObject *
_io_BytesIO_flush_impl(bytesio * self)301 _io_BytesIO_flush_impl(bytesio *self)
302 /*[clinic end generated code: output=187e3d781ca134a0 input=561ea490be4581a7]*/
303 {
304 CHECK_CLOSED(self);
305 Py_RETURN_NONE;
306 }
307
308 /*[clinic input]
309 _io.BytesIO.getbuffer
310
311 cls: defining_class
312 /
313
314 Get a read-write view over the contents of the BytesIO object.
315 [clinic start generated code]*/
316
317 static PyObject *
_io_BytesIO_getbuffer_impl(bytesio * self,PyTypeObject * cls)318 _io_BytesIO_getbuffer_impl(bytesio *self, PyTypeObject *cls)
319 /*[clinic end generated code: output=045091d7ce87fe4e input=0668fbb48f95dffa]*/
320 {
321 _PyIO_State *state = get_io_state_by_cls(cls);
322 PyTypeObject *type = state->PyBytesIOBuffer_Type;
323 bytesiobuf *buf;
324 PyObject *view;
325
326 CHECK_CLOSED(self);
327
328 buf = (bytesiobuf *) type->tp_alloc(type, 0);
329 if (buf == NULL)
330 return NULL;
331 buf->source = (bytesio*)Py_NewRef(self);
332 view = PyMemoryView_FromObject((PyObject *) buf);
333 Py_DECREF(buf);
334 return view;
335 }
336
337 /*[clinic input]
338 _io.BytesIO.getvalue
339
340 Retrieve the entire contents of the BytesIO object.
341 [clinic start generated code]*/
342
343 static PyObject *
_io_BytesIO_getvalue_impl(bytesio * self)344 _io_BytesIO_getvalue_impl(bytesio *self)
345 /*[clinic end generated code: output=b3f6a3233c8fd628 input=4b403ac0af3973ed]*/
346 {
347 CHECK_CLOSED(self);
348 if (self->string_size <= 1 || self->exports > 0)
349 return PyBytes_FromStringAndSize(PyBytes_AS_STRING(self->buf),
350 self->string_size);
351
352 if (self->string_size != PyBytes_GET_SIZE(self->buf)) {
353 if (SHARED_BUF(self)) {
354 if (unshare_buffer(self, self->string_size) < 0)
355 return NULL;
356 }
357 else {
358 if (_PyBytes_Resize(&self->buf, self->string_size) < 0)
359 return NULL;
360 }
361 }
362 return Py_NewRef(self->buf);
363 }
364
365 /*[clinic input]
366 _io.BytesIO.isatty
367
368 Always returns False.
369
370 BytesIO objects are not connected to a TTY-like device.
371 [clinic start generated code]*/
372
373 static PyObject *
_io_BytesIO_isatty_impl(bytesio * self)374 _io_BytesIO_isatty_impl(bytesio *self)
375 /*[clinic end generated code: output=df67712e669f6c8f input=6f97f0985d13f827]*/
376 {
377 CHECK_CLOSED(self);
378 Py_RETURN_FALSE;
379 }
380
381 /*[clinic input]
382 _io.BytesIO.tell
383
384 Current file position, an integer.
385 [clinic start generated code]*/
386
387 static PyObject *
_io_BytesIO_tell_impl(bytesio * self)388 _io_BytesIO_tell_impl(bytesio *self)
389 /*[clinic end generated code: output=b54b0f93cd0e5e1d input=b106adf099cb3657]*/
390 {
391 CHECK_CLOSED(self);
392 return PyLong_FromSsize_t(self->pos);
393 }
394
395 static PyObject *
read_bytes(bytesio * self,Py_ssize_t size)396 read_bytes(bytesio *self, Py_ssize_t size)
397 {
398 const char *output;
399
400 assert(self->buf != NULL);
401 assert(size <= self->string_size);
402 if (size > 1 &&
403 self->pos == 0 && size == PyBytes_GET_SIZE(self->buf) &&
404 self->exports == 0) {
405 self->pos += size;
406 return Py_NewRef(self->buf);
407 }
408
409 output = PyBytes_AS_STRING(self->buf) + self->pos;
410 self->pos += size;
411 return PyBytes_FromStringAndSize(output, size);
412 }
413
414 /*[clinic input]
415 _io.BytesIO.read
416 size: Py_ssize_t(accept={int, NoneType}) = -1
417 /
418
419 Read at most size bytes, returned as a bytes object.
420
421 If the size argument is negative, read until EOF is reached.
422 Return an empty bytes object at EOF.
423 [clinic start generated code]*/
424
425 static PyObject *
_io_BytesIO_read_impl(bytesio * self,Py_ssize_t size)426 _io_BytesIO_read_impl(bytesio *self, Py_ssize_t size)
427 /*[clinic end generated code: output=9cc025f21c75bdd2 input=74344a39f431c3d7]*/
428 {
429 Py_ssize_t n;
430
431 CHECK_CLOSED(self);
432
433 /* adjust invalid sizes */
434 n = self->string_size - self->pos;
435 if (size < 0 || size > n) {
436 size = n;
437 if (size < 0)
438 size = 0;
439 }
440
441 return read_bytes(self, size);
442 }
443
444
445 /*[clinic input]
446 _io.BytesIO.read1
447 size: Py_ssize_t(accept={int, NoneType}) = -1
448 /
449
450 Read at most size bytes, returned as a bytes object.
451
452 If the size argument is negative or omitted, read until EOF is reached.
453 Return an empty bytes object at EOF.
454 [clinic start generated code]*/
455
456 static PyObject *
_io_BytesIO_read1_impl(bytesio * self,Py_ssize_t size)457 _io_BytesIO_read1_impl(bytesio *self, Py_ssize_t size)
458 /*[clinic end generated code: output=d0f843285aa95f1c input=440a395bf9129ef5]*/
459 {
460 return _io_BytesIO_read_impl(self, size);
461 }
462
463 /*[clinic input]
464 _io.BytesIO.readline
465 size: Py_ssize_t(accept={int, NoneType}) = -1
466 /
467
468 Next line from the file, as a bytes object.
469
470 Retain newline. A non-negative size argument limits the maximum
471 number of bytes to return (an incomplete line may be returned then).
472 Return an empty bytes object at EOF.
473 [clinic start generated code]*/
474
475 static PyObject *
_io_BytesIO_readline_impl(bytesio * self,Py_ssize_t size)476 _io_BytesIO_readline_impl(bytesio *self, Py_ssize_t size)
477 /*[clinic end generated code: output=4bff3c251df8ffcd input=e7c3fbd1744e2783]*/
478 {
479 Py_ssize_t n;
480
481 CHECK_CLOSED(self);
482
483 n = scan_eol(self, size);
484
485 return read_bytes(self, n);
486 }
487
488 /*[clinic input]
489 _io.BytesIO.readlines
490 size as arg: object = None
491 /
492
493 List of bytes objects, each a line from the file.
494
495 Call readline() repeatedly and return a list of the lines so read.
496 The optional size argument, if given, is an approximate bound on the
497 total number of bytes in the lines returned.
498 [clinic start generated code]*/
499
500 static PyObject *
_io_BytesIO_readlines_impl(bytesio * self,PyObject * arg)501 _io_BytesIO_readlines_impl(bytesio *self, PyObject *arg)
502 /*[clinic end generated code: output=09b8e34c880808ff input=691aa1314f2c2a87]*/
503 {
504 Py_ssize_t maxsize, size, n;
505 PyObject *result, *line;
506 const char *output;
507
508 CHECK_CLOSED(self);
509
510 if (PyLong_Check(arg)) {
511 maxsize = PyLong_AsSsize_t(arg);
512 if (maxsize == -1 && PyErr_Occurred())
513 return NULL;
514 }
515 else if (arg == Py_None) {
516 /* No size limit, by default. */
517 maxsize = -1;
518 }
519 else {
520 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
521 Py_TYPE(arg)->tp_name);
522 return NULL;
523 }
524
525 size = 0;
526 result = PyList_New(0);
527 if (!result)
528 return NULL;
529
530 output = PyBytes_AS_STRING(self->buf) + self->pos;
531 while ((n = scan_eol(self, -1)) != 0) {
532 self->pos += n;
533 line = PyBytes_FromStringAndSize(output, n);
534 if (!line)
535 goto on_error;
536 if (PyList_Append(result, line) == -1) {
537 Py_DECREF(line);
538 goto on_error;
539 }
540 Py_DECREF(line);
541 size += n;
542 if (maxsize > 0 && size >= maxsize)
543 break;
544 output += n;
545 }
546 return result;
547
548 on_error:
549 Py_DECREF(result);
550 return NULL;
551 }
552
553 /*[clinic input]
554 _io.BytesIO.readinto
555 buffer: Py_buffer(accept={rwbuffer})
556 /
557
558 Read bytes into buffer.
559
560 Returns number of bytes read (0 for EOF), or None if the object
561 is set not to block and has no data to read.
562 [clinic start generated code]*/
563
564 static PyObject *
_io_BytesIO_readinto_impl(bytesio * self,Py_buffer * buffer)565 _io_BytesIO_readinto_impl(bytesio *self, Py_buffer *buffer)
566 /*[clinic end generated code: output=a5d407217dcf0639 input=1424d0fdce857919]*/
567 {
568 Py_ssize_t len, n;
569
570 CHECK_CLOSED(self);
571
572 /* adjust invalid sizes */
573 len = buffer->len;
574 n = self->string_size - self->pos;
575 if (len > n) {
576 len = n;
577 if (len < 0)
578 len = 0;
579 }
580
581 memcpy(buffer->buf, PyBytes_AS_STRING(self->buf) + self->pos, len);
582 assert(self->pos + len < PY_SSIZE_T_MAX);
583 assert(len >= 0);
584 self->pos += len;
585
586 return PyLong_FromSsize_t(len);
587 }
588
589 /*[clinic input]
590 _io.BytesIO.truncate
591 size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None
592 /
593
594 Truncate the file to at most size bytes.
595
596 Size defaults to the current file position, as returned by tell().
597 The current file position is unchanged. Returns the new size.
598 [clinic start generated code]*/
599
600 static PyObject *
_io_BytesIO_truncate_impl(bytesio * self,Py_ssize_t size)601 _io_BytesIO_truncate_impl(bytesio *self, Py_ssize_t size)
602 /*[clinic end generated code: output=9ad17650c15fa09b input=423759dd42d2f7c1]*/
603 {
604 CHECK_CLOSED(self);
605 CHECK_EXPORTS(self);
606
607 if (size < 0) {
608 PyErr_Format(PyExc_ValueError,
609 "negative size value %zd", size);
610 return NULL;
611 }
612
613 if (size < self->string_size) {
614 self->string_size = size;
615 if (resize_buffer(self, size) < 0)
616 return NULL;
617 }
618
619 return PyLong_FromSsize_t(size);
620 }
621
622 static PyObject *
bytesio_iternext(bytesio * self)623 bytesio_iternext(bytesio *self)
624 {
625 Py_ssize_t n;
626
627 CHECK_CLOSED(self);
628
629 n = scan_eol(self, -1);
630
631 if (n == 0)
632 return NULL;
633
634 return read_bytes(self, n);
635 }
636
637 /*[clinic input]
638 _io.BytesIO.seek
639 pos: Py_ssize_t
640 whence: int = 0
641 /
642
643 Change stream position.
644
645 Seek to byte offset pos relative to position indicated by whence:
646 0 Start of stream (the default). pos should be >= 0;
647 1 Current position - pos may be negative;
648 2 End of stream - pos usually negative.
649 Returns the new absolute position.
650 [clinic start generated code]*/
651
652 static PyObject *
_io_BytesIO_seek_impl(bytesio * self,Py_ssize_t pos,int whence)653 _io_BytesIO_seek_impl(bytesio *self, Py_ssize_t pos, int whence)
654 /*[clinic end generated code: output=c26204a68e9190e4 input=1e875e6ebc652948]*/
655 {
656 CHECK_CLOSED(self);
657
658 if (pos < 0 && whence == 0) {
659 PyErr_Format(PyExc_ValueError,
660 "negative seek value %zd", pos);
661 return NULL;
662 }
663
664 /* whence = 0: offset relative to beginning of the string.
665 whence = 1: offset relative to current position.
666 whence = 2: offset relative the end of the string. */
667 if (whence == 1) {
668 if (pos > PY_SSIZE_T_MAX - self->pos) {
669 PyErr_SetString(PyExc_OverflowError,
670 "new position too large");
671 return NULL;
672 }
673 pos += self->pos;
674 }
675 else if (whence == 2) {
676 if (pos > PY_SSIZE_T_MAX - self->string_size) {
677 PyErr_SetString(PyExc_OverflowError,
678 "new position too large");
679 return NULL;
680 }
681 pos += self->string_size;
682 }
683 else if (whence != 0) {
684 PyErr_Format(PyExc_ValueError,
685 "invalid whence (%i, should be 0, 1 or 2)", whence);
686 return NULL;
687 }
688
689 if (pos < 0)
690 pos = 0;
691 self->pos = pos;
692
693 return PyLong_FromSsize_t(self->pos);
694 }
695
696 /*[clinic input]
697 _io.BytesIO.write
698 b: object
699 /
700
701 Write bytes to file.
702
703 Return the number of bytes written.
704 [clinic start generated code]*/
705
706 static PyObject *
_io_BytesIO_write(bytesio * self,PyObject * b)707 _io_BytesIO_write(bytesio *self, PyObject *b)
708 /*[clinic end generated code: output=53316d99800a0b95 input=f5ec7c8c64ed720a]*/
709 {
710 Py_ssize_t n = write_bytes(self, b);
711 return n >= 0 ? PyLong_FromSsize_t(n) : NULL;
712 }
713
714 /*[clinic input]
715 _io.BytesIO.writelines
716 lines: object
717 /
718
719 Write lines to the file.
720
721 Note that newlines are not added. lines can be any iterable object
722 producing bytes-like objects. This is equivalent to calling write() for
723 each element.
724 [clinic start generated code]*/
725
726 static PyObject *
_io_BytesIO_writelines(bytesio * self,PyObject * lines)727 _io_BytesIO_writelines(bytesio *self, PyObject *lines)
728 /*[clinic end generated code: output=7f33aa3271c91752 input=e972539176fc8fc1]*/
729 {
730 PyObject *it, *item;
731
732 CHECK_CLOSED(self);
733
734 it = PyObject_GetIter(lines);
735 if (it == NULL)
736 return NULL;
737
738 while ((item = PyIter_Next(it)) != NULL) {
739 Py_ssize_t ret = write_bytes(self, item);
740 Py_DECREF(item);
741 if (ret < 0) {
742 Py_DECREF(it);
743 return NULL;
744 }
745 }
746 Py_DECREF(it);
747
748 /* See if PyIter_Next failed */
749 if (PyErr_Occurred())
750 return NULL;
751
752 Py_RETURN_NONE;
753 }
754
755 /*[clinic input]
756 _io.BytesIO.close
757
758 Disable all I/O operations.
759 [clinic start generated code]*/
760
761 static PyObject *
_io_BytesIO_close_impl(bytesio * self)762 _io_BytesIO_close_impl(bytesio *self)
763 /*[clinic end generated code: output=1471bb9411af84a0 input=37e1f55556e61f60]*/
764 {
765 CHECK_EXPORTS(self);
766 Py_CLEAR(self->buf);
767 Py_RETURN_NONE;
768 }
769
/* Pickling support.

   Note that only pickle protocol 2 and onward are supported, since we use
   the extended __reduce__ API of PEP 307 to make BytesIO instances
   picklable.

   Providing support for protocol < 2 would require the __reduce_ex__ method,
   which is notably long-winded when defined properly.

   For BytesIO, the implementation would be similar to the one coded for
   object.__reduce_ex__, but slightly less general. To be more specific, we
   could call bytesio_getstate directly and avoid checking for the presence
   of a fallback __reduce__ method. However, we would still need a
   __newobj__ function to use the efficient instance representation of
   PEP 307.
*/
784
785 static PyObject *
bytesio_getstate(bytesio * self,PyObject * Py_UNUSED (ignored))786 bytesio_getstate(bytesio *self, PyObject *Py_UNUSED(ignored))
787 {
788 PyObject *initvalue = _io_BytesIO_getvalue_impl(self);
789 PyObject *dict;
790 PyObject *state;
791
792 if (initvalue == NULL)
793 return NULL;
794 if (self->dict == NULL) {
795 dict = Py_NewRef(Py_None);
796 }
797 else {
798 dict = PyDict_Copy(self->dict);
799 if (dict == NULL) {
800 Py_DECREF(initvalue);
801 return NULL;
802 }
803 }
804
805 state = Py_BuildValue("(OnN)", initvalue, self->pos, dict);
806 Py_DECREF(initvalue);
807 return state;
808 }
809
810 static PyObject *
bytesio_setstate(bytesio * self,PyObject * state)811 bytesio_setstate(bytesio *self, PyObject *state)
812 {
813 PyObject *result;
814 PyObject *position_obj;
815 PyObject *dict;
816 Py_ssize_t pos;
817
818 assert(state != NULL);
819
820 /* We allow the state tuple to be longer than 3, because we may need
821 someday to extend the object's state without breaking
822 backward-compatibility. */
823 if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 3) {
824 PyErr_Format(PyExc_TypeError,
825 "%.200s.__setstate__ argument should be 3-tuple, got %.200s",
826 Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
827 return NULL;
828 }
829 CHECK_EXPORTS(self);
830 /* Reset the object to its default state. This is only needed to handle
831 the case of repeated calls to __setstate__. */
832 self->string_size = 0;
833 self->pos = 0;
834
835 /* Set the value of the internal buffer. If state[0] does not support the
836 buffer protocol, bytesio_write will raise the appropriate TypeError. */
837 result = _io_BytesIO_write(self, PyTuple_GET_ITEM(state, 0));
838 if (result == NULL)
839 return NULL;
840 Py_DECREF(result);
841
842 /* Set carefully the position value. Alternatively, we could use the seek
843 method instead of modifying self->pos directly to better protect the
844 object internal state against erroneous (or malicious) inputs. */
845 position_obj = PyTuple_GET_ITEM(state, 1);
846 if (!PyLong_Check(position_obj)) {
847 PyErr_Format(PyExc_TypeError,
848 "second item of state must be an integer, not %.200s",
849 Py_TYPE(position_obj)->tp_name);
850 return NULL;
851 }
852 pos = PyLong_AsSsize_t(position_obj);
853 if (pos == -1 && PyErr_Occurred())
854 return NULL;
855 if (pos < 0) {
856 PyErr_SetString(PyExc_ValueError,
857 "position value cannot be negative");
858 return NULL;
859 }
860 self->pos = pos;
861
862 /* Set the dictionary of the instance variables. */
863 dict = PyTuple_GET_ITEM(state, 2);
864 if (dict != Py_None) {
865 if (!PyDict_Check(dict)) {
866 PyErr_Format(PyExc_TypeError,
867 "third item of state should be a dict, got a %.200s",
868 Py_TYPE(dict)->tp_name);
869 return NULL;
870 }
871 if (self->dict) {
872 /* Alternatively, we could replace the internal dictionary
873 completely. However, it seems more practical to just update it. */
874 if (PyDict_Update(self->dict, dict) < 0)
875 return NULL;
876 }
877 else {
878 self->dict = Py_NewRef(dict);
879 }
880 }
881
882 Py_RETURN_NONE;
883 }
884
/* tp_dealloc for BytesIO: releases the buffer, the instance dictionary and
   any weak references, then frees the object. */
static void
bytesio_dealloc(bytesio *self)
{
    /* Saved up front: the type is still needed after the object is freed. */
    PyTypeObject *tp = Py_TYPE(self);
    _PyObject_GC_UNTRACK(self);
    if (self->exports > 0) {
        /* Deallocation cannot be refused, so the problem is only reported. */
        PyErr_SetString(PyExc_SystemError,
                        "deallocated BytesIO object has exported buffers");
        PyErr_Print();
    }
    Py_CLEAR(self->buf);
    Py_CLEAR(self->dict);
    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) self);
    tp->tp_free(self);
    /* Drop the reference to the type taken from the instance. */
    Py_DECREF(tp);
}
902
903 static PyObject *
bytesio_new(PyTypeObject * type,PyObject * args,PyObject * kwds)904 bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
905 {
906 bytesio *self;
907
908 assert(type != NULL && type->tp_alloc != NULL);
909 self = (bytesio *)type->tp_alloc(type, 0);
910 if (self == NULL)
911 return NULL;
912
913 /* tp_alloc initializes all the fields to zero. So we don't have to
914 initialize them here. */
915
916 self->buf = PyBytes_FromStringAndSize(NULL, 0);
917 if (self->buf == NULL) {
918 Py_DECREF(self);
919 return PyErr_NoMemory();
920 }
921
922 return (PyObject *)self;
923 }
924
925 /*[clinic input]
926 _io.BytesIO.__init__
927 initial_bytes as initvalue: object(c_default="NULL") = b''
928
929 Buffered I/O implementation using an in-memory bytes buffer.
930 [clinic start generated code]*/
931
932 static int
_io_BytesIO___init___impl(bytesio * self,PyObject * initvalue)933 _io_BytesIO___init___impl(bytesio *self, PyObject *initvalue)
934 /*[clinic end generated code: output=65c0c51e24c5b621 input=aac7f31b67bf0fb6]*/
935 {
936 /* In case, __init__ is called multiple times. */
937 self->string_size = 0;
938 self->pos = 0;
939
940 if (self->exports > 0) {
941 PyErr_SetString(PyExc_BufferError,
942 "Existing exports of data: object cannot be re-sized");
943 return -1;
944 }
945 if (initvalue && initvalue != Py_None) {
946 if (PyBytes_CheckExact(initvalue)) {
947 Py_XSETREF(self->buf, Py_NewRef(initvalue));
948 self->string_size = PyBytes_GET_SIZE(initvalue);
949 }
950 else {
951 PyObject *res;
952 res = _io_BytesIO_write(self, initvalue);
953 if (res == NULL)
954 return -1;
955 Py_DECREF(res);
956 self->pos = 0;
957 }
958 }
959
960 return 0;
961 }
962
963 static PyObject *
bytesio_sizeof(bytesio * self,void * unused)964 bytesio_sizeof(bytesio *self, void *unused)
965 {
966 size_t res = _PyObject_SIZE(Py_TYPE(self));
967 if (self->buf && !SHARED_BUF(self)) {
968 size_t s = _PySys_GetSizeOf(self->buf);
969 if (s == (size_t)-1) {
970 return NULL;
971 }
972 res += s;
973 }
974 return PyLong_FromSize_t(res);
975 }
976
/* tp_traverse: visit the objects referenced by a BytesIO instance (its
   type, its instance dictionary and its buffer).  Py_VISIT returns early
   from this function if the visit callback reports a non-zero result. */
static int
bytesio_traverse(bytesio *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->dict);
    Py_VISIT(self->buf);
    return 0;
}
985
986 static int
bytesio_clear(bytesio * self)987 bytesio_clear(bytesio *self)
988 {
989 Py_CLEAR(self->dict);
990 if (self->exports == 0) {
991 Py_CLEAR(self->buf);
992 }
993 return 0;
994 }
995
996
/* clinic_state() is defined only for the duration of the generated clinic
   header; it expands to a find_io_state_by_def() lookup on the type of
   `self`. */
#define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
#include "clinic/bytesio.c.h"
#undef clinic_state
1000
/* Computed attributes of BytesIO: a read-only "closed" attribute (no setter
   is provided). */
static PyGetSetDef bytesio_getsetlist[] = {
    {"closed", (getter)bytesio_get_closed, NULL,
     "True if the file is closed."},
    {NULL}, /* sentinel */
};
1006
/* Method table of BytesIO.  The _IO_BYTESIO_*_METHODDEF entries come from
   the generated clinic header; the pickling and __sizeof__ helpers are
   registered by hand. */
static struct PyMethodDef bytesio_methods[] = {
    _IO_BYTESIO_READABLE_METHODDEF
    _IO_BYTESIO_SEEKABLE_METHODDEF
    _IO_BYTESIO_WRITABLE_METHODDEF
    _IO_BYTESIO_CLOSE_METHODDEF
    _IO_BYTESIO_FLUSH_METHODDEF
    _IO_BYTESIO_ISATTY_METHODDEF
    _IO_BYTESIO_TELL_METHODDEF
    _IO_BYTESIO_WRITE_METHODDEF
    _IO_BYTESIO_WRITELINES_METHODDEF
    _IO_BYTESIO_READ1_METHODDEF
    _IO_BYTESIO_READINTO_METHODDEF
    _IO_BYTESIO_READLINE_METHODDEF
    _IO_BYTESIO_READLINES_METHODDEF
    _IO_BYTESIO_READ_METHODDEF
    _IO_BYTESIO_GETBUFFER_METHODDEF
    _IO_BYTESIO_GETVALUE_METHODDEF
    _IO_BYTESIO_SEEK_METHODDEF
    _IO_BYTESIO_TRUNCATE_METHODDEF
    {"__getstate__",  (PyCFunction)bytesio_getstate,  METH_NOARGS, NULL},
    {"__setstate__",  (PyCFunction)bytesio_setstate,  METH_O, NULL},
    {"__sizeof__", (PyCFunction)bytesio_sizeof,     METH_NOARGS, NULL},
    {NULL, NULL}        /* sentinel */
};
1031
/* Special members giving the offsets of the weak reference list and the
   instance dictionary inside the bytesio struct. */
static PyMemberDef bytesio_members[] = {
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(bytesio, weakreflist), Py_READONLY},
    {"__dictoffset__", Py_T_PYSSIZET, offsetof(bytesio, dict), Py_READONLY},
    {NULL}
};
1037
/* Type slots of BytesIO.  Iteration returns the object itself
   (PyObject_SelfIter) and yields lines through bytesio_iternext. */
static PyType_Slot bytesio_slots[] = {
    {Py_tp_dealloc, bytesio_dealloc},
    {Py_tp_doc, (void *)_io_BytesIO___init____doc__},
    {Py_tp_traverse, bytesio_traverse},
    {Py_tp_clear, bytesio_clear},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, bytesio_iternext},
    {Py_tp_methods, bytesio_methods},
    {Py_tp_members, bytesio_members},
    {Py_tp_getset, bytesio_getsetlist},
    {Py_tp_init, _io_BytesIO___init__},
    {Py_tp_new, bytesio_new},
    {0, NULL},
};
1052
/* Type specification of _io.BytesIO: a GC-enabled, subclassable type whose
   type object is immutable. */
PyType_Spec bytesio_spec = {
    .name = "_io.BytesIO",
    .basicsize = sizeof(bytesio),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = bytesio_slots,
};
1060
1061 /*
1062 * Implementation of the small intermediate object used by getbuffer().
1063 * getbuffer() returns a memoryview over this object, which should make it
1064 * invisible from Python code.
1065 */
1066
1067 static int
bytesiobuf_getbuffer(bytesiobuf * obj,Py_buffer * view,int flags)1068 bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags)
1069 {
1070 bytesio *b = (bytesio *) obj->source;
1071
1072 if (view == NULL) {
1073 PyErr_SetString(PyExc_BufferError,
1074 "bytesiobuf_getbuffer: view==NULL argument is obsolete");
1075 return -1;
1076 }
1077 if (b->exports == 0 && SHARED_BUF(b)) {
1078 if (unshare_buffer(b, b->string_size) < 0)
1079 return -1;
1080 }
1081
1082 /* cannot fail if view != NULL and readonly == 0 */
1083 (void)PyBuffer_FillInfo(view, (PyObject*)obj,
1084 PyBytes_AS_STRING(b->buf), b->string_size,
1085 0, flags);
1086 b->exports++;
1087 return 0;
1088 }
1089
1090 static void
bytesiobuf_releasebuffer(bytesiobuf * obj,Py_buffer * view)1091 bytesiobuf_releasebuffer(bytesiobuf *obj, Py_buffer *view)
1092 {
1093 bytesio *b = (bytesio *) obj->source;
1094 b->exports--;
1095 }
1096
/* tp_traverse: visit the objects referenced by the helper (its type and the
   source BytesIO).  Py_VISIT returns early from this function if the visit
   callback reports a non-zero result. */
static int
bytesiobuf_traverse(bytesiobuf *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->source);
    return 0;
}
1104
/* tp_dealloc: release the reference to the source BytesIO and free the
   helper object. */
static void
bytesiobuf_dealloc(bytesiobuf *self)
{
    /* Saved up front: the type is still needed after the object is freed. */
    PyTypeObject *tp = Py_TYPE(self);
    /* bpo-31095: UnTrack is needed before calling any callbacks */
    PyObject_GC_UnTrack(self);
    Py_CLEAR(self->source);
    tp->tp_free(self);
    /* Drop the reference to the type taken from the instance. */
    Py_DECREF(tp);
}
1115
/* Type slots of the helper object: deallocation, GC traversal and the
   buffer protocol. */
static PyType_Slot bytesiobuf_slots[] = {
    {Py_tp_dealloc, bytesiobuf_dealloc},
    {Py_tp_traverse, bytesiobuf_traverse},

    // Buffer protocol
    {Py_bf_getbuffer, bytesiobuf_getbuffer},
    {Py_bf_releasebuffer, bytesiobuf_releasebuffer},
    {0, NULL},
};
1125
/* Type specification of _io._BytesIOBuffer: a GC-enabled type with an
   immutable type object that cannot be instantiated directly. */
PyType_Spec bytesiobuf_spec = {
    .name = "_io._BytesIOBuffer",
    .basicsize = sizeof(bytesiobuf),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
    .slots = bytesiobuf_slots,
};
1133