• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 #include "pycore_object.h"
3 #include "pycore_sysmodule.h"     // _PySys_GetSizeOf()
4 
5 #include <stddef.h>               // offsetof()
6 #include "_iomodule.h"
7 
8 /*[clinic input]
9 module _io
10 class _io.BytesIO "bytesio *" "clinic_state()->PyBytesIO_Type"
11 [clinic start generated code]*/
12 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=48ede2f330f847c3]*/
13 
14 typedef struct {
15     PyObject_HEAD
16     PyObject *buf;
17     Py_ssize_t pos;
18     Py_ssize_t string_size;
19     PyObject *dict;
20     PyObject *weakreflist;
21     Py_ssize_t exports;
22 } bytesio;
23 
24 typedef struct {
25     PyObject_HEAD
26     bytesio *source;
27 } bytesiobuf;
28 
/* The bytesio object can be in three states:
  * Py_REFCNT(buf) == 1, exports == 0.
  * Py_REFCNT(buf) > 1, exports == 0:
    the first modification or export causes the internal buffer to be copied.
  * exports > 0, Py_REFCNT(buf) == 1: any modifications are forbidden.
*/
35 
36 static int
check_closed(bytesio * self)37 check_closed(bytesio *self)
38 {
39     if (self->buf == NULL) {
40         PyErr_SetString(PyExc_ValueError, "I/O operation on closed file.");
41         return 1;
42     }
43     return 0;
44 }
45 
46 static int
check_exports(bytesio * self)47 check_exports(bytesio *self)
48 {
49     if (self->exports > 0) {
50         PyErr_SetString(PyExc_BufferError,
51                         "Existing exports of data: object cannot be re-sized");
52         return 1;
53     }
54     return 0;
55 }
56 
/* Return NULL from the calling function (with ValueError set by
   check_closed()) when the stream is closed.  The do/while(0) wrapper makes
   the expansion a single statement, so "CHECK_CLOSED(self);" is also safe
   inside an unbraced if/else. */
#define CHECK_CLOSED(self)                                  \
    do {                                                    \
        if (check_closed(self)) {                           \
            return NULL;                                    \
        }                                                   \
    } while (0)
61 
/* Return NULL from the calling function (with BufferError set by
   check_exports()) when the object has exported buffers.  The do/while(0)
   wrapper makes the expansion a single statement, so "CHECK_EXPORTS(self);"
   is also safe inside an unbraced if/else. */
#define CHECK_EXPORTS(self)                                 \
    do {                                                    \
        if (check_exports(self)) {                          \
            return NULL;                                    \
        }                                                   \
    } while (0)
66 
/* True when somebody besides this object holds a reference to the internal
   buffer, i.e. it must be copied before it may be modified in place. */
#define SHARED_BUF(self) (Py_REFCNT((self)->buf) > 1)
68 
69 
70 /* Internal routine to get a line from the buffer of a BytesIO
71    object. Returns the length between the current position to the
72    next newline character. */
73 static Py_ssize_t
scan_eol(bytesio * self,Py_ssize_t len)74 scan_eol(bytesio *self, Py_ssize_t len)
75 {
76     const char *start, *n;
77     Py_ssize_t maxlen;
78 
79     assert(self->buf != NULL);
80     assert(self->pos >= 0);
81 
82     if (self->pos >= self->string_size)
83         return 0;
84 
85     /* Move to the end of the line, up to the end of the string, s. */
86     maxlen = self->string_size - self->pos;
87     if (len < 0 || len > maxlen)
88         len = maxlen;
89 
90     if (len) {
91         start = PyBytes_AS_STRING(self->buf) + self->pos;
92         n = memchr(start, '\n', len);
93         if (n)
94             /* Get the length from the current position to the end of
95                the line. */
96             len = n - start + 1;
97     }
98     assert(len >= 0);
99     assert(self->pos < PY_SSIZE_T_MAX - len);
100 
101     return len;
102 }
103 
104 /* Internal routine for detaching the shared buffer of BytesIO objects.
105    The caller should ensure that the 'size' argument is non-negative and
106    not lesser than self->string_size.  Returns 0 on success, -1 otherwise. */
107 static int
unshare_buffer(bytesio * self,size_t size)108 unshare_buffer(bytesio *self, size_t size)
109 {
110     PyObject *new_buf;
111     assert(SHARED_BUF(self));
112     assert(self->exports == 0);
113     assert(size >= (size_t)self->string_size);
114     new_buf = PyBytes_FromStringAndSize(NULL, size);
115     if (new_buf == NULL)
116         return -1;
117     memcpy(PyBytes_AS_STRING(new_buf), PyBytes_AS_STRING(self->buf),
118            self->string_size);
119     Py_SETREF(self->buf, new_buf);
120     return 0;
121 }
122 
123 /* Internal routine for changing the size of the buffer of BytesIO objects.
124    The caller should ensure that the 'size' argument is non-negative.  Returns
125    0 on success, -1 otherwise. */
126 static int
resize_buffer(bytesio * self,size_t size)127 resize_buffer(bytesio *self, size_t size)
128 {
129     assert(self->buf != NULL);
130     assert(self->exports == 0);
131 
132     /* Here, unsigned types are used to avoid dealing with signed integer
133        overflow, which is undefined in C. */
134     size_t alloc = PyBytes_GET_SIZE(self->buf);
135 
136     /* For simplicity, stay in the range of the signed type. Anyway, Python
137        doesn't allow strings to be longer than this. */
138     if (size > PY_SSIZE_T_MAX)
139         goto overflow;
140 
141     if (size < alloc / 2) {
142         /* Major downsize; resize down to exact size. */
143         alloc = size + 1;
144     }
145     else if (size < alloc) {
146         /* Within allocated size; quick exit */
147         return 0;
148     }
149     else if (size <= alloc * 1.125) {
150         /* Moderate upsize; overallocate similar to list_resize() */
151         alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
152     }
153     else {
154         /* Major upsize; resize up to exact size */
155         alloc = size + 1;
156     }
157 
158     if (SHARED_BUF(self)) {
159         if (unshare_buffer(self, alloc) < 0)
160             return -1;
161     }
162     else {
163         if (_PyBytes_Resize(&self->buf, alloc) < 0)
164             return -1;
165     }
166 
167     return 0;
168 
169   overflow:
170     PyErr_SetString(PyExc_OverflowError,
171                     "new buffer size too large");
172     return -1;
173 }
174 
175 /* Internal routine for writing a string of bytes to the buffer of a BytesIO
176    object. Returns the number of bytes written, or -1 on error.
177    Inlining is disabled because it's significantly decreases performance
178    of writelines() in PGO build. */
179 Py_NO_INLINE static Py_ssize_t
write_bytes(bytesio * self,PyObject * b)180 write_bytes(bytesio *self, PyObject *b)
181 {
182     if (check_closed(self)) {
183         return -1;
184     }
185     if (check_exports(self)) {
186         return -1;
187     }
188 
189     Py_buffer buf;
190     if (PyObject_GetBuffer(b, &buf, PyBUF_CONTIG_RO) < 0) {
191         return -1;
192     }
193     Py_ssize_t len = buf.len;
194     if (len == 0) {
195         goto done;
196     }
197 
198     assert(self->pos >= 0);
199     size_t endpos = (size_t)self->pos + len;
200     if (endpos > (size_t)PyBytes_GET_SIZE(self->buf)) {
201         if (resize_buffer(self, endpos) < 0) {
202             len = -1;
203             goto done;
204         }
205     }
206     else if (SHARED_BUF(self)) {
207         if (unshare_buffer(self, Py_MAX(endpos, (size_t)self->string_size)) < 0) {
208             len = -1;
209             goto done;
210         }
211     }
212 
213     if (self->pos > self->string_size) {
214         /* In case of overseek, pad with null bytes the buffer region between
215            the end of stream and the current position.
216 
217           0   lo      string_size                           hi
218           |   |<---used--->|<----------available----------->|
219           |   |            <--to pad-->|<---to write--->    |
220           0   buf                   position
221         */
222         memset(PyBytes_AS_STRING(self->buf) + self->string_size, '\0',
223                (self->pos - self->string_size) * sizeof(char));
224     }
225 
226     /* Copy the data to the internal buffer, overwriting some of the existing
227        data if self->pos < self->string_size. */
228     memcpy(PyBytes_AS_STRING(self->buf) + self->pos, buf.buf, len);
229     self->pos = endpos;
230 
231     /* Set the new length of the internal string if it has changed. */
232     if ((size_t)self->string_size < endpos) {
233         self->string_size = endpos;
234     }
235 
236   done:
237     PyBuffer_Release(&buf);
238     return len;
239 }
240 
241 static PyObject *
bytesio_get_closed(bytesio * self,void * Py_UNUSED (ignored))242 bytesio_get_closed(bytesio *self, void *Py_UNUSED(ignored))
243 {
244     if (self->buf == NULL) {
245         Py_RETURN_TRUE;
246     }
247     else {
248         Py_RETURN_FALSE;
249     }
250 }
251 
252 /*[clinic input]
253 _io.BytesIO.readable
254 
255 Returns True if the IO object can be read.
256 [clinic start generated code]*/
257 
258 static PyObject *
_io_BytesIO_readable_impl(bytesio * self)259 _io_BytesIO_readable_impl(bytesio *self)
260 /*[clinic end generated code: output=4e93822ad5b62263 input=96c5d0cccfb29f5c]*/
261 {
262     CHECK_CLOSED(self);
263     Py_RETURN_TRUE;
264 }
265 
266 /*[clinic input]
267 _io.BytesIO.writable
268 
269 Returns True if the IO object can be written.
270 [clinic start generated code]*/
271 
272 static PyObject *
_io_BytesIO_writable_impl(bytesio * self)273 _io_BytesIO_writable_impl(bytesio *self)
274 /*[clinic end generated code: output=64ff6a254b1150b8 input=700eed808277560a]*/
275 {
276     CHECK_CLOSED(self);
277     Py_RETURN_TRUE;
278 }
279 
280 /*[clinic input]
281 _io.BytesIO.seekable
282 
283 Returns True if the IO object can be seeked.
284 [clinic start generated code]*/
285 
286 static PyObject *
_io_BytesIO_seekable_impl(bytesio * self)287 _io_BytesIO_seekable_impl(bytesio *self)
288 /*[clinic end generated code: output=6b417f46dcc09b56 input=9421f65627a344dd]*/
289 {
290     CHECK_CLOSED(self);
291     Py_RETURN_TRUE;
292 }
293 
294 /*[clinic input]
295 _io.BytesIO.flush
296 
297 Does nothing.
298 [clinic start generated code]*/
299 
300 static PyObject *
_io_BytesIO_flush_impl(bytesio * self)301 _io_BytesIO_flush_impl(bytesio *self)
302 /*[clinic end generated code: output=187e3d781ca134a0 input=561ea490be4581a7]*/
303 {
304     CHECK_CLOSED(self);
305     Py_RETURN_NONE;
306 }
307 
308 /*[clinic input]
309 _io.BytesIO.getbuffer
310 
311     cls: defining_class
312     /
313 
314 Get a read-write view over the contents of the BytesIO object.
315 [clinic start generated code]*/
316 
317 static PyObject *
_io_BytesIO_getbuffer_impl(bytesio * self,PyTypeObject * cls)318 _io_BytesIO_getbuffer_impl(bytesio *self, PyTypeObject *cls)
319 /*[clinic end generated code: output=045091d7ce87fe4e input=0668fbb48f95dffa]*/
320 {
321     _PyIO_State *state = get_io_state_by_cls(cls);
322     PyTypeObject *type = state->PyBytesIOBuffer_Type;
323     bytesiobuf *buf;
324     PyObject *view;
325 
326     CHECK_CLOSED(self);
327 
328     buf = (bytesiobuf *) type->tp_alloc(type, 0);
329     if (buf == NULL)
330         return NULL;
331     buf->source = (bytesio*)Py_NewRef(self);
332     view = PyMemoryView_FromObject((PyObject *) buf);
333     Py_DECREF(buf);
334     return view;
335 }
336 
337 /*[clinic input]
338 _io.BytesIO.getvalue
339 
340 Retrieve the entire contents of the BytesIO object.
341 [clinic start generated code]*/
342 
343 static PyObject *
_io_BytesIO_getvalue_impl(bytesio * self)344 _io_BytesIO_getvalue_impl(bytesio *self)
345 /*[clinic end generated code: output=b3f6a3233c8fd628 input=4b403ac0af3973ed]*/
346 {
347     CHECK_CLOSED(self);
348     if (self->string_size <= 1 || self->exports > 0)
349         return PyBytes_FromStringAndSize(PyBytes_AS_STRING(self->buf),
350                                          self->string_size);
351 
352     if (self->string_size != PyBytes_GET_SIZE(self->buf)) {
353         if (SHARED_BUF(self)) {
354             if (unshare_buffer(self, self->string_size) < 0)
355                 return NULL;
356         }
357         else {
358             if (_PyBytes_Resize(&self->buf, self->string_size) < 0)
359                 return NULL;
360         }
361     }
362     return Py_NewRef(self->buf);
363 }
364 
365 /*[clinic input]
366 _io.BytesIO.isatty
367 
368 Always returns False.
369 
370 BytesIO objects are not connected to a TTY-like device.
371 [clinic start generated code]*/
372 
373 static PyObject *
_io_BytesIO_isatty_impl(bytesio * self)374 _io_BytesIO_isatty_impl(bytesio *self)
375 /*[clinic end generated code: output=df67712e669f6c8f input=6f97f0985d13f827]*/
376 {
377     CHECK_CLOSED(self);
378     Py_RETURN_FALSE;
379 }
380 
381 /*[clinic input]
382 _io.BytesIO.tell
383 
384 Current file position, an integer.
385 [clinic start generated code]*/
386 
387 static PyObject *
_io_BytesIO_tell_impl(bytesio * self)388 _io_BytesIO_tell_impl(bytesio *self)
389 /*[clinic end generated code: output=b54b0f93cd0e5e1d input=b106adf099cb3657]*/
390 {
391     CHECK_CLOSED(self);
392     return PyLong_FromSsize_t(self->pos);
393 }
394 
395 static PyObject *
read_bytes(bytesio * self,Py_ssize_t size)396 read_bytes(bytesio *self, Py_ssize_t size)
397 {
398     const char *output;
399 
400     assert(self->buf != NULL);
401     assert(size <= self->string_size);
402     if (size > 1 &&
403         self->pos == 0 && size == PyBytes_GET_SIZE(self->buf) &&
404         self->exports == 0) {
405         self->pos += size;
406         return Py_NewRef(self->buf);
407     }
408 
409     output = PyBytes_AS_STRING(self->buf) + self->pos;
410     self->pos += size;
411     return PyBytes_FromStringAndSize(output, size);
412 }
413 
414 /*[clinic input]
415 _io.BytesIO.read
416     size: Py_ssize_t(accept={int, NoneType}) = -1
417     /
418 
419 Read at most size bytes, returned as a bytes object.
420 
421 If the size argument is negative, read until EOF is reached.
422 Return an empty bytes object at EOF.
423 [clinic start generated code]*/
424 
425 static PyObject *
_io_BytesIO_read_impl(bytesio * self,Py_ssize_t size)426 _io_BytesIO_read_impl(bytesio *self, Py_ssize_t size)
427 /*[clinic end generated code: output=9cc025f21c75bdd2 input=74344a39f431c3d7]*/
428 {
429     Py_ssize_t n;
430 
431     CHECK_CLOSED(self);
432 
433     /* adjust invalid sizes */
434     n = self->string_size - self->pos;
435     if (size < 0 || size > n) {
436         size = n;
437         if (size < 0)
438             size = 0;
439     }
440 
441     return read_bytes(self, size);
442 }
443 
444 
445 /*[clinic input]
446 _io.BytesIO.read1
447     size: Py_ssize_t(accept={int, NoneType}) = -1
448     /
449 
450 Read at most size bytes, returned as a bytes object.
451 
452 If the size argument is negative or omitted, read until EOF is reached.
453 Return an empty bytes object at EOF.
454 [clinic start generated code]*/
455 
456 static PyObject *
_io_BytesIO_read1_impl(bytesio * self,Py_ssize_t size)457 _io_BytesIO_read1_impl(bytesio *self, Py_ssize_t size)
458 /*[clinic end generated code: output=d0f843285aa95f1c input=440a395bf9129ef5]*/
459 {
460     return _io_BytesIO_read_impl(self, size);
461 }
462 
463 /*[clinic input]
464 _io.BytesIO.readline
465     size: Py_ssize_t(accept={int, NoneType}) = -1
466     /
467 
468 Next line from the file, as a bytes object.
469 
470 Retain newline.  A non-negative size argument limits the maximum
471 number of bytes to return (an incomplete line may be returned then).
472 Return an empty bytes object at EOF.
473 [clinic start generated code]*/
474 
475 static PyObject *
_io_BytesIO_readline_impl(bytesio * self,Py_ssize_t size)476 _io_BytesIO_readline_impl(bytesio *self, Py_ssize_t size)
477 /*[clinic end generated code: output=4bff3c251df8ffcd input=e7c3fbd1744e2783]*/
478 {
479     Py_ssize_t n;
480 
481     CHECK_CLOSED(self);
482 
483     n = scan_eol(self, size);
484 
485     return read_bytes(self, n);
486 }
487 
488 /*[clinic input]
489 _io.BytesIO.readlines
490     size as arg: object = None
491     /
492 
493 List of bytes objects, each a line from the file.
494 
495 Call readline() repeatedly and return a list of the lines so read.
496 The optional size argument, if given, is an approximate bound on the
497 total number of bytes in the lines returned.
498 [clinic start generated code]*/
499 
500 static PyObject *
_io_BytesIO_readlines_impl(bytesio * self,PyObject * arg)501 _io_BytesIO_readlines_impl(bytesio *self, PyObject *arg)
502 /*[clinic end generated code: output=09b8e34c880808ff input=691aa1314f2c2a87]*/
503 {
504     Py_ssize_t maxsize, size, n;
505     PyObject *result, *line;
506     const char *output;
507 
508     CHECK_CLOSED(self);
509 
510     if (PyLong_Check(arg)) {
511         maxsize = PyLong_AsSsize_t(arg);
512         if (maxsize == -1 && PyErr_Occurred())
513             return NULL;
514     }
515     else if (arg == Py_None) {
516         /* No size limit, by default. */
517         maxsize = -1;
518     }
519     else {
520         PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
521                      Py_TYPE(arg)->tp_name);
522         return NULL;
523     }
524 
525     size = 0;
526     result = PyList_New(0);
527     if (!result)
528         return NULL;
529 
530     output = PyBytes_AS_STRING(self->buf) + self->pos;
531     while ((n = scan_eol(self, -1)) != 0) {
532         self->pos += n;
533         line = PyBytes_FromStringAndSize(output, n);
534         if (!line)
535             goto on_error;
536         if (PyList_Append(result, line) == -1) {
537             Py_DECREF(line);
538             goto on_error;
539         }
540         Py_DECREF(line);
541         size += n;
542         if (maxsize > 0 && size >= maxsize)
543             break;
544         output += n;
545     }
546     return result;
547 
548   on_error:
549     Py_DECREF(result);
550     return NULL;
551 }
552 
553 /*[clinic input]
554 _io.BytesIO.readinto
555     buffer: Py_buffer(accept={rwbuffer})
556     /
557 
558 Read bytes into buffer.
559 
560 Returns number of bytes read (0 for EOF), or None if the object
561 is set not to block and has no data to read.
562 [clinic start generated code]*/
563 
564 static PyObject *
_io_BytesIO_readinto_impl(bytesio * self,Py_buffer * buffer)565 _io_BytesIO_readinto_impl(bytesio *self, Py_buffer *buffer)
566 /*[clinic end generated code: output=a5d407217dcf0639 input=1424d0fdce857919]*/
567 {
568     Py_ssize_t len, n;
569 
570     CHECK_CLOSED(self);
571 
572     /* adjust invalid sizes */
573     len = buffer->len;
574     n = self->string_size - self->pos;
575     if (len > n) {
576         len = n;
577         if (len < 0)
578             len = 0;
579     }
580 
581     memcpy(buffer->buf, PyBytes_AS_STRING(self->buf) + self->pos, len);
582     assert(self->pos + len < PY_SSIZE_T_MAX);
583     assert(len >= 0);
584     self->pos += len;
585 
586     return PyLong_FromSsize_t(len);
587 }
588 
589 /*[clinic input]
590 _io.BytesIO.truncate
591     size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None
592     /
593 
594 Truncate the file to at most size bytes.
595 
596 Size defaults to the current file position, as returned by tell().
597 The current file position is unchanged.  Returns the new size.
598 [clinic start generated code]*/
599 
600 static PyObject *
_io_BytesIO_truncate_impl(bytesio * self,Py_ssize_t size)601 _io_BytesIO_truncate_impl(bytesio *self, Py_ssize_t size)
602 /*[clinic end generated code: output=9ad17650c15fa09b input=423759dd42d2f7c1]*/
603 {
604     CHECK_CLOSED(self);
605     CHECK_EXPORTS(self);
606 
607     if (size < 0) {
608         PyErr_Format(PyExc_ValueError,
609                      "negative size value %zd", size);
610         return NULL;
611     }
612 
613     if (size < self->string_size) {
614         self->string_size = size;
615         if (resize_buffer(self, size) < 0)
616             return NULL;
617     }
618 
619     return PyLong_FromSsize_t(size);
620 }
621 
622 static PyObject *
bytesio_iternext(bytesio * self)623 bytesio_iternext(bytesio *self)
624 {
625     Py_ssize_t n;
626 
627     CHECK_CLOSED(self);
628 
629     n = scan_eol(self, -1);
630 
631     if (n == 0)
632         return NULL;
633 
634     return read_bytes(self, n);
635 }
636 
637 /*[clinic input]
638 _io.BytesIO.seek
639     pos: Py_ssize_t
640     whence: int = 0
641     /
642 
643 Change stream position.
644 
645 Seek to byte offset pos relative to position indicated by whence:
646      0  Start of stream (the default).  pos should be >= 0;
647      1  Current position - pos may be negative;
648      2  End of stream - pos usually negative.
649 Returns the new absolute position.
650 [clinic start generated code]*/
651 
652 static PyObject *
_io_BytesIO_seek_impl(bytesio * self,Py_ssize_t pos,int whence)653 _io_BytesIO_seek_impl(bytesio *self, Py_ssize_t pos, int whence)
654 /*[clinic end generated code: output=c26204a68e9190e4 input=1e875e6ebc652948]*/
655 {
656     CHECK_CLOSED(self);
657 
658     if (pos < 0 && whence == 0) {
659         PyErr_Format(PyExc_ValueError,
660                      "negative seek value %zd", pos);
661         return NULL;
662     }
663 
664     /* whence = 0: offset relative to beginning of the string.
665        whence = 1: offset relative to current position.
666        whence = 2: offset relative the end of the string. */
667     if (whence == 1) {
668         if (pos > PY_SSIZE_T_MAX - self->pos) {
669             PyErr_SetString(PyExc_OverflowError,
670                             "new position too large");
671             return NULL;
672         }
673         pos += self->pos;
674     }
675     else if (whence == 2) {
676         if (pos > PY_SSIZE_T_MAX - self->string_size) {
677             PyErr_SetString(PyExc_OverflowError,
678                             "new position too large");
679             return NULL;
680         }
681         pos += self->string_size;
682     }
683     else if (whence != 0) {
684         PyErr_Format(PyExc_ValueError,
685                      "invalid whence (%i, should be 0, 1 or 2)", whence);
686         return NULL;
687     }
688 
689     if (pos < 0)
690         pos = 0;
691     self->pos = pos;
692 
693     return PyLong_FromSsize_t(self->pos);
694 }
695 
696 /*[clinic input]
697 _io.BytesIO.write
698     b: object
699     /
700 
701 Write bytes to file.
702 
703 Return the number of bytes written.
704 [clinic start generated code]*/
705 
706 static PyObject *
_io_BytesIO_write(bytesio * self,PyObject * b)707 _io_BytesIO_write(bytesio *self, PyObject *b)
708 /*[clinic end generated code: output=53316d99800a0b95 input=f5ec7c8c64ed720a]*/
709 {
710     Py_ssize_t n = write_bytes(self, b);
711     return n >= 0 ? PyLong_FromSsize_t(n) : NULL;
712 }
713 
714 /*[clinic input]
715 _io.BytesIO.writelines
716     lines: object
717     /
718 
719 Write lines to the file.
720 
721 Note that newlines are not added.  lines can be any iterable object
722 producing bytes-like objects. This is equivalent to calling write() for
723 each element.
724 [clinic start generated code]*/
725 
726 static PyObject *
_io_BytesIO_writelines(bytesio * self,PyObject * lines)727 _io_BytesIO_writelines(bytesio *self, PyObject *lines)
728 /*[clinic end generated code: output=7f33aa3271c91752 input=e972539176fc8fc1]*/
729 {
730     PyObject *it, *item;
731 
732     CHECK_CLOSED(self);
733 
734     it = PyObject_GetIter(lines);
735     if (it == NULL)
736         return NULL;
737 
738     while ((item = PyIter_Next(it)) != NULL) {
739         Py_ssize_t ret = write_bytes(self, item);
740         Py_DECREF(item);
741         if (ret < 0) {
742             Py_DECREF(it);
743             return NULL;
744         }
745     }
746     Py_DECREF(it);
747 
748     /* See if PyIter_Next failed */
749     if (PyErr_Occurred())
750         return NULL;
751 
752     Py_RETURN_NONE;
753 }
754 
755 /*[clinic input]
756 _io.BytesIO.close
757 
758 Disable all I/O operations.
759 [clinic start generated code]*/
760 
761 static PyObject *
_io_BytesIO_close_impl(bytesio * self)762 _io_BytesIO_close_impl(bytesio *self)
763 /*[clinic end generated code: output=1471bb9411af84a0 input=37e1f55556e61f60]*/
764 {
765     CHECK_EXPORTS(self);
766     Py_CLEAR(self->buf);
767     Py_RETURN_NONE;
768 }
769 
/* Pickling support.

   Note that only pickle protocol 2 and onward are supported since we use
   the extended __reduce__ API of PEP 307 to make BytesIO instances picklable.

   Providing support for protocol < 2 would require the __reduce_ex__ method,
   which is notably long-winded when defined properly.

   For BytesIO, the implementation would be similar to the one coded for
   object.__reduce_ex__, but slightly less general. To be more specific, we
   could call bytesio_getstate directly and avoid checking for the presence of
   a fallback __reduce__ method. However, we would still need a __newobj__
   function to use the efficient instance representation of PEP 307.
 */
784 
785 static PyObject *
bytesio_getstate(bytesio * self,PyObject * Py_UNUSED (ignored))786 bytesio_getstate(bytesio *self, PyObject *Py_UNUSED(ignored))
787 {
788     PyObject *initvalue = _io_BytesIO_getvalue_impl(self);
789     PyObject *dict;
790     PyObject *state;
791 
792     if (initvalue == NULL)
793         return NULL;
794     if (self->dict == NULL) {
795         dict = Py_NewRef(Py_None);
796     }
797     else {
798         dict = PyDict_Copy(self->dict);
799         if (dict == NULL) {
800             Py_DECREF(initvalue);
801             return NULL;
802         }
803     }
804 
805     state = Py_BuildValue("(OnN)", initvalue, self->pos, dict);
806     Py_DECREF(initvalue);
807     return state;
808 }
809 
810 static PyObject *
bytesio_setstate(bytesio * self,PyObject * state)811 bytesio_setstate(bytesio *self, PyObject *state)
812 {
813     PyObject *result;
814     PyObject *position_obj;
815     PyObject *dict;
816     Py_ssize_t pos;
817 
818     assert(state != NULL);
819 
820     /* We allow the state tuple to be longer than 3, because we may need
821        someday to extend the object's state without breaking
822        backward-compatibility. */
823     if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 3) {
824         PyErr_Format(PyExc_TypeError,
825                      "%.200s.__setstate__ argument should be 3-tuple, got %.200s",
826                      Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
827         return NULL;
828     }
829     CHECK_EXPORTS(self);
830     /* Reset the object to its default state. This is only needed to handle
831        the case of repeated calls to __setstate__. */
832     self->string_size = 0;
833     self->pos = 0;
834 
835     /* Set the value of the internal buffer. If state[0] does not support the
836        buffer protocol, bytesio_write will raise the appropriate TypeError. */
837     result = _io_BytesIO_write(self, PyTuple_GET_ITEM(state, 0));
838     if (result == NULL)
839         return NULL;
840     Py_DECREF(result);
841 
842     /* Set carefully the position value. Alternatively, we could use the seek
843        method instead of modifying self->pos directly to better protect the
844        object internal state against erroneous (or malicious) inputs. */
845     position_obj = PyTuple_GET_ITEM(state, 1);
846     if (!PyLong_Check(position_obj)) {
847         PyErr_Format(PyExc_TypeError,
848                      "second item of state must be an integer, not %.200s",
849                      Py_TYPE(position_obj)->tp_name);
850         return NULL;
851     }
852     pos = PyLong_AsSsize_t(position_obj);
853     if (pos == -1 && PyErr_Occurred())
854         return NULL;
855     if (pos < 0) {
856         PyErr_SetString(PyExc_ValueError,
857                         "position value cannot be negative");
858         return NULL;
859     }
860     self->pos = pos;
861 
862     /* Set the dictionary of the instance variables. */
863     dict = PyTuple_GET_ITEM(state, 2);
864     if (dict != Py_None) {
865         if (!PyDict_Check(dict)) {
866             PyErr_Format(PyExc_TypeError,
867                          "third item of state should be a dict, got a %.200s",
868                          Py_TYPE(dict)->tp_name);
869             return NULL;
870         }
871         if (self->dict) {
872             /* Alternatively, we could replace the internal dictionary
873                completely. However, it seems more practical to just update it. */
874             if (PyDict_Update(self->dict, dict) < 0)
875                 return NULL;
876         }
877         else {
878             self->dict = Py_NewRef(dict);
879         }
880     }
881 
882     Py_RETURN_NONE;
883 }
884 
885 static void
bytesio_dealloc(bytesio * self)886 bytesio_dealloc(bytesio *self)
887 {
888     PyTypeObject *tp = Py_TYPE(self);
889     _PyObject_GC_UNTRACK(self);
890     if (self->exports > 0) {
891         PyErr_SetString(PyExc_SystemError,
892                         "deallocated BytesIO object has exported buffers");
893         PyErr_Print();
894     }
895     Py_CLEAR(self->buf);
896     Py_CLEAR(self->dict);
897     if (self->weakreflist != NULL)
898         PyObject_ClearWeakRefs((PyObject *) self);
899     tp->tp_free(self);
900     Py_DECREF(tp);
901 }
902 
903 static PyObject *
bytesio_new(PyTypeObject * type,PyObject * args,PyObject * kwds)904 bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
905 {
906     bytesio *self;
907 
908     assert(type != NULL && type->tp_alloc != NULL);
909     self = (bytesio *)type->tp_alloc(type, 0);
910     if (self == NULL)
911         return NULL;
912 
913     /* tp_alloc initializes all the fields to zero. So we don't have to
914        initialize them here. */
915 
916     self->buf = PyBytes_FromStringAndSize(NULL, 0);
917     if (self->buf == NULL) {
918         Py_DECREF(self);
919         return PyErr_NoMemory();
920     }
921 
922     return (PyObject *)self;
923 }
924 
925 /*[clinic input]
926 _io.BytesIO.__init__
927     initial_bytes as initvalue: object(c_default="NULL") = b''
928 
929 Buffered I/O implementation using an in-memory bytes buffer.
930 [clinic start generated code]*/
931 
932 static int
_io_BytesIO___init___impl(bytesio * self,PyObject * initvalue)933 _io_BytesIO___init___impl(bytesio *self, PyObject *initvalue)
934 /*[clinic end generated code: output=65c0c51e24c5b621 input=aac7f31b67bf0fb6]*/
935 {
936     /* In case, __init__ is called multiple times. */
937     self->string_size = 0;
938     self->pos = 0;
939 
940     if (self->exports > 0) {
941         PyErr_SetString(PyExc_BufferError,
942                         "Existing exports of data: object cannot be re-sized");
943         return -1;
944     }
945     if (initvalue && initvalue != Py_None) {
946         if (PyBytes_CheckExact(initvalue)) {
947             Py_XSETREF(self->buf, Py_NewRef(initvalue));
948             self->string_size = PyBytes_GET_SIZE(initvalue);
949         }
950         else {
951             PyObject *res;
952             res = _io_BytesIO_write(self, initvalue);
953             if (res == NULL)
954                 return -1;
955             Py_DECREF(res);
956             self->pos = 0;
957         }
958     }
959 
960     return 0;
961 }
962 
963 static PyObject *
bytesio_sizeof(bytesio * self,void * unused)964 bytesio_sizeof(bytesio *self, void *unused)
965 {
966     size_t res = _PyObject_SIZE(Py_TYPE(self));
967     if (self->buf && !SHARED_BUF(self)) {
968         size_t s = _PySys_GetSizeOf(self->buf);
969         if (s == (size_t)-1) {
970             return NULL;
971         }
972         res += s;
973     }
974     return PyLong_FromSize_t(res);
975 }
976 
977 static int
bytesio_traverse(bytesio * self,visitproc visit,void * arg)978 bytesio_traverse(bytesio *self, visitproc visit, void *arg)
979 {
980     Py_VISIT(Py_TYPE(self));
981     Py_VISIT(self->dict);
982     Py_VISIT(self->buf);
983     return 0;
984 }
985 
986 static int
bytesio_clear(bytesio * self)987 bytesio_clear(bytesio *self)
988 {
989     Py_CLEAR(self->dict);
990     if (self->exports == 0) {
991         Py_CLEAR(self->buf);
992     }
993     return 0;
994 }
995 
996 
997 #define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
998 #include "clinic/bytesio.c.h"
999 #undef clinic_state
1000 
1001 static PyGetSetDef bytesio_getsetlist[] = {
1002     {"closed",  (getter)bytesio_get_closed, NULL,
1003      "True if the file is closed."},
1004     {NULL},            /* sentinel */
1005 };
1006 
1007 static struct PyMethodDef bytesio_methods[] = {
1008     _IO_BYTESIO_READABLE_METHODDEF
1009     _IO_BYTESIO_SEEKABLE_METHODDEF
1010     _IO_BYTESIO_WRITABLE_METHODDEF
1011     _IO_BYTESIO_CLOSE_METHODDEF
1012     _IO_BYTESIO_FLUSH_METHODDEF
1013     _IO_BYTESIO_ISATTY_METHODDEF
1014     _IO_BYTESIO_TELL_METHODDEF
1015     _IO_BYTESIO_WRITE_METHODDEF
1016     _IO_BYTESIO_WRITELINES_METHODDEF
1017     _IO_BYTESIO_READ1_METHODDEF
1018     _IO_BYTESIO_READINTO_METHODDEF
1019     _IO_BYTESIO_READLINE_METHODDEF
1020     _IO_BYTESIO_READLINES_METHODDEF
1021     _IO_BYTESIO_READ_METHODDEF
1022     _IO_BYTESIO_GETBUFFER_METHODDEF
1023     _IO_BYTESIO_GETVALUE_METHODDEF
1024     _IO_BYTESIO_SEEK_METHODDEF
1025     _IO_BYTESIO_TRUNCATE_METHODDEF
1026     {"__getstate__",  (PyCFunction)bytesio_getstate,  METH_NOARGS, NULL},
1027     {"__setstate__",  (PyCFunction)bytesio_setstate,  METH_O, NULL},
1028     {"__sizeof__", (PyCFunction)bytesio_sizeof,     METH_NOARGS, NULL},
1029     {NULL, NULL}        /* sentinel */
1030 };
1031 
1032 static PyMemberDef bytesio_members[] = {
1033     {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(bytesio, weakreflist), Py_READONLY},
1034     {"__dictoffset__", Py_T_PYSSIZET, offsetof(bytesio, dict), Py_READONLY},
1035     {NULL}
1036 };
1037 
1038 static PyType_Slot bytesio_slots[] = {
1039     {Py_tp_dealloc, bytesio_dealloc},
1040     {Py_tp_doc, (void *)_io_BytesIO___init____doc__},
1041     {Py_tp_traverse, bytesio_traverse},
1042     {Py_tp_clear, bytesio_clear},
1043     {Py_tp_iter, PyObject_SelfIter},
1044     {Py_tp_iternext, bytesio_iternext},
1045     {Py_tp_methods, bytesio_methods},
1046     {Py_tp_members, bytesio_members},
1047     {Py_tp_getset, bytesio_getsetlist},
1048     {Py_tp_init, _io_BytesIO___init__},
1049     {Py_tp_new, bytesio_new},
1050     {0, NULL},
1051 };
1052 
1053 PyType_Spec bytesio_spec = {
1054     .name = "_io.BytesIO",
1055     .basicsize = sizeof(bytesio),
1056     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1057               Py_TPFLAGS_IMMUTABLETYPE),
1058     .slots = bytesio_slots,
1059 };
1060 
1061 /*
1062  * Implementation of the small intermediate object used by getbuffer().
1063  * getbuffer() returns a memoryview over this object, which should make it
1064  * invisible from Python code.
1065  */
1066 
1067 static int
bytesiobuf_getbuffer(bytesiobuf * obj,Py_buffer * view,int flags)1068 bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags)
1069 {
1070     bytesio *b = (bytesio *) obj->source;
1071 
1072     if (view == NULL) {
1073         PyErr_SetString(PyExc_BufferError,
1074             "bytesiobuf_getbuffer: view==NULL argument is obsolete");
1075         return -1;
1076     }
1077     if (b->exports == 0 && SHARED_BUF(b)) {
1078         if (unshare_buffer(b, b->string_size) < 0)
1079             return -1;
1080     }
1081 
1082     /* cannot fail if view != NULL and readonly == 0 */
1083     (void)PyBuffer_FillInfo(view, (PyObject*)obj,
1084                             PyBytes_AS_STRING(b->buf), b->string_size,
1085                             0, flags);
1086     b->exports++;
1087     return 0;
1088 }
1089 
1090 static void
bytesiobuf_releasebuffer(bytesiobuf * obj,Py_buffer * view)1091 bytesiobuf_releasebuffer(bytesiobuf *obj, Py_buffer *view)
1092 {
1093     bytesio *b = (bytesio *) obj->source;
1094     b->exports--;
1095 }
1096 
1097 static int
bytesiobuf_traverse(bytesiobuf * self,visitproc visit,void * arg)1098 bytesiobuf_traverse(bytesiobuf *self, visitproc visit, void *arg)
1099 {
1100     Py_VISIT(Py_TYPE(self));
1101     Py_VISIT(self->source);
1102     return 0;
1103 }
1104 
1105 static void
bytesiobuf_dealloc(bytesiobuf * self)1106 bytesiobuf_dealloc(bytesiobuf *self)
1107 {
1108     PyTypeObject *tp = Py_TYPE(self);
1109     /* bpo-31095: UnTrack is needed before calling any callbacks */
1110     PyObject_GC_UnTrack(self);
1111     Py_CLEAR(self->source);
1112     tp->tp_free(self);
1113     Py_DECREF(tp);
1114 }
1115 
1116 static PyType_Slot bytesiobuf_slots[] = {
1117     {Py_tp_dealloc, bytesiobuf_dealloc},
1118     {Py_tp_traverse, bytesiobuf_traverse},
1119 
1120     // Buffer protocol
1121     {Py_bf_getbuffer, bytesiobuf_getbuffer},
1122     {Py_bf_releasebuffer, bytesiobuf_releasebuffer},
1123     {0, NULL},
1124 };
1125 
1126 PyType_Spec bytesiobuf_spec = {
1127     .name = "_io._BytesIOBuffer",
1128     .basicsize = sizeof(bytesiobuf),
1129     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
1130               Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
1131     .slots = bytesiobuf_slots,
1132 };
1133