1 #include "Python.h"
2 #include "pycore_object.h"
3 #include "structmember.h" /* for offsetof() */
4 #include "_iomodule.h"
5
6 /*[clinic input]
7 module _io
8 class _io.BytesIO "bytesio *" "&PyBytesIO_Type"
9 [clinic start generated code]*/
10 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7f50ec034f5c0b26]*/
11
12 typedef struct {
13 PyObject_HEAD
14 PyObject *buf;
15 Py_ssize_t pos;
16 Py_ssize_t string_size;
17 PyObject *dict;
18 PyObject *weakreflist;
19 Py_ssize_t exports;
20 } bytesio;
21
22 typedef struct {
23 PyObject_HEAD
24 bytesio *source;
25 } bytesiobuf;
26
/* The bytesio object can be in one of three states:

   - Py_REFCNT(buf) == 1 and exports == 0.
   - Py_REFCNT(buf) > 1 and exports == 0: the first modification or export
     causes the internal buffer to be copied.
   - exports > 0 and Py_REFCNT(buf) == 1: any modifications are forbidden.
*/
33
/* Raise ValueError and return NULL from the calling function when the
   stream is closed, i.e. when its buffer pointer is NULL.

   Wrapped in do { } while (0) so that the macro expands to exactly one
   statement and cannot capture a following `else`.  Invoke it with a
   trailing semicolon. */
#define CHECK_CLOSED(self)                                      \
    do {                                                        \
        if ((self)->buf == NULL) {                              \
            PyErr_SetString(PyExc_ValueError,                   \
                            "I/O operation on closed file.");   \
            return NULL;                                        \
        }                                                       \
    } while (0)

/* Raise BufferError and return NULL from the calling function while at
   least one buffer export of the object is still alive.  Invoke it with a
   trailing semicolon. */
#define CHECK_EXPORTS(self)                                                 \
    do {                                                                    \
        if ((self)->exports > 0) {                                          \
            PyErr_SetString(PyExc_BufferError,                              \
                "Existing exports of data: object cannot be re-sized");     \
            return NULL;                                                    \
        }                                                                   \
    } while (0)

/* True when the internal bytes object has more than one reference, in
   which case it must be copied before being modified. */
#define SHARED_BUF(self) (Py_REFCNT((self)->buf) > 1)
49
50
51 /* Internal routine to get a line from the buffer of a BytesIO
52 object. Returns the length between the current position to the
53 next newline character. */
54 static Py_ssize_t
scan_eol(bytesio * self,Py_ssize_t len)55 scan_eol(bytesio *self, Py_ssize_t len)
56 {
57 const char *start, *n;
58 Py_ssize_t maxlen;
59
60 assert(self->buf != NULL);
61 assert(self->pos >= 0);
62
63 if (self->pos >= self->string_size)
64 return 0;
65
66 /* Move to the end of the line, up to the end of the string, s. */
67 maxlen = self->string_size - self->pos;
68 if (len < 0 || len > maxlen)
69 len = maxlen;
70
71 if (len) {
72 start = PyBytes_AS_STRING(self->buf) + self->pos;
73 n = memchr(start, '\n', len);
74 if (n)
75 /* Get the length from the current position to the end of
76 the line. */
77 len = n - start + 1;
78 }
79 assert(len >= 0);
80 assert(self->pos < PY_SSIZE_T_MAX - len);
81
82 return len;
83 }
84
85 /* Internal routine for detaching the shared buffer of BytesIO objects.
86 The caller should ensure that the 'size' argument is non-negative and
87 not lesser than self->string_size. Returns 0 on success, -1 otherwise. */
88 static int
unshare_buffer(bytesio * self,size_t size)89 unshare_buffer(bytesio *self, size_t size)
90 {
91 PyObject *new_buf;
92 assert(SHARED_BUF(self));
93 assert(self->exports == 0);
94 assert(size >= (size_t)self->string_size);
95 new_buf = PyBytes_FromStringAndSize(NULL, size);
96 if (new_buf == NULL)
97 return -1;
98 memcpy(PyBytes_AS_STRING(new_buf), PyBytes_AS_STRING(self->buf),
99 self->string_size);
100 Py_SETREF(self->buf, new_buf);
101 return 0;
102 }
103
104 /* Internal routine for changing the size of the buffer of BytesIO objects.
105 The caller should ensure that the 'size' argument is non-negative. Returns
106 0 on success, -1 otherwise. */
107 static int
resize_buffer(bytesio * self,size_t size)108 resize_buffer(bytesio *self, size_t size)
109 {
110 /* Here, unsigned types are used to avoid dealing with signed integer
111 overflow, which is undefined in C. */
112 size_t alloc = PyBytes_GET_SIZE(self->buf);
113
114 assert(self->buf != NULL);
115
116 /* For simplicity, stay in the range of the signed type. Anyway, Python
117 doesn't allow strings to be longer than this. */
118 if (size > PY_SSIZE_T_MAX)
119 goto overflow;
120
121 if (size < alloc / 2) {
122 /* Major downsize; resize down to exact size. */
123 alloc = size + 1;
124 }
125 else if (size < alloc) {
126 /* Within allocated size; quick exit */
127 return 0;
128 }
129 else if (size <= alloc * 1.125) {
130 /* Moderate upsize; overallocate similar to list_resize() */
131 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
132 }
133 else {
134 /* Major upsize; resize up to exact size */
135 alloc = size + 1;
136 }
137
138 if (alloc > ((size_t)-1) / sizeof(char))
139 goto overflow;
140
141 if (SHARED_BUF(self)) {
142 if (unshare_buffer(self, alloc) < 0)
143 return -1;
144 }
145 else {
146 if (_PyBytes_Resize(&self->buf, alloc) < 0)
147 return -1;
148 }
149
150 return 0;
151
152 overflow:
153 PyErr_SetString(PyExc_OverflowError,
154 "new buffer size too large");
155 return -1;
156 }
157
158 /* Internal routine for writing a string of bytes to the buffer of a BytesIO
159 object. Returns the number of bytes written, or -1 on error. */
160 static Py_ssize_t
write_bytes(bytesio * self,const char * bytes,Py_ssize_t len)161 write_bytes(bytesio *self, const char *bytes, Py_ssize_t len)
162 {
163 size_t endpos;
164 assert(self->buf != NULL);
165 assert(self->pos >= 0);
166 assert(len >= 0);
167
168 endpos = (size_t)self->pos + len;
169 if (endpos > (size_t)PyBytes_GET_SIZE(self->buf)) {
170 if (resize_buffer(self, endpos) < 0)
171 return -1;
172 }
173 else if (SHARED_BUF(self)) {
174 if (unshare_buffer(self, Py_MAX(endpos, (size_t)self->string_size)) < 0)
175 return -1;
176 }
177
178 if (self->pos > self->string_size) {
179 /* In case of overseek, pad with null bytes the buffer region between
180 the end of stream and the current position.
181
182 0 lo string_size hi
183 | |<---used--->|<----------available----------->|
184 | | <--to pad-->|<---to write---> |
185 0 buf position
186 */
187 memset(PyBytes_AS_STRING(self->buf) + self->string_size, '\0',
188 (self->pos - self->string_size) * sizeof(char));
189 }
190
191 /* Copy the data to the internal buffer, overwriting some of the existing
192 data if self->pos < self->string_size. */
193 memcpy(PyBytes_AS_STRING(self->buf) + self->pos, bytes, len);
194 self->pos = endpos;
195
196 /* Set the new length of the internal string if it has changed. */
197 if ((size_t)self->string_size < endpos) {
198 self->string_size = endpos;
199 }
200
201 return len;
202 }
203
204 static PyObject *
bytesio_get_closed(bytesio * self,void * Py_UNUSED (ignored))205 bytesio_get_closed(bytesio *self, void *Py_UNUSED(ignored))
206 {
207 if (self->buf == NULL) {
208 Py_RETURN_TRUE;
209 }
210 else {
211 Py_RETURN_FALSE;
212 }
213 }
214
215 /*[clinic input]
216 _io.BytesIO.readable
217
218 Returns True if the IO object can be read.
219 [clinic start generated code]*/
220
221 static PyObject *
_io_BytesIO_readable_impl(bytesio * self)222 _io_BytesIO_readable_impl(bytesio *self)
223 /*[clinic end generated code: output=4e93822ad5b62263 input=96c5d0cccfb29f5c]*/
224 {
225 CHECK_CLOSED(self);
226 Py_RETURN_TRUE;
227 }
228
229 /*[clinic input]
230 _io.BytesIO.writable
231
232 Returns True if the IO object can be written.
233 [clinic start generated code]*/
234
235 static PyObject *
_io_BytesIO_writable_impl(bytesio * self)236 _io_BytesIO_writable_impl(bytesio *self)
237 /*[clinic end generated code: output=64ff6a254b1150b8 input=700eed808277560a]*/
238 {
239 CHECK_CLOSED(self);
240 Py_RETURN_TRUE;
241 }
242
243 /*[clinic input]
244 _io.BytesIO.seekable
245
246 Returns True if the IO object can be seeked.
247 [clinic start generated code]*/
248
249 static PyObject *
_io_BytesIO_seekable_impl(bytesio * self)250 _io_BytesIO_seekable_impl(bytesio *self)
251 /*[clinic end generated code: output=6b417f46dcc09b56 input=9421f65627a344dd]*/
252 {
253 CHECK_CLOSED(self);
254 Py_RETURN_TRUE;
255 }
256
257 /*[clinic input]
258 _io.BytesIO.flush
259
260 Does nothing.
261 [clinic start generated code]*/
262
263 static PyObject *
_io_BytesIO_flush_impl(bytesio * self)264 _io_BytesIO_flush_impl(bytesio *self)
265 /*[clinic end generated code: output=187e3d781ca134a0 input=561ea490be4581a7]*/
266 {
267 CHECK_CLOSED(self);
268 Py_RETURN_NONE;
269 }
270
271 /*[clinic input]
272 _io.BytesIO.getbuffer
273
274 Get a read-write view over the contents of the BytesIO object.
275 [clinic start generated code]*/
276
277 static PyObject *
_io_BytesIO_getbuffer_impl(bytesio * self)278 _io_BytesIO_getbuffer_impl(bytesio *self)
279 /*[clinic end generated code: output=72cd7c6e13aa09ed input=8f738ef615865176]*/
280 {
281 PyTypeObject *type = &_PyBytesIOBuffer_Type;
282 bytesiobuf *buf;
283 PyObject *view;
284
285 CHECK_CLOSED(self);
286
287 buf = (bytesiobuf *) type->tp_alloc(type, 0);
288 if (buf == NULL)
289 return NULL;
290 Py_INCREF(self);
291 buf->source = self;
292 view = PyMemoryView_FromObject((PyObject *) buf);
293 Py_DECREF(buf);
294 return view;
295 }
296
297 /*[clinic input]
298 _io.BytesIO.getvalue
299
300 Retrieve the entire contents of the BytesIO object.
301 [clinic start generated code]*/
302
303 static PyObject *
_io_BytesIO_getvalue_impl(bytesio * self)304 _io_BytesIO_getvalue_impl(bytesio *self)
305 /*[clinic end generated code: output=b3f6a3233c8fd628 input=4b403ac0af3973ed]*/
306 {
307 CHECK_CLOSED(self);
308 if (self->string_size <= 1 || self->exports > 0)
309 return PyBytes_FromStringAndSize(PyBytes_AS_STRING(self->buf),
310 self->string_size);
311
312 if (self->string_size != PyBytes_GET_SIZE(self->buf)) {
313 if (SHARED_BUF(self)) {
314 if (unshare_buffer(self, self->string_size) < 0)
315 return NULL;
316 }
317 else {
318 if (_PyBytes_Resize(&self->buf, self->string_size) < 0)
319 return NULL;
320 }
321 }
322 Py_INCREF(self->buf);
323 return self->buf;
324 }
325
326 /*[clinic input]
327 _io.BytesIO.isatty
328
329 Always returns False.
330
331 BytesIO objects are not connected to a TTY-like device.
332 [clinic start generated code]*/
333
334 static PyObject *
_io_BytesIO_isatty_impl(bytesio * self)335 _io_BytesIO_isatty_impl(bytesio *self)
336 /*[clinic end generated code: output=df67712e669f6c8f input=6f97f0985d13f827]*/
337 {
338 CHECK_CLOSED(self);
339 Py_RETURN_FALSE;
340 }
341
342 /*[clinic input]
343 _io.BytesIO.tell
344
345 Current file position, an integer.
346 [clinic start generated code]*/
347
348 static PyObject *
_io_BytesIO_tell_impl(bytesio * self)349 _io_BytesIO_tell_impl(bytesio *self)
350 /*[clinic end generated code: output=b54b0f93cd0e5e1d input=b106adf099cb3657]*/
351 {
352 CHECK_CLOSED(self);
353 return PyLong_FromSsize_t(self->pos);
354 }
355
356 static PyObject *
read_bytes(bytesio * self,Py_ssize_t size)357 read_bytes(bytesio *self, Py_ssize_t size)
358 {
359 char *output;
360
361 assert(self->buf != NULL);
362 assert(size <= self->string_size);
363 if (size > 1 &&
364 self->pos == 0 && size == PyBytes_GET_SIZE(self->buf) &&
365 self->exports == 0) {
366 self->pos += size;
367 Py_INCREF(self->buf);
368 return self->buf;
369 }
370
371 output = PyBytes_AS_STRING(self->buf) + self->pos;
372 self->pos += size;
373 return PyBytes_FromStringAndSize(output, size);
374 }
375
376 /*[clinic input]
377 _io.BytesIO.read
378 size: Py_ssize_t(accept={int, NoneType}) = -1
379 /
380
381 Read at most size bytes, returned as a bytes object.
382
383 If the size argument is negative, read until EOF is reached.
384 Return an empty bytes object at EOF.
385 [clinic start generated code]*/
386
387 static PyObject *
_io_BytesIO_read_impl(bytesio * self,Py_ssize_t size)388 _io_BytesIO_read_impl(bytesio *self, Py_ssize_t size)
389 /*[clinic end generated code: output=9cc025f21c75bdd2 input=74344a39f431c3d7]*/
390 {
391 Py_ssize_t n;
392
393 CHECK_CLOSED(self);
394
395 /* adjust invalid sizes */
396 n = self->string_size - self->pos;
397 if (size < 0 || size > n) {
398 size = n;
399 if (size < 0)
400 size = 0;
401 }
402
403 return read_bytes(self, size);
404 }
405
406
407 /*[clinic input]
408 _io.BytesIO.read1
409 size: Py_ssize_t(accept={int, NoneType}) = -1
410 /
411
412 Read at most size bytes, returned as a bytes object.
413
414 If the size argument is negative or omitted, read until EOF is reached.
415 Return an empty bytes object at EOF.
416 [clinic start generated code]*/
417
418 static PyObject *
_io_BytesIO_read1_impl(bytesio * self,Py_ssize_t size)419 _io_BytesIO_read1_impl(bytesio *self, Py_ssize_t size)
420 /*[clinic end generated code: output=d0f843285aa95f1c input=440a395bf9129ef5]*/
421 {
422 return _io_BytesIO_read_impl(self, size);
423 }
424
425 /*[clinic input]
426 _io.BytesIO.readline
427 size: Py_ssize_t(accept={int, NoneType}) = -1
428 /
429
430 Next line from the file, as a bytes object.
431
432 Retain newline. A non-negative size argument limits the maximum
433 number of bytes to return (an incomplete line may be returned then).
434 Return an empty bytes object at EOF.
435 [clinic start generated code]*/
436
437 static PyObject *
_io_BytesIO_readline_impl(bytesio * self,Py_ssize_t size)438 _io_BytesIO_readline_impl(bytesio *self, Py_ssize_t size)
439 /*[clinic end generated code: output=4bff3c251df8ffcd input=e7c3fbd1744e2783]*/
440 {
441 Py_ssize_t n;
442
443 CHECK_CLOSED(self);
444
445 n = scan_eol(self, size);
446
447 return read_bytes(self, n);
448 }
449
450 /*[clinic input]
451 _io.BytesIO.readlines
452 size as arg: object = None
453 /
454
455 List of bytes objects, each a line from the file.
456
457 Call readline() repeatedly and return a list of the lines so read.
458 The optional size argument, if given, is an approximate bound on the
459 total number of bytes in the lines returned.
460 [clinic start generated code]*/
461
462 static PyObject *
_io_BytesIO_readlines_impl(bytesio * self,PyObject * arg)463 _io_BytesIO_readlines_impl(bytesio *self, PyObject *arg)
464 /*[clinic end generated code: output=09b8e34c880808ff input=691aa1314f2c2a87]*/
465 {
466 Py_ssize_t maxsize, size, n;
467 PyObject *result, *line;
468 char *output;
469
470 CHECK_CLOSED(self);
471
472 if (PyLong_Check(arg)) {
473 maxsize = PyLong_AsSsize_t(arg);
474 if (maxsize == -1 && PyErr_Occurred())
475 return NULL;
476 }
477 else if (arg == Py_None) {
478 /* No size limit, by default. */
479 maxsize = -1;
480 }
481 else {
482 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
483 Py_TYPE(arg)->tp_name);
484 return NULL;
485 }
486
487 size = 0;
488 result = PyList_New(0);
489 if (!result)
490 return NULL;
491
492 output = PyBytes_AS_STRING(self->buf) + self->pos;
493 while ((n = scan_eol(self, -1)) != 0) {
494 self->pos += n;
495 line = PyBytes_FromStringAndSize(output, n);
496 if (!line)
497 goto on_error;
498 if (PyList_Append(result, line) == -1) {
499 Py_DECREF(line);
500 goto on_error;
501 }
502 Py_DECREF(line);
503 size += n;
504 if (maxsize > 0 && size >= maxsize)
505 break;
506 output += n;
507 }
508 return result;
509
510 on_error:
511 Py_DECREF(result);
512 return NULL;
513 }
514
515 /*[clinic input]
516 _io.BytesIO.readinto
517 buffer: Py_buffer(accept={rwbuffer})
518 /
519
520 Read bytes into buffer.
521
522 Returns number of bytes read (0 for EOF), or None if the object
523 is set not to block and has no data to read.
524 [clinic start generated code]*/
525
526 static PyObject *
_io_BytesIO_readinto_impl(bytesio * self,Py_buffer * buffer)527 _io_BytesIO_readinto_impl(bytesio *self, Py_buffer *buffer)
528 /*[clinic end generated code: output=a5d407217dcf0639 input=1424d0fdce857919]*/
529 {
530 Py_ssize_t len, n;
531
532 CHECK_CLOSED(self);
533
534 /* adjust invalid sizes */
535 len = buffer->len;
536 n = self->string_size - self->pos;
537 if (len > n) {
538 len = n;
539 if (len < 0)
540 len = 0;
541 }
542
543 memcpy(buffer->buf, PyBytes_AS_STRING(self->buf) + self->pos, len);
544 assert(self->pos + len < PY_SSIZE_T_MAX);
545 assert(len >= 0);
546 self->pos += len;
547
548 return PyLong_FromSsize_t(len);
549 }
550
551 /*[clinic input]
552 _io.BytesIO.truncate
553 size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None
554 /
555
556 Truncate the file to at most size bytes.
557
558 Size defaults to the current file position, as returned by tell().
559 The current file position is unchanged. Returns the new size.
560 [clinic start generated code]*/
561
562 static PyObject *
_io_BytesIO_truncate_impl(bytesio * self,Py_ssize_t size)563 _io_BytesIO_truncate_impl(bytesio *self, Py_ssize_t size)
564 /*[clinic end generated code: output=9ad17650c15fa09b input=423759dd42d2f7c1]*/
565 {
566 CHECK_CLOSED(self);
567 CHECK_EXPORTS(self);
568
569 if (size < 0) {
570 PyErr_Format(PyExc_ValueError,
571 "negative size value %zd", size);
572 return NULL;
573 }
574
575 if (size < self->string_size) {
576 self->string_size = size;
577 if (resize_buffer(self, size) < 0)
578 return NULL;
579 }
580
581 return PyLong_FromSsize_t(size);
582 }
583
584 static PyObject *
bytesio_iternext(bytesio * self)585 bytesio_iternext(bytesio *self)
586 {
587 Py_ssize_t n;
588
589 CHECK_CLOSED(self);
590
591 n = scan_eol(self, -1);
592
593 if (n == 0)
594 return NULL;
595
596 return read_bytes(self, n);
597 }
598
599 /*[clinic input]
600 _io.BytesIO.seek
601 pos: Py_ssize_t
602 whence: int = 0
603 /
604
605 Change stream position.
606
607 Seek to byte offset pos relative to position indicated by whence:
608 0 Start of stream (the default). pos should be >= 0;
609 1 Current position - pos may be negative;
610 2 End of stream - pos usually negative.
611 Returns the new absolute position.
612 [clinic start generated code]*/
613
614 static PyObject *
_io_BytesIO_seek_impl(bytesio * self,Py_ssize_t pos,int whence)615 _io_BytesIO_seek_impl(bytesio *self, Py_ssize_t pos, int whence)
616 /*[clinic end generated code: output=c26204a68e9190e4 input=1e875e6ebc652948]*/
617 {
618 CHECK_CLOSED(self);
619
620 if (pos < 0 && whence == 0) {
621 PyErr_Format(PyExc_ValueError,
622 "negative seek value %zd", pos);
623 return NULL;
624 }
625
626 /* whence = 0: offset relative to beginning of the string.
627 whence = 1: offset relative to current position.
628 whence = 2: offset relative the end of the string. */
629 if (whence == 1) {
630 if (pos > PY_SSIZE_T_MAX - self->pos) {
631 PyErr_SetString(PyExc_OverflowError,
632 "new position too large");
633 return NULL;
634 }
635 pos += self->pos;
636 }
637 else if (whence == 2) {
638 if (pos > PY_SSIZE_T_MAX - self->string_size) {
639 PyErr_SetString(PyExc_OverflowError,
640 "new position too large");
641 return NULL;
642 }
643 pos += self->string_size;
644 }
645 else if (whence != 0) {
646 PyErr_Format(PyExc_ValueError,
647 "invalid whence (%i, should be 0, 1 or 2)", whence);
648 return NULL;
649 }
650
651 if (pos < 0)
652 pos = 0;
653 self->pos = pos;
654
655 return PyLong_FromSsize_t(self->pos);
656 }
657
658 /*[clinic input]
659 _io.BytesIO.write
660 b: object
661 /
662
663 Write bytes to file.
664
665 Return the number of bytes written.
666 [clinic start generated code]*/
667
668 static PyObject *
_io_BytesIO_write(bytesio * self,PyObject * b)669 _io_BytesIO_write(bytesio *self, PyObject *b)
670 /*[clinic end generated code: output=53316d99800a0b95 input=f5ec7c8c64ed720a]*/
671 {
672 Py_ssize_t n = 0;
673 Py_buffer buf;
674
675 CHECK_CLOSED(self);
676 CHECK_EXPORTS(self);
677
678 if (PyObject_GetBuffer(b, &buf, PyBUF_CONTIG_RO) < 0)
679 return NULL;
680
681 if (buf.len != 0)
682 n = write_bytes(self, buf.buf, buf.len);
683
684 PyBuffer_Release(&buf);
685 return n >= 0 ? PyLong_FromSsize_t(n) : NULL;
686 }
687
688 /*[clinic input]
689 _io.BytesIO.writelines
690 lines: object
691 /
692
693 Write lines to the file.
694
695 Note that newlines are not added. lines can be any iterable object
696 producing bytes-like objects. This is equivalent to calling write() for
697 each element.
698 [clinic start generated code]*/
699
700 static PyObject *
_io_BytesIO_writelines(bytesio * self,PyObject * lines)701 _io_BytesIO_writelines(bytesio *self, PyObject *lines)
702 /*[clinic end generated code: output=7f33aa3271c91752 input=e972539176fc8fc1]*/
703 {
704 PyObject *it, *item;
705 PyObject *ret;
706
707 CHECK_CLOSED(self);
708
709 it = PyObject_GetIter(lines);
710 if (it == NULL)
711 return NULL;
712
713 while ((item = PyIter_Next(it)) != NULL) {
714 ret = _io_BytesIO_write(self, item);
715 Py_DECREF(item);
716 if (ret == NULL) {
717 Py_DECREF(it);
718 return NULL;
719 }
720 Py_DECREF(ret);
721 }
722 Py_DECREF(it);
723
724 /* See if PyIter_Next failed */
725 if (PyErr_Occurred())
726 return NULL;
727
728 Py_RETURN_NONE;
729 }
730
731 /*[clinic input]
732 _io.BytesIO.close
733
734 Disable all I/O operations.
735 [clinic start generated code]*/
736
737 static PyObject *
_io_BytesIO_close_impl(bytesio * self)738 _io_BytesIO_close_impl(bytesio *self)
739 /*[clinic end generated code: output=1471bb9411af84a0 input=37e1f55556e61f60]*/
740 {
741 CHECK_EXPORTS(self);
742 Py_CLEAR(self->buf);
743 Py_RETURN_NONE;
744 }
745
/* Pickling support.

   Note that only pickle protocol 2 and onward are supported, since we use
   the extended __reduce__ API of PEP 307 to make BytesIO instances
   picklable.

   Providing support for protocol < 2 would require the __reduce_ex__ method,
   which is notably long-winded when defined properly.

   For BytesIO, the implementation would be similar to the one coded for
   object.__reduce_ex__, but slightly less general. To be more specific, we
   could call bytesio_getstate directly and avoid checking for the presence of
   a fallback __reduce__ method. However, we would still need a __newobj__
   function to use the efficient instance representation of PEP 307.
 */
760
761 static PyObject *
bytesio_getstate(bytesio * self,PyObject * Py_UNUSED (ignored))762 bytesio_getstate(bytesio *self, PyObject *Py_UNUSED(ignored))
763 {
764 PyObject *initvalue = _io_BytesIO_getvalue_impl(self);
765 PyObject *dict;
766 PyObject *state;
767
768 if (initvalue == NULL)
769 return NULL;
770 if (self->dict == NULL) {
771 Py_INCREF(Py_None);
772 dict = Py_None;
773 }
774 else {
775 dict = PyDict_Copy(self->dict);
776 if (dict == NULL) {
777 Py_DECREF(initvalue);
778 return NULL;
779 }
780 }
781
782 state = Py_BuildValue("(OnN)", initvalue, self->pos, dict);
783 Py_DECREF(initvalue);
784 return state;
785 }
786
787 static PyObject *
bytesio_setstate(bytesio * self,PyObject * state)788 bytesio_setstate(bytesio *self, PyObject *state)
789 {
790 PyObject *result;
791 PyObject *position_obj;
792 PyObject *dict;
793 Py_ssize_t pos;
794
795 assert(state != NULL);
796
797 /* We allow the state tuple to be longer than 3, because we may need
798 someday to extend the object's state without breaking
799 backward-compatibility. */
800 if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 3) {
801 PyErr_Format(PyExc_TypeError,
802 "%.200s.__setstate__ argument should be 3-tuple, got %.200s",
803 Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
804 return NULL;
805 }
806 CHECK_EXPORTS(self);
807 /* Reset the object to its default state. This is only needed to handle
808 the case of repeated calls to __setstate__. */
809 self->string_size = 0;
810 self->pos = 0;
811
812 /* Set the value of the internal buffer. If state[0] does not support the
813 buffer protocol, bytesio_write will raise the appropriate TypeError. */
814 result = _io_BytesIO_write(self, PyTuple_GET_ITEM(state, 0));
815 if (result == NULL)
816 return NULL;
817 Py_DECREF(result);
818
819 /* Set carefully the position value. Alternatively, we could use the seek
820 method instead of modifying self->pos directly to better protect the
821 object internal state against erroneous (or malicious) inputs. */
822 position_obj = PyTuple_GET_ITEM(state, 1);
823 if (!PyLong_Check(position_obj)) {
824 PyErr_Format(PyExc_TypeError,
825 "second item of state must be an integer, not %.200s",
826 Py_TYPE(position_obj)->tp_name);
827 return NULL;
828 }
829 pos = PyLong_AsSsize_t(position_obj);
830 if (pos == -1 && PyErr_Occurred())
831 return NULL;
832 if (pos < 0) {
833 PyErr_SetString(PyExc_ValueError,
834 "position value cannot be negative");
835 return NULL;
836 }
837 self->pos = pos;
838
839 /* Set the dictionary of the instance variables. */
840 dict = PyTuple_GET_ITEM(state, 2);
841 if (dict != Py_None) {
842 if (!PyDict_Check(dict)) {
843 PyErr_Format(PyExc_TypeError,
844 "third item of state should be a dict, got a %.200s",
845 Py_TYPE(dict)->tp_name);
846 return NULL;
847 }
848 if (self->dict) {
849 /* Alternatively, we could replace the internal dictionary
850 completely. However, it seems more practical to just update it. */
851 if (PyDict_Update(self->dict, dict) < 0)
852 return NULL;
853 }
854 else {
855 Py_INCREF(dict);
856 self->dict = dict;
857 }
858 }
859
860 Py_RETURN_NONE;
861 }
862
863 static void
bytesio_dealloc(bytesio * self)864 bytesio_dealloc(bytesio *self)
865 {
866 _PyObject_GC_UNTRACK(self);
867 if (self->exports > 0) {
868 PyErr_SetString(PyExc_SystemError,
869 "deallocated BytesIO object has exported buffers");
870 PyErr_Print();
871 }
872 Py_CLEAR(self->buf);
873 Py_CLEAR(self->dict);
874 if (self->weakreflist != NULL)
875 PyObject_ClearWeakRefs((PyObject *) self);
876 Py_TYPE(self)->tp_free(self);
877 }
878
879 static PyObject *
bytesio_new(PyTypeObject * type,PyObject * args,PyObject * kwds)880 bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
881 {
882 bytesio *self;
883
884 assert(type != NULL && type->tp_alloc != NULL);
885 self = (bytesio *)type->tp_alloc(type, 0);
886 if (self == NULL)
887 return NULL;
888
889 /* tp_alloc initializes all the fields to zero. So we don't have to
890 initialize them here. */
891
892 self->buf = PyBytes_FromStringAndSize(NULL, 0);
893 if (self->buf == NULL) {
894 Py_DECREF(self);
895 return PyErr_NoMemory();
896 }
897
898 return (PyObject *)self;
899 }
900
901 /*[clinic input]
902 _io.BytesIO.__init__
903 initial_bytes as initvalue: object(c_default="NULL") = b''
904
905 Buffered I/O implementation using an in-memory bytes buffer.
906 [clinic start generated code]*/
907
908 static int
_io_BytesIO___init___impl(bytesio * self,PyObject * initvalue)909 _io_BytesIO___init___impl(bytesio *self, PyObject *initvalue)
910 /*[clinic end generated code: output=65c0c51e24c5b621 input=aac7f31b67bf0fb6]*/
911 {
912 /* In case, __init__ is called multiple times. */
913 self->string_size = 0;
914 self->pos = 0;
915
916 if (self->exports > 0) {
917 PyErr_SetString(PyExc_BufferError,
918 "Existing exports of data: object cannot be re-sized");
919 return -1;
920 }
921 if (initvalue && initvalue != Py_None) {
922 if (PyBytes_CheckExact(initvalue)) {
923 Py_INCREF(initvalue);
924 Py_XSETREF(self->buf, initvalue);
925 self->string_size = PyBytes_GET_SIZE(initvalue);
926 }
927 else {
928 PyObject *res;
929 res = _io_BytesIO_write(self, initvalue);
930 if (res == NULL)
931 return -1;
932 Py_DECREF(res);
933 self->pos = 0;
934 }
935 }
936
937 return 0;
938 }
939
940 static PyObject *
bytesio_sizeof(bytesio * self,void * unused)941 bytesio_sizeof(bytesio *self, void *unused)
942 {
943 Py_ssize_t res;
944
945 res = _PyObject_SIZE(Py_TYPE(self));
946 if (self->buf && !SHARED_BUF(self)) {
947 Py_ssize_t s = _PySys_GetSizeOf(self->buf);
948 if (s == -1) {
949 return NULL;
950 }
951 res += s;
952 }
953 return PyLong_FromSsize_t(res);
954 }
955
956 static int
bytesio_traverse(bytesio * self,visitproc visit,void * arg)957 bytesio_traverse(bytesio *self, visitproc visit, void *arg)
958 {
959 Py_VISIT(self->dict);
960 return 0;
961 }
962
963 static int
bytesio_clear(bytesio * self)964 bytesio_clear(bytesio *self)
965 {
966 Py_CLEAR(self->dict);
967 return 0;
968 }
969
970
971 #include "clinic/bytesio.c.h"
972
973 static PyGetSetDef bytesio_getsetlist[] = {
974 {"closed", (getter)bytesio_get_closed, NULL,
975 "True if the file is closed."},
976 {NULL}, /* sentinel */
977 };
978
979 static struct PyMethodDef bytesio_methods[] = {
980 _IO_BYTESIO_READABLE_METHODDEF
981 _IO_BYTESIO_SEEKABLE_METHODDEF
982 _IO_BYTESIO_WRITABLE_METHODDEF
983 _IO_BYTESIO_CLOSE_METHODDEF
984 _IO_BYTESIO_FLUSH_METHODDEF
985 _IO_BYTESIO_ISATTY_METHODDEF
986 _IO_BYTESIO_TELL_METHODDEF
987 _IO_BYTESIO_WRITE_METHODDEF
988 _IO_BYTESIO_WRITELINES_METHODDEF
989 _IO_BYTESIO_READ1_METHODDEF
990 _IO_BYTESIO_READINTO_METHODDEF
991 _IO_BYTESIO_READLINE_METHODDEF
992 _IO_BYTESIO_READLINES_METHODDEF
993 _IO_BYTESIO_READ_METHODDEF
994 _IO_BYTESIO_GETBUFFER_METHODDEF
995 _IO_BYTESIO_GETVALUE_METHODDEF
996 _IO_BYTESIO_SEEK_METHODDEF
997 _IO_BYTESIO_TRUNCATE_METHODDEF
998 {"__getstate__", (PyCFunction)bytesio_getstate, METH_NOARGS, NULL},
999 {"__setstate__", (PyCFunction)bytesio_setstate, METH_O, NULL},
1000 {"__sizeof__", (PyCFunction)bytesio_sizeof, METH_NOARGS, NULL},
1001 {NULL, NULL} /* sentinel */
1002 };
1003
1004 PyTypeObject PyBytesIO_Type = {
1005 PyVarObject_HEAD_INIT(NULL, 0)
1006 "_io.BytesIO", /*tp_name*/
1007 sizeof(bytesio), /*tp_basicsize*/
1008 0, /*tp_itemsize*/
1009 (destructor)bytesio_dealloc, /*tp_dealloc*/
1010 0, /*tp_vectorcall_offset*/
1011 0, /*tp_getattr*/
1012 0, /*tp_setattr*/
1013 0, /*tp_as_async*/
1014 0, /*tp_repr*/
1015 0, /*tp_as_number*/
1016 0, /*tp_as_sequence*/
1017 0, /*tp_as_mapping*/
1018 0, /*tp_hash*/
1019 0, /*tp_call*/
1020 0, /*tp_str*/
1021 0, /*tp_getattro*/
1022 0, /*tp_setattro*/
1023 0, /*tp_as_buffer*/
1024 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1025 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1026 _io_BytesIO___init____doc__, /*tp_doc*/
1027 (traverseproc)bytesio_traverse, /*tp_traverse*/
1028 (inquiry)bytesio_clear, /*tp_clear*/
1029 0, /*tp_richcompare*/
1030 offsetof(bytesio, weakreflist), /*tp_weaklistoffset*/
1031 PyObject_SelfIter, /*tp_iter*/
1032 (iternextfunc)bytesio_iternext, /*tp_iternext*/
1033 bytesio_methods, /*tp_methods*/
1034 0, /*tp_members*/
1035 bytesio_getsetlist, /*tp_getset*/
1036 0, /*tp_base*/
1037 0, /*tp_dict*/
1038 0, /*tp_descr_get*/
1039 0, /*tp_descr_set*/
1040 offsetof(bytesio, dict), /*tp_dictoffset*/
1041 _io_BytesIO___init__, /*tp_init*/
1042 0, /*tp_alloc*/
1043 bytesio_new, /*tp_new*/
1044 };
1045
1046
1047 /*
1048 * Implementation of the small intermediate object used by getbuffer().
1049 * getbuffer() returns a memoryview over this object, which should make it
1050 * invisible from Python code.
1051 */
1052
1053 static int
bytesiobuf_getbuffer(bytesiobuf * obj,Py_buffer * view,int flags)1054 bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags)
1055 {
1056 bytesio *b = (bytesio *) obj->source;
1057
1058 if (view == NULL) {
1059 PyErr_SetString(PyExc_BufferError,
1060 "bytesiobuf_getbuffer: view==NULL argument is obsolete");
1061 return -1;
1062 }
1063 if (SHARED_BUF(b)) {
1064 if (unshare_buffer(b, b->string_size) < 0)
1065 return -1;
1066 }
1067
1068 /* cannot fail if view != NULL and readonly == 0 */
1069 (void)PyBuffer_FillInfo(view, (PyObject*)obj,
1070 PyBytes_AS_STRING(b->buf), b->string_size,
1071 0, flags);
1072 b->exports++;
1073 return 0;
1074 }
1075
1076 static void
bytesiobuf_releasebuffer(bytesiobuf * obj,Py_buffer * view)1077 bytesiobuf_releasebuffer(bytesiobuf *obj, Py_buffer *view)
1078 {
1079 bytesio *b = (bytesio *) obj->source;
1080 b->exports--;
1081 }
1082
1083 static int
bytesiobuf_traverse(bytesiobuf * self,visitproc visit,void * arg)1084 bytesiobuf_traverse(bytesiobuf *self, visitproc visit, void *arg)
1085 {
1086 Py_VISIT(self->source);
1087 return 0;
1088 }
1089
1090 static void
bytesiobuf_dealloc(bytesiobuf * self)1091 bytesiobuf_dealloc(bytesiobuf *self)
1092 {
1093 /* bpo-31095: UnTrack is needed before calling any callbacks */
1094 PyObject_GC_UnTrack(self);
1095 Py_CLEAR(self->source);
1096 Py_TYPE(self)->tp_free(self);
1097 }
1098
1099 static PyBufferProcs bytesiobuf_as_buffer = {
1100 (getbufferproc) bytesiobuf_getbuffer,
1101 (releasebufferproc) bytesiobuf_releasebuffer,
1102 };
1103
1104 PyTypeObject _PyBytesIOBuffer_Type = {
1105 PyVarObject_HEAD_INIT(NULL, 0)
1106 "_io._BytesIOBuffer", /*tp_name*/
1107 sizeof(bytesiobuf), /*tp_basicsize*/
1108 0, /*tp_itemsize*/
1109 (destructor)bytesiobuf_dealloc, /*tp_dealloc*/
1110 0, /*tp_vectorcall_offset*/
1111 0, /*tp_getattr*/
1112 0, /*tp_setattr*/
1113 0, /*tp_as_async*/
1114 0, /*tp_repr*/
1115 0, /*tp_as_number*/
1116 0, /*tp_as_sequence*/
1117 0, /*tp_as_mapping*/
1118 0, /*tp_hash*/
1119 0, /*tp_call*/
1120 0, /*tp_str*/
1121 0, /*tp_getattro*/
1122 0, /*tp_setattro*/
1123 &bytesiobuf_as_buffer, /*tp_as_buffer*/
1124 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1125 0, /*tp_doc*/
1126 (traverseproc)bytesiobuf_traverse, /*tp_traverse*/
1127 0, /*tp_clear*/
1128 0, /*tp_richcompare*/
1129 0, /*tp_weaklistoffset*/
1130 0, /*tp_iter*/
1131 0, /*tp_iternext*/
1132 0, /*tp_methods*/
1133 0, /*tp_members*/
1134 0, /*tp_getset*/
1135 0, /*tp_base*/
1136 0, /*tp_dict*/
1137 0, /*tp_descr_get*/
1138 0, /*tp_descr_set*/
1139 0, /*tp_dictoffset*/
1140 0, /*tp_init*/
1141 0, /*tp_alloc*/
1142 0, /*tp_new*/
1143 };
1144