1 #define PY_SSIZE_T_CLEAN
2 #include "Python.h"
3 #include "structmember.h"
4 #include "accu.h"
5 #include "_iomodule.h"
6
7 /* Implementation note: the buffer is always at least one character longer
8 than the enclosed string, for proper functioning of _PyIO_find_line_ending.
9 */
10
/* Values stored in the `state` member of a stringio object. */
enum {
    STATE_REALIZED = 1,     /* contents live in the UCS4 buffer */
    STATE_ACCUMULATING = 2  /* contents live in the _PyAccu accumulator */
};
13
14 /*[clinic input]
15 module _io
16 class _io.StringIO "stringio *" "&PyStringIO_Type"
17 [clinic start generated code]*/
18 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c17bc0f42165cd7d]*/
19
20 typedef struct {
21 PyObject_HEAD
22 Py_UCS4 *buf;
23 Py_ssize_t pos;
24 Py_ssize_t string_size;
25 size_t buf_size;
26
27 /* The stringio object can be in two states: accumulating or realized.
28 In accumulating state, the internal buffer contains nothing and
29 the contents are given by the embedded _PyAccu structure.
30 In realized state, the internal buffer is meaningful and the
31 _PyAccu is destroyed.
32 */
33 int state;
34 _PyAccu accu;
35
36 char ok; /* initialized? */
37 char closed;
38 char readuniversal;
39 char readtranslate;
40 PyObject *decoder;
41 PyObject *readnl;
42 PyObject *writenl;
43
44 PyObject *dict;
45 PyObject *weakreflist;
46 } stringio;
47
48 static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
49
/* Make the calling function return NULL with a ValueError set when the
   object's `ok` flag is not positive, i.e. __init__ has not completed.
   Wrapped in do { } while (0) so that the macro behaves as one statement
   (safe inside an unbraced if/else). */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
56
/* Make the calling function return NULL with a ValueError set when the
   object's `closed` flag is set.
   Wrapped in do { } while (0) so that the macro behaves as one statement
   (safe inside an unbraced if/else). */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)
63
/* Switch the object to realized state via realize(); make the calling
   function return NULL when that fails.
   Wrapped in do { } while (0) so that the macro behaves as one statement
   (safe inside an unbraced if/else). */
#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)
68
69
70 /* Internal routine for changing the size, in terms of characters, of the
71 buffer of StringIO objects. The caller should ensure that the 'size'
72 argument is non-negative. Returns 0 on success, -1 otherwise. */
73 static int
resize_buffer(stringio * self,size_t size)74 resize_buffer(stringio *self, size_t size)
75 {
76 /* Here, unsigned types are used to avoid dealing with signed integer
77 overflow, which is undefined in C. */
78 size_t alloc = self->buf_size;
79 Py_UCS4 *new_buf = NULL;
80
81 assert(self->buf != NULL);
82
83 /* Reserve one more char for line ending detection. */
84 size = size + 1;
85 /* For simplicity, stay in the range of the signed type. Anyway, Python
86 doesn't allow strings to be longer than this. */
87 if (size > PY_SSIZE_T_MAX)
88 goto overflow;
89
90 if (size < alloc / 2) {
91 /* Major downsize; resize down to exact size. */
92 alloc = size + 1;
93 }
94 else if (size < alloc) {
95 /* Within allocated size; quick exit */
96 return 0;
97 }
98 else if (size <= alloc * 1.125) {
99 /* Moderate upsize; overallocate similar to list_resize() */
100 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
101 }
102 else {
103 /* Major upsize; resize up to exact size */
104 alloc = size + 1;
105 }
106
107 if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
108 goto overflow;
109 new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
110 if (new_buf == NULL) {
111 PyErr_NoMemory();
112 return -1;
113 }
114 self->buf_size = alloc;
115 self->buf = new_buf;
116
117 return 0;
118
119 overflow:
120 PyErr_SetString(PyExc_OverflowError,
121 "new buffer size too large");
122 return -1;
123 }
124
125 static PyObject *
make_intermediate(stringio * self)126 make_intermediate(stringio *self)
127 {
128 PyObject *intermediate = _PyAccu_Finish(&self->accu);
129 self->state = STATE_REALIZED;
130 if (intermediate == NULL)
131 return NULL;
132 if (_PyAccu_Init(&self->accu) ||
133 _PyAccu_Accumulate(&self->accu, intermediate)) {
134 Py_DECREF(intermediate);
135 return NULL;
136 }
137 self->state = STATE_ACCUMULATING;
138 return intermediate;
139 }
140
141 static int
realize(stringio * self)142 realize(stringio *self)
143 {
144 Py_ssize_t len;
145 PyObject *intermediate;
146
147 if (self->state == STATE_REALIZED)
148 return 0;
149 assert(self->state == STATE_ACCUMULATING);
150 self->state = STATE_REALIZED;
151
152 intermediate = _PyAccu_Finish(&self->accu);
153 if (intermediate == NULL)
154 return -1;
155
156 /* Append the intermediate string to the internal buffer.
157 The length should be equal to the current cursor position.
158 */
159 len = PyUnicode_GET_LENGTH(intermediate);
160 if (resize_buffer(self, len) < 0) {
161 Py_DECREF(intermediate);
162 return -1;
163 }
164 if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
165 Py_DECREF(intermediate);
166 return -1;
167 }
168
169 Py_DECREF(intermediate);
170 return 0;
171 }
172
173 /* Internal routine for writing a whole PyUnicode object to the buffer of a
174 StringIO object. Returns 0 on success, or -1 on error. */
175 static Py_ssize_t
write_str(stringio * self,PyObject * obj)176 write_str(stringio *self, PyObject *obj)
177 {
178 Py_ssize_t len;
179 PyObject *decoded = NULL;
180
181 assert(self->buf != NULL);
182 assert(self->pos >= 0);
183
184 if (self->decoder != NULL) {
185 decoded = _PyIncrementalNewlineDecoder_decode(
186 self->decoder, obj, 1 /* always final */);
187 }
188 else {
189 decoded = obj;
190 Py_INCREF(decoded);
191 }
192 if (self->writenl) {
193 PyObject *translated = PyUnicode_Replace(
194 decoded, _PyIO_str_nl, self->writenl, -1);
195 Py_DECREF(decoded);
196 decoded = translated;
197 }
198 if (decoded == NULL)
199 return -1;
200
201 assert(PyUnicode_Check(decoded));
202 if (PyUnicode_READY(decoded)) {
203 Py_DECREF(decoded);
204 return -1;
205 }
206 len = PyUnicode_GET_LENGTH(decoded);
207 assert(len >= 0);
208
209 /* This overflow check is not strictly necessary. However, it avoids us to
210 deal with funky things like comparing an unsigned and a signed
211 integer. */
212 if (self->pos > PY_SSIZE_T_MAX - len) {
213 PyErr_SetString(PyExc_OverflowError,
214 "new position too large");
215 goto fail;
216 }
217
218 if (self->state == STATE_ACCUMULATING) {
219 if (self->string_size == self->pos) {
220 if (_PyAccu_Accumulate(&self->accu, decoded))
221 goto fail;
222 goto success;
223 }
224 if (realize(self))
225 goto fail;
226 }
227
228 if (self->pos + len > self->string_size) {
229 if (resize_buffer(self, self->pos + len) < 0)
230 goto fail;
231 }
232
233 if (self->pos > self->string_size) {
234 /* In case of overseek, pad with null bytes the buffer region between
235 the end of stream and the current position.
236
237 0 lo string_size hi
238 | |<---used--->|<----------available----------->|
239 | | <--to pad-->|<---to write---> |
240 0 buf position
241
242 */
243 memset(self->buf + self->string_size, '\0',
244 (self->pos - self->string_size) * sizeof(Py_UCS4));
245 }
246
247 /* Copy the data to the internal buffer, overwriting some of the
248 existing data if self->pos < self->string_size. */
249 if (!PyUnicode_AsUCS4(decoded,
250 self->buf + self->pos,
251 self->buf_size - self->pos,
252 0))
253 goto fail;
254
255 success:
256 /* Set the new length of the internal string if it has changed. */
257 self->pos += len;
258 if (self->string_size < self->pos)
259 self->string_size = self->pos;
260
261 Py_DECREF(decoded);
262 return 0;
263
264 fail:
265 Py_XDECREF(decoded);
266 return -1;
267 }
268
269 /*[clinic input]
270 _io.StringIO.getvalue
271
272 Retrieve the entire contents of the object.
273 [clinic start generated code]*/
274
275 static PyObject *
_io_StringIO_getvalue_impl(stringio * self)276 _io_StringIO_getvalue_impl(stringio *self)
277 /*[clinic end generated code: output=27b6a7bfeaebce01 input=d23cb81d6791cf88]*/
278 {
279 CHECK_INITIALIZED(self);
280 CHECK_CLOSED(self);
281 if (self->state == STATE_ACCUMULATING)
282 return make_intermediate(self);
283 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
284 self->string_size);
285 }
286
287 /*[clinic input]
288 _io.StringIO.tell
289
290 Tell the current file position.
291 [clinic start generated code]*/
292
293 static PyObject *
_io_StringIO_tell_impl(stringio * self)294 _io_StringIO_tell_impl(stringio *self)
295 /*[clinic end generated code: output=2e87ac67b116c77b input=ec866ebaff02f405]*/
296 {
297 CHECK_INITIALIZED(self);
298 CHECK_CLOSED(self);
299 return PyLong_FromSsize_t(self->pos);
300 }
301
302 /*[clinic input]
303 _io.StringIO.read
304 size: Py_ssize_t(accept={int, NoneType}) = -1
305 /
306
307 Read at most size characters, returned as a string.
308
309 If the argument is negative or omitted, read until EOF
310 is reached. Return an empty string at EOF.
311 [clinic start generated code]*/
312
313 static PyObject *
_io_StringIO_read_impl(stringio * self,Py_ssize_t size)314 _io_StringIO_read_impl(stringio *self, Py_ssize_t size)
315 /*[clinic end generated code: output=ae8cf6002f71626c input=0921093383dfb92d]*/
316 {
317 Py_ssize_t n;
318 Py_UCS4 *output;
319
320 CHECK_INITIALIZED(self);
321 CHECK_CLOSED(self);
322
323 /* adjust invalid sizes */
324 n = self->string_size - self->pos;
325 if (size < 0 || size > n) {
326 size = n;
327 if (size < 0)
328 size = 0;
329 }
330
331 /* Optimization for seek(0); read() */
332 if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
333 PyObject *result = make_intermediate(self);
334 self->pos = self->string_size;
335 return result;
336 }
337
338 ENSURE_REALIZED(self);
339 output = self->buf + self->pos;
340 self->pos += size;
341 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
342 }
343
344 /* Internal helper, used by stringio_readline and stringio_iternext */
345 static PyObject *
_stringio_readline(stringio * self,Py_ssize_t limit)346 _stringio_readline(stringio *self, Py_ssize_t limit)
347 {
348 Py_UCS4 *start, *end, old_char;
349 Py_ssize_t len, consumed;
350
351 /* In case of overseek, return the empty string */
352 if (self->pos >= self->string_size)
353 return PyUnicode_New(0, 0);
354
355 start = self->buf + self->pos;
356 if (limit < 0 || limit > self->string_size - self->pos)
357 limit = self->string_size - self->pos;
358
359 end = start + limit;
360 old_char = *end;
361 *end = '\0';
362 len = _PyIO_find_line_ending(
363 self->readtranslate, self->readuniversal, self->readnl,
364 PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
365 *end = old_char;
366 /* If we haven't found any line ending, we just return everything
367 (`consumed` is ignored). */
368 if (len < 0)
369 len = limit;
370 self->pos += len;
371 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
372 }
373
374 /*[clinic input]
375 _io.StringIO.readline
376 size: Py_ssize_t(accept={int, NoneType}) = -1
377 /
378
379 Read until newline or EOF.
380
381 Returns an empty string if EOF is hit immediately.
382 [clinic start generated code]*/
383
384 static PyObject *
_io_StringIO_readline_impl(stringio * self,Py_ssize_t size)385 _io_StringIO_readline_impl(stringio *self, Py_ssize_t size)
386 /*[clinic end generated code: output=cabd6452f1b7e85d input=a5bd70bf682aa276]*/
387 {
388 CHECK_INITIALIZED(self);
389 CHECK_CLOSED(self);
390 ENSURE_REALIZED(self);
391
392 return _stringio_readline(self, size);
393 }
394
395 static PyObject *
stringio_iternext(stringio * self)396 stringio_iternext(stringio *self)
397 {
398 PyObject *line;
399
400 CHECK_INITIALIZED(self);
401 CHECK_CLOSED(self);
402 ENSURE_REALIZED(self);
403
404 if (Py_TYPE(self) == &PyStringIO_Type) {
405 /* Skip method call overhead for speed */
406 line = _stringio_readline(self, -1);
407 }
408 else {
409 /* XXX is subclassing StringIO really supported? */
410 line = PyObject_CallMethodObjArgs((PyObject *)self,
411 _PyIO_str_readline, NULL);
412 if (line && !PyUnicode_Check(line)) {
413 PyErr_Format(PyExc_OSError,
414 "readline() should have returned a str object, "
415 "not '%.200s'", Py_TYPE(line)->tp_name);
416 Py_DECREF(line);
417 return NULL;
418 }
419 }
420
421 if (line == NULL)
422 return NULL;
423
424 if (PyUnicode_GET_LENGTH(line) == 0) {
425 /* Reached EOF */
426 Py_DECREF(line);
427 return NULL;
428 }
429
430 return line;
431 }
432
433 /*[clinic input]
434 _io.StringIO.truncate
435 pos as size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None
436 /
437
438 Truncate size to pos.
439
440 The pos argument defaults to the current file position, as
441 returned by tell(). The current file position is unchanged.
442 Returns the new absolute position.
443 [clinic start generated code]*/
444
445 static PyObject *
_io_StringIO_truncate_impl(stringio * self,Py_ssize_t size)446 _io_StringIO_truncate_impl(stringio *self, Py_ssize_t size)
447 /*[clinic end generated code: output=eb3aef8e06701365 input=5505cff90ca48b96]*/
448 {
449 CHECK_INITIALIZED(self);
450 CHECK_CLOSED(self);
451
452 if (size < 0) {
453 PyErr_Format(PyExc_ValueError,
454 "Negative size value %zd", size);
455 return NULL;
456 }
457
458 if (size < self->string_size) {
459 ENSURE_REALIZED(self);
460 if (resize_buffer(self, size) < 0)
461 return NULL;
462 self->string_size = size;
463 }
464
465 return PyLong_FromSsize_t(size);
466 }
467
468 /*[clinic input]
469 _io.StringIO.seek
470 pos: Py_ssize_t
471 whence: int = 0
472 /
473
474 Change stream position.
475
476 Seek to character offset pos relative to position indicated by whence:
477 0 Start of stream (the default). pos should be >= 0;
478 1 Current position - pos must be 0;
479 2 End of stream - pos must be 0.
480 Returns the new absolute position.
481 [clinic start generated code]*/
482
483 static PyObject *
_io_StringIO_seek_impl(stringio * self,Py_ssize_t pos,int whence)484 _io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence)
485 /*[clinic end generated code: output=e9e0ac9a8ae71c25 input=e3855b24e7cae06a]*/
486 {
487 CHECK_INITIALIZED(self);
488 CHECK_CLOSED(self);
489
490 if (whence != 0 && whence != 1 && whence != 2) {
491 PyErr_Format(PyExc_ValueError,
492 "Invalid whence (%i, should be 0, 1 or 2)", whence);
493 return NULL;
494 }
495 else if (pos < 0 && whence == 0) {
496 PyErr_Format(PyExc_ValueError,
497 "Negative seek position %zd", pos);
498 return NULL;
499 }
500 else if (whence != 0 && pos != 0) {
501 PyErr_SetString(PyExc_OSError,
502 "Can't do nonzero cur-relative seeks");
503 return NULL;
504 }
505
506 /* whence = 0: offset relative to beginning of the string.
507 whence = 1: no change to current position.
508 whence = 2: change position to end of file. */
509 if (whence == 1) {
510 pos = self->pos;
511 }
512 else if (whence == 2) {
513 pos = self->string_size;
514 }
515
516 self->pos = pos;
517
518 return PyLong_FromSsize_t(self->pos);
519 }
520
521 /*[clinic input]
522 _io.StringIO.write
523 s as obj: object
524 /
525
526 Write string to file.
527
528 Returns the number of characters written, which is always equal to
529 the length of the string.
530 [clinic start generated code]*/
531
532 static PyObject *
_io_StringIO_write(stringio * self,PyObject * obj)533 _io_StringIO_write(stringio *self, PyObject *obj)
534 /*[clinic end generated code: output=0deaba91a15b94da input=cf96f3b16586e669]*/
535 {
536 Py_ssize_t size;
537
538 CHECK_INITIALIZED(self);
539 if (!PyUnicode_Check(obj)) {
540 PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
541 Py_TYPE(obj)->tp_name);
542 return NULL;
543 }
544 if (PyUnicode_READY(obj))
545 return NULL;
546 CHECK_CLOSED(self);
547 size = PyUnicode_GET_LENGTH(obj);
548
549 if (size > 0 && write_str(self, obj) < 0)
550 return NULL;
551
552 return PyLong_FromSsize_t(size);
553 }
554
555 /*[clinic input]
556 _io.StringIO.close
557
558 Close the IO object.
559
560 Attempting any further operation after the object is closed
561 will raise a ValueError.
562
563 This method has no effect if the file is already closed.
564 [clinic start generated code]*/
565
566 static PyObject *
_io_StringIO_close_impl(stringio * self)567 _io_StringIO_close_impl(stringio *self)
568 /*[clinic end generated code: output=04399355cbe518f1 input=cbc10b45f35d6d46]*/
569 {
570 self->closed = 1;
571 /* Free up some memory */
572 if (resize_buffer(self, 0) < 0)
573 return NULL;
574 _PyAccu_Destroy(&self->accu);
575 Py_CLEAR(self->readnl);
576 Py_CLEAR(self->writenl);
577 Py_CLEAR(self->decoder);
578 Py_RETURN_NONE;
579 }
580
581 static int
stringio_traverse(stringio * self,visitproc visit,void * arg)582 stringio_traverse(stringio *self, visitproc visit, void *arg)
583 {
584 Py_VISIT(self->dict);
585 return 0;
586 }
587
588 static int
stringio_clear(stringio * self)589 stringio_clear(stringio *self)
590 {
591 Py_CLEAR(self->dict);
592 return 0;
593 }
594
595 static void
stringio_dealloc(stringio * self)596 stringio_dealloc(stringio *self)
597 {
598 _PyObject_GC_UNTRACK(self);
599 self->ok = 0;
600 if (self->buf) {
601 PyMem_Free(self->buf);
602 self->buf = NULL;
603 }
604 _PyAccu_Destroy(&self->accu);
605 Py_CLEAR(self->readnl);
606 Py_CLEAR(self->writenl);
607 Py_CLEAR(self->decoder);
608 Py_CLEAR(self->dict);
609 if (self->weakreflist != NULL)
610 PyObject_ClearWeakRefs((PyObject *) self);
611 Py_TYPE(self)->tp_free(self);
612 }
613
614 static PyObject *
stringio_new(PyTypeObject * type,PyObject * args,PyObject * kwds)615 stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
616 {
617 stringio *self;
618
619 assert(type != NULL && type->tp_alloc != NULL);
620 self = (stringio *)type->tp_alloc(type, 0);
621 if (self == NULL)
622 return NULL;
623
624 /* tp_alloc initializes all the fields to zero. So we don't have to
625 initialize them here. */
626
627 self->buf = (Py_UCS4 *)PyMem_Malloc(0);
628 if (self->buf == NULL) {
629 Py_DECREF(self);
630 return PyErr_NoMemory();
631 }
632
633 return (PyObject *)self;
634 }
635
636 /*[clinic input]
637 _io.StringIO.__init__
638 initial_value as value: object(c_default="NULL") = ''
639 newline as newline_obj: object(c_default="NULL") = '\n'
640
641 Text I/O implementation using an in-memory buffer.
642
643 The initial_value argument sets the value of object. The newline
644 argument is like the one of TextIOWrapper's constructor.
645 [clinic start generated code]*/
646
647 static int
_io_StringIO___init___impl(stringio * self,PyObject * value,PyObject * newline_obj)648 _io_StringIO___init___impl(stringio *self, PyObject *value,
649 PyObject *newline_obj)
650 /*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/
651 {
652 const char *newline = "\n";
653 Py_ssize_t value_len;
654
655 /* Parse the newline argument. We only want to allow unicode objects or
656 None. */
657 if (newline_obj == Py_None) {
658 newline = NULL;
659 }
660 else if (newline_obj) {
661 if (!PyUnicode_Check(newline_obj)) {
662 PyErr_Format(PyExc_TypeError,
663 "newline must be str or None, not %.200s",
664 Py_TYPE(newline_obj)->tp_name);
665 return -1;
666 }
667 newline = PyUnicode_AsUTF8(newline_obj);
668 if (newline == NULL)
669 return -1;
670 }
671
672 if (newline && newline[0] != '\0'
673 && !(newline[0] == '\n' && newline[1] == '\0')
674 && !(newline[0] == '\r' && newline[1] == '\0')
675 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
676 PyErr_Format(PyExc_ValueError,
677 "illegal newline value: %R", newline_obj);
678 return -1;
679 }
680 if (value && value != Py_None && !PyUnicode_Check(value)) {
681 PyErr_Format(PyExc_TypeError,
682 "initial_value must be str or None, not %.200s",
683 Py_TYPE(value)->tp_name);
684 return -1;
685 }
686
687 self->ok = 0;
688
689 _PyAccu_Destroy(&self->accu);
690 Py_CLEAR(self->readnl);
691 Py_CLEAR(self->writenl);
692 Py_CLEAR(self->decoder);
693
694 assert((newline != NULL && newline_obj != Py_None) ||
695 (newline == NULL && newline_obj == Py_None));
696
697 if (newline) {
698 self->readnl = PyUnicode_FromString(newline);
699 if (self->readnl == NULL)
700 return -1;
701 }
702 self->readuniversal = (newline == NULL || newline[0] == '\0');
703 self->readtranslate = (newline == NULL);
704 /* If newline == "", we don't translate anything.
705 If newline == "\n" or newline == None, we translate to "\n", which is
706 a no-op.
707 (for newline == None, TextIOWrapper translates to os.linesep, but it
708 is pointless for StringIO)
709 */
710 if (newline != NULL && newline[0] == '\r') {
711 self->writenl = self->readnl;
712 Py_INCREF(self->writenl);
713 }
714
715 if (self->readuniversal) {
716 self->decoder = PyObject_CallFunction(
717 (PyObject *)&PyIncrementalNewlineDecoder_Type,
718 "Oi", Py_None, (int) self->readtranslate);
719 if (self->decoder == NULL)
720 return -1;
721 }
722
723 /* Now everything is set up, resize buffer to size of initial value,
724 and copy it */
725 self->string_size = 0;
726 if (value && value != Py_None)
727 value_len = PyUnicode_GetLength(value);
728 else
729 value_len = 0;
730 if (value_len > 0) {
731 /* This is a heuristic, for newline translation might change
732 the string length. */
733 if (resize_buffer(self, 0) < 0)
734 return -1;
735 self->state = STATE_REALIZED;
736 self->pos = 0;
737 if (write_str(self, value) < 0)
738 return -1;
739 }
740 else {
741 /* Empty stringio object, we can start by accumulating */
742 if (resize_buffer(self, 0) < 0)
743 return -1;
744 if (_PyAccu_Init(&self->accu))
745 return -1;
746 self->state = STATE_ACCUMULATING;
747 }
748 self->pos = 0;
749
750 self->closed = 0;
751 self->ok = 1;
752 return 0;
753 }
754
755 /* Properties and pseudo-properties */
756
757 /*[clinic input]
758 _io.StringIO.readable
759
760 Returns True if the IO object can be read.
761 [clinic start generated code]*/
762
763 static PyObject *
_io_StringIO_readable_impl(stringio * self)764 _io_StringIO_readable_impl(stringio *self)
765 /*[clinic end generated code: output=b19d44dd8b1ceb99 input=39ce068b224c21ad]*/
766 {
767 CHECK_INITIALIZED(self);
768 CHECK_CLOSED(self);
769 Py_RETURN_TRUE;
770 }
771
772 /*[clinic input]
773 _io.StringIO.writable
774
775 Returns True if the IO object can be written.
776 [clinic start generated code]*/
777
778 static PyObject *
_io_StringIO_writable_impl(stringio * self)779 _io_StringIO_writable_impl(stringio *self)
780 /*[clinic end generated code: output=13e4dd77187074ca input=7a691353aac38835]*/
781 {
782 CHECK_INITIALIZED(self);
783 CHECK_CLOSED(self);
784 Py_RETURN_TRUE;
785 }
786
787 /*[clinic input]
788 _io.StringIO.seekable
789
790 Returns True if the IO object can be seeked.
791 [clinic start generated code]*/
792
793 static PyObject *
_io_StringIO_seekable_impl(stringio * self)794 _io_StringIO_seekable_impl(stringio *self)
795 /*[clinic end generated code: output=4d20b4641c756879 input=4c606d05b32952e6]*/
796 {
797 CHECK_INITIALIZED(self);
798 CHECK_CLOSED(self);
799 Py_RETURN_TRUE;
800 }
801
802 /* Pickling support.
803
804 The implementation of __getstate__ is similar to the one for BytesIO,
805 except that we also save the newline parameter. For __setstate__ and unlike
806 BytesIO, we call __init__ to restore the object's state. Doing so allows us
807 to avoid decoding the complex newline state while keeping the object
808 representation compact.
809
   See comment in bytesio.c regarding why only pickle protocol 2 and onward
   is supported.
812 */
813
814 static PyObject *
stringio_getstate(stringio * self)815 stringio_getstate(stringio *self)
816 {
817 PyObject *initvalue = _io_StringIO_getvalue_impl(self);
818 PyObject *dict;
819 PyObject *state;
820
821 if (initvalue == NULL)
822 return NULL;
823 if (self->dict == NULL) {
824 Py_INCREF(Py_None);
825 dict = Py_None;
826 }
827 else {
828 dict = PyDict_Copy(self->dict);
829 if (dict == NULL) {
830 Py_DECREF(initvalue);
831 return NULL;
832 }
833 }
834
835 state = Py_BuildValue("(OOnN)", initvalue,
836 self->readnl ? self->readnl : Py_None,
837 self->pos, dict);
838 Py_DECREF(initvalue);
839 return state;
840 }
841
842 static PyObject *
stringio_setstate(stringio * self,PyObject * state)843 stringio_setstate(stringio *self, PyObject *state)
844 {
845 PyObject *initarg;
846 PyObject *position_obj;
847 PyObject *dict;
848 Py_ssize_t pos;
849
850 assert(state != NULL);
851 CHECK_CLOSED(self);
852
853 /* We allow the state tuple to be longer than 4, because we may need
854 someday to extend the object's state without breaking
855 backward-compatibility. */
856 if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) {
857 PyErr_Format(PyExc_TypeError,
858 "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
859 Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
860 return NULL;
861 }
862
863 /* Initialize the object's state. */
864 initarg = PyTuple_GetSlice(state, 0, 2);
865 if (initarg == NULL)
866 return NULL;
867 if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) {
868 Py_DECREF(initarg);
869 return NULL;
870 }
871 Py_DECREF(initarg);
872
873 /* Restore the buffer state. Even if __init__ did initialize the buffer,
874 we have to initialize it again since __init__ may translate the
875 newlines in the initial_value string. We clearly do not want that
876 because the string value in the state tuple has already been translated
877 once by __init__. So we do not take any chance and replace object's
878 buffer completely. */
879 {
880 PyObject *item;
881 Py_UCS4 *buf;
882 Py_ssize_t bufsize;
883
884 item = PyTuple_GET_ITEM(state, 0);
885 buf = PyUnicode_AsUCS4Copy(item);
886 if (buf == NULL)
887 return NULL;
888 bufsize = PyUnicode_GET_LENGTH(item);
889
890 if (resize_buffer(self, bufsize) < 0) {
891 PyMem_Free(buf);
892 return NULL;
893 }
894 memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
895 PyMem_Free(buf);
896 self->string_size = bufsize;
897 }
898
899 /* Set carefully the position value. Alternatively, we could use the seek
900 method instead of modifying self->pos directly to better protect the
901 object internal state against errneous (or malicious) inputs. */
902 position_obj = PyTuple_GET_ITEM(state, 2);
903 if (!PyLong_Check(position_obj)) {
904 PyErr_Format(PyExc_TypeError,
905 "third item of state must be an integer, got %.200s",
906 Py_TYPE(position_obj)->tp_name);
907 return NULL;
908 }
909 pos = PyLong_AsSsize_t(position_obj);
910 if (pos == -1 && PyErr_Occurred())
911 return NULL;
912 if (pos < 0) {
913 PyErr_SetString(PyExc_ValueError,
914 "position value cannot be negative");
915 return NULL;
916 }
917 self->pos = pos;
918
919 /* Set the dictionary of the instance variables. */
920 dict = PyTuple_GET_ITEM(state, 3);
921 if (dict != Py_None) {
922 if (!PyDict_Check(dict)) {
923 PyErr_Format(PyExc_TypeError,
924 "fourth item of state should be a dict, got a %.200s",
925 Py_TYPE(dict)->tp_name);
926 return NULL;
927 }
928 if (self->dict) {
929 /* Alternatively, we could replace the internal dictionary
930 completely. However, it seems more practical to just update it. */
931 if (PyDict_Update(self->dict, dict) < 0)
932 return NULL;
933 }
934 else {
935 Py_INCREF(dict);
936 self->dict = dict;
937 }
938 }
939
940 Py_RETURN_NONE;
941 }
942
943
944 static PyObject *
stringio_closed(stringio * self,void * context)945 stringio_closed(stringio *self, void *context)
946 {
947 CHECK_INITIALIZED(self);
948 return PyBool_FromLong(self->closed);
949 }
950
951 static PyObject *
stringio_line_buffering(stringio * self,void * context)952 stringio_line_buffering(stringio *self, void *context)
953 {
954 CHECK_INITIALIZED(self);
955 CHECK_CLOSED(self);
956 Py_RETURN_FALSE;
957 }
958
959 static PyObject *
stringio_newlines(stringio * self,void * context)960 stringio_newlines(stringio *self, void *context)
961 {
962 CHECK_INITIALIZED(self);
963 CHECK_CLOSED(self);
964 if (self->decoder == NULL)
965 Py_RETURN_NONE;
966 return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
967 }
968
969 #include "clinic/stringio.c.h"
970
971 static struct PyMethodDef stringio_methods[] = {
972 _IO_STRINGIO_CLOSE_METHODDEF
973 _IO_STRINGIO_GETVALUE_METHODDEF
974 _IO_STRINGIO_READ_METHODDEF
975 _IO_STRINGIO_READLINE_METHODDEF
976 _IO_STRINGIO_TELL_METHODDEF
977 _IO_STRINGIO_TRUNCATE_METHODDEF
978 _IO_STRINGIO_SEEK_METHODDEF
979 _IO_STRINGIO_WRITE_METHODDEF
980
981 _IO_STRINGIO_SEEKABLE_METHODDEF
982 _IO_STRINGIO_READABLE_METHODDEF
983 _IO_STRINGIO_WRITABLE_METHODDEF
984
985 {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
986 {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
987 {NULL, NULL} /* sentinel */
988 };
989
990 static PyGetSetDef stringio_getset[] = {
991 {"closed", (getter)stringio_closed, NULL, NULL},
992 {"newlines", (getter)stringio_newlines, NULL, NULL},
993 /* (following comments straight off of the original Python wrapper:)
994 XXX Cruft to support the TextIOWrapper API. This would only
995 be meaningful if StringIO supported the buffer attribute.
996 Hopefully, a better solution, than adding these pseudo-attributes,
997 will be found.
998 */
999 {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
1000 {NULL}
1001 };
1002
1003 PyTypeObject PyStringIO_Type = {
1004 PyVarObject_HEAD_INIT(NULL, 0)
1005 "_io.StringIO", /*tp_name*/
1006 sizeof(stringio), /*tp_basicsize*/
1007 0, /*tp_itemsize*/
1008 (destructor)stringio_dealloc, /*tp_dealloc*/
1009 0, /*tp_print*/
1010 0, /*tp_getattr*/
1011 0, /*tp_setattr*/
1012 0, /*tp_reserved*/
1013 0, /*tp_repr*/
1014 0, /*tp_as_number*/
1015 0, /*tp_as_sequence*/
1016 0, /*tp_as_mapping*/
1017 0, /*tp_hash*/
1018 0, /*tp_call*/
1019 0, /*tp_str*/
1020 0, /*tp_getattro*/
1021 0, /*tp_setattro*/
1022 0, /*tp_as_buffer*/
1023 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
1024 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1025 _io_StringIO___init____doc__, /*tp_doc*/
1026 (traverseproc)stringio_traverse, /*tp_traverse*/
1027 (inquiry)stringio_clear, /*tp_clear*/
1028 0, /*tp_richcompare*/
1029 offsetof(stringio, weakreflist), /*tp_weaklistoffset*/
1030 0, /*tp_iter*/
1031 (iternextfunc)stringio_iternext, /*tp_iternext*/
1032 stringio_methods, /*tp_methods*/
1033 0, /*tp_members*/
1034 stringio_getset, /*tp_getset*/
1035 0, /*tp_base*/
1036 0, /*tp_dict*/
1037 0, /*tp_descr_get*/
1038 0, /*tp_descr_set*/
1039 offsetof(stringio, dict), /*tp_dictoffset*/
1040 _io_StringIO___init__, /*tp_init*/
1041 0, /*tp_alloc*/
1042 stringio_new, /*tp_new*/
1043 };
1044