• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #define PY_SSIZE_T_CLEAN
2 #include "Python.h"
3 #include "structmember.h"
4 #include "accu.h"
5 #include "_iomodule.h"
6 
7 /* Implementation note: the buffer is always at least one character longer
8    than the enclosed string, for proper functioning of _PyIO_find_line_ending.
9 */
10 
/* Values stored in stringio.state. */
#define STATE_REALIZED 1        /* contents live in the UCS4 buffer */
#define STATE_ACCUMULATING 2    /* contents live in the _PyAccu accumulator */
13 
14 /*[clinic input]
15 module _io
16 class _io.StringIO "stringio *" "&PyStringIO_Type"
17 [clinic start generated code]*/
18 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c17bc0f42165cd7d]*/
19 
20 typedef struct {
21     PyObject_HEAD
22     Py_UCS4 *buf;
23     Py_ssize_t pos;
24     Py_ssize_t string_size;
25     size_t buf_size;
26 
27     /* The stringio object can be in two states: accumulating or realized.
28        In accumulating state, the internal buffer contains nothing and
29        the contents are given by the embedded _PyAccu structure.
30        In realized state, the internal buffer is meaningful and the
31        _PyAccu is destroyed.
32     */
33     int state;
34     _PyAccu accu;
35 
36     char ok; /* initialized? */
37     char closed;
38     char readuniversal;
39     char readtranslate;
40     PyObject *decoder;
41     PyObject *readnl;
42     PyObject *writenl;
43 
44     PyObject *dict;
45     PyObject *weakreflist;
46 } stringio;
47 
48 static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
49 
/* Raise ValueError and return NULL from the enclosing function when the
   object has not been initialized (self->ok is not positive).
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and cannot capture a following 'else'. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
56 
/* Raise ValueError and return NULL from the enclosing function when the
   object has been closed.
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and cannot capture a following 'else'. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)
63 
/* Switch the object to the realized state, returning NULL from the
   enclosing function if realize() fails (it has then set an exception).
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and cannot capture a following 'else'. */
#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)
68 
69 
70 /* Internal routine for changing the size, in terms of characters, of the
71    buffer of StringIO objects.  The caller should ensure that the 'size'
72    argument is non-negative.  Returns 0 on success, -1 otherwise. */
73 static int
resize_buffer(stringio * self,size_t size)74 resize_buffer(stringio *self, size_t size)
75 {
76     /* Here, unsigned types are used to avoid dealing with signed integer
77        overflow, which is undefined in C. */
78     size_t alloc = self->buf_size;
79     Py_UCS4 *new_buf = NULL;
80 
81     assert(self->buf != NULL);
82 
83     /* Reserve one more char for line ending detection. */
84     size = size + 1;
85     /* For simplicity, stay in the range of the signed type. Anyway, Python
86        doesn't allow strings to be longer than this. */
87     if (size > PY_SSIZE_T_MAX)
88         goto overflow;
89 
90     if (size < alloc / 2) {
91         /* Major downsize; resize down to exact size. */
92         alloc = size + 1;
93     }
94     else if (size < alloc) {
95         /* Within allocated size; quick exit */
96         return 0;
97     }
98     else if (size <= alloc * 1.125) {
99         /* Moderate upsize; overallocate similar to list_resize() */
100         alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
101     }
102     else {
103         /* Major upsize; resize up to exact size */
104         alloc = size + 1;
105     }
106 
107     if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
108         goto overflow;
109     new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
110     if (new_buf == NULL) {
111         PyErr_NoMemory();
112         return -1;
113     }
114     self->buf_size = alloc;
115     self->buf = new_buf;
116 
117     return 0;
118 
119   overflow:
120     PyErr_SetString(PyExc_OverflowError,
121                     "new buffer size too large");
122     return -1;
123 }
124 
125 static PyObject *
make_intermediate(stringio * self)126 make_intermediate(stringio *self)
127 {
128     PyObject *intermediate = _PyAccu_Finish(&self->accu);
129     self->state = STATE_REALIZED;
130     if (intermediate == NULL)
131         return NULL;
132     if (_PyAccu_Init(&self->accu) ||
133         _PyAccu_Accumulate(&self->accu, intermediate)) {
134         Py_DECREF(intermediate);
135         return NULL;
136     }
137     self->state = STATE_ACCUMULATING;
138     return intermediate;
139 }
140 
141 static int
realize(stringio * self)142 realize(stringio *self)
143 {
144     Py_ssize_t len;
145     PyObject *intermediate;
146 
147     if (self->state == STATE_REALIZED)
148         return 0;
149     assert(self->state == STATE_ACCUMULATING);
150     self->state = STATE_REALIZED;
151 
152     intermediate = _PyAccu_Finish(&self->accu);
153     if (intermediate == NULL)
154         return -1;
155 
156     /* Append the intermediate string to the internal buffer.
157        The length should be equal to the current cursor position.
158      */
159     len = PyUnicode_GET_LENGTH(intermediate);
160     if (resize_buffer(self, len) < 0) {
161         Py_DECREF(intermediate);
162         return -1;
163     }
164     if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
165         Py_DECREF(intermediate);
166         return -1;
167     }
168 
169     Py_DECREF(intermediate);
170     return 0;
171 }
172 
173 /* Internal routine for writing a whole PyUnicode object to the buffer of a
174    StringIO object. Returns 0 on success, or -1 on error. */
175 static Py_ssize_t
write_str(stringio * self,PyObject * obj)176 write_str(stringio *self, PyObject *obj)
177 {
178     Py_ssize_t len;
179     PyObject *decoded = NULL;
180 
181     assert(self->buf != NULL);
182     assert(self->pos >= 0);
183 
184     if (self->decoder != NULL) {
185         decoded = _PyIncrementalNewlineDecoder_decode(
186             self->decoder, obj, 1 /* always final */);
187     }
188     else {
189         decoded = obj;
190         Py_INCREF(decoded);
191     }
192     if (self->writenl) {
193         PyObject *translated = PyUnicode_Replace(
194             decoded, _PyIO_str_nl, self->writenl, -1);
195         Py_DECREF(decoded);
196         decoded = translated;
197     }
198     if (decoded == NULL)
199         return -1;
200 
201     assert(PyUnicode_Check(decoded));
202     if (PyUnicode_READY(decoded)) {
203         Py_DECREF(decoded);
204         return -1;
205     }
206     len = PyUnicode_GET_LENGTH(decoded);
207     assert(len >= 0);
208 
209     /* This overflow check is not strictly necessary. However, it avoids us to
210        deal with funky things like comparing an unsigned and a signed
211        integer. */
212     if (self->pos > PY_SSIZE_T_MAX - len) {
213         PyErr_SetString(PyExc_OverflowError,
214                         "new position too large");
215         goto fail;
216     }
217 
218     if (self->state == STATE_ACCUMULATING) {
219         if (self->string_size == self->pos) {
220             if (_PyAccu_Accumulate(&self->accu, decoded))
221                 goto fail;
222             goto success;
223         }
224         if (realize(self))
225             goto fail;
226     }
227 
228     if (self->pos + len > self->string_size) {
229         if (resize_buffer(self, self->pos + len) < 0)
230             goto fail;
231     }
232 
233     if (self->pos > self->string_size) {
234         /* In case of overseek, pad with null bytes the buffer region between
235            the end of stream and the current position.
236 
237           0   lo      string_size                           hi
238           |   |<---used--->|<----------available----------->|
239           |   |            <--to pad-->|<---to write--->    |
240           0   buf                   position
241 
242         */
243         memset(self->buf + self->string_size, '\0',
244                (self->pos - self->string_size) * sizeof(Py_UCS4));
245     }
246 
247     /* Copy the data to the internal buffer, overwriting some of the
248        existing data if self->pos < self->string_size. */
249     if (!PyUnicode_AsUCS4(decoded,
250                           self->buf + self->pos,
251                           self->buf_size - self->pos,
252                           0))
253         goto fail;
254 
255 success:
256     /* Set the new length of the internal string if it has changed. */
257     self->pos += len;
258     if (self->string_size < self->pos)
259         self->string_size = self->pos;
260 
261     Py_DECREF(decoded);
262     return 0;
263 
264 fail:
265     Py_XDECREF(decoded);
266     return -1;
267 }
268 
269 /*[clinic input]
270 _io.StringIO.getvalue
271 
272 Retrieve the entire contents of the object.
273 [clinic start generated code]*/
274 
275 static PyObject *
_io_StringIO_getvalue_impl(stringio * self)276 _io_StringIO_getvalue_impl(stringio *self)
277 /*[clinic end generated code: output=27b6a7bfeaebce01 input=d23cb81d6791cf88]*/
278 {
279     CHECK_INITIALIZED(self);
280     CHECK_CLOSED(self);
281     if (self->state == STATE_ACCUMULATING)
282         return make_intermediate(self);
283     return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
284                                      self->string_size);
285 }
286 
287 /*[clinic input]
288 _io.StringIO.tell
289 
290 Tell the current file position.
291 [clinic start generated code]*/
292 
293 static PyObject *
_io_StringIO_tell_impl(stringio * self)294 _io_StringIO_tell_impl(stringio *self)
295 /*[clinic end generated code: output=2e87ac67b116c77b input=ec866ebaff02f405]*/
296 {
297     CHECK_INITIALIZED(self);
298     CHECK_CLOSED(self);
299     return PyLong_FromSsize_t(self->pos);
300 }
301 
302 /*[clinic input]
303 _io.StringIO.read
304     size: Py_ssize_t(accept={int, NoneType}) = -1
305     /
306 
307 Read at most size characters, returned as a string.
308 
309 If the argument is negative or omitted, read until EOF
310 is reached. Return an empty string at EOF.
311 [clinic start generated code]*/
312 
313 static PyObject *
_io_StringIO_read_impl(stringio * self,Py_ssize_t size)314 _io_StringIO_read_impl(stringio *self, Py_ssize_t size)
315 /*[clinic end generated code: output=ae8cf6002f71626c input=0921093383dfb92d]*/
316 {
317     Py_ssize_t n;
318     Py_UCS4 *output;
319 
320     CHECK_INITIALIZED(self);
321     CHECK_CLOSED(self);
322 
323     /* adjust invalid sizes */
324     n = self->string_size - self->pos;
325     if (size < 0 || size > n) {
326         size = n;
327         if (size < 0)
328             size = 0;
329     }
330 
331     /* Optimization for seek(0); read() */
332     if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
333         PyObject *result = make_intermediate(self);
334         self->pos = self->string_size;
335         return result;
336     }
337 
338     ENSURE_REALIZED(self);
339     output = self->buf + self->pos;
340     self->pos += size;
341     return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
342 }
343 
344 /* Internal helper, used by stringio_readline and stringio_iternext */
345 static PyObject *
_stringio_readline(stringio * self,Py_ssize_t limit)346 _stringio_readline(stringio *self, Py_ssize_t limit)
347 {
348     Py_UCS4 *start, *end, old_char;
349     Py_ssize_t len, consumed;
350 
351     /* In case of overseek, return the empty string */
352     if (self->pos >= self->string_size)
353         return PyUnicode_New(0, 0);
354 
355     start = self->buf + self->pos;
356     if (limit < 0 || limit > self->string_size - self->pos)
357         limit = self->string_size - self->pos;
358 
359     end = start + limit;
360     old_char = *end;
361     *end = '\0';
362     len = _PyIO_find_line_ending(
363         self->readtranslate, self->readuniversal, self->readnl,
364         PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
365     *end = old_char;
366     /* If we haven't found any line ending, we just return everything
367        (`consumed` is ignored). */
368     if (len < 0)
369         len = limit;
370     self->pos += len;
371     return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
372 }
373 
374 /*[clinic input]
375 _io.StringIO.readline
376     size: Py_ssize_t(accept={int, NoneType}) = -1
377     /
378 
379 Read until newline or EOF.
380 
381 Returns an empty string if EOF is hit immediately.
382 [clinic start generated code]*/
383 
384 static PyObject *
_io_StringIO_readline_impl(stringio * self,Py_ssize_t size)385 _io_StringIO_readline_impl(stringio *self, Py_ssize_t size)
386 /*[clinic end generated code: output=cabd6452f1b7e85d input=a5bd70bf682aa276]*/
387 {
388     CHECK_INITIALIZED(self);
389     CHECK_CLOSED(self);
390     ENSURE_REALIZED(self);
391 
392     return _stringio_readline(self, size);
393 }
394 
395 static PyObject *
stringio_iternext(stringio * self)396 stringio_iternext(stringio *self)
397 {
398     PyObject *line;
399 
400     CHECK_INITIALIZED(self);
401     CHECK_CLOSED(self);
402     ENSURE_REALIZED(self);
403 
404     if (Py_TYPE(self) == &PyStringIO_Type) {
405         /* Skip method call overhead for speed */
406         line = _stringio_readline(self, -1);
407     }
408     else {
409         /* XXX is subclassing StringIO really supported? */
410         line = PyObject_CallMethodObjArgs((PyObject *)self,
411                                            _PyIO_str_readline, NULL);
412         if (line && !PyUnicode_Check(line)) {
413             PyErr_Format(PyExc_OSError,
414                          "readline() should have returned a str object, "
415                          "not '%.200s'", Py_TYPE(line)->tp_name);
416             Py_DECREF(line);
417             return NULL;
418         }
419     }
420 
421     if (line == NULL)
422         return NULL;
423 
424     if (PyUnicode_GET_LENGTH(line) == 0) {
425         /* Reached EOF */
426         Py_DECREF(line);
427         return NULL;
428     }
429 
430     return line;
431 }
432 
433 /*[clinic input]
434 _io.StringIO.truncate
435     pos as size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None
436     /
437 
438 Truncate size to pos.
439 
440 The pos argument defaults to the current file position, as
441 returned by tell().  The current file position is unchanged.
442 Returns the new absolute position.
443 [clinic start generated code]*/
444 
445 static PyObject *
_io_StringIO_truncate_impl(stringio * self,Py_ssize_t size)446 _io_StringIO_truncate_impl(stringio *self, Py_ssize_t size)
447 /*[clinic end generated code: output=eb3aef8e06701365 input=5505cff90ca48b96]*/
448 {
449     CHECK_INITIALIZED(self);
450     CHECK_CLOSED(self);
451 
452     if (size < 0) {
453         PyErr_Format(PyExc_ValueError,
454                      "Negative size value %zd", size);
455         return NULL;
456     }
457 
458     if (size < self->string_size) {
459         ENSURE_REALIZED(self);
460         if (resize_buffer(self, size) < 0)
461             return NULL;
462         self->string_size = size;
463     }
464 
465     return PyLong_FromSsize_t(size);
466 }
467 
468 /*[clinic input]
469 _io.StringIO.seek
470     pos: Py_ssize_t
471     whence: int = 0
472     /
473 
474 Change stream position.
475 
476 Seek to character offset pos relative to position indicated by whence:
477     0  Start of stream (the default).  pos should be >= 0;
478     1  Current position - pos must be 0;
479     2  End of stream - pos must be 0.
480 Returns the new absolute position.
481 [clinic start generated code]*/
482 
483 static PyObject *
_io_StringIO_seek_impl(stringio * self,Py_ssize_t pos,int whence)484 _io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence)
485 /*[clinic end generated code: output=e9e0ac9a8ae71c25 input=e3855b24e7cae06a]*/
486 {
487     CHECK_INITIALIZED(self);
488     CHECK_CLOSED(self);
489 
490     if (whence != 0 && whence != 1 && whence != 2) {
491         PyErr_Format(PyExc_ValueError,
492                      "Invalid whence (%i, should be 0, 1 or 2)", whence);
493         return NULL;
494     }
495     else if (pos < 0 && whence == 0) {
496         PyErr_Format(PyExc_ValueError,
497                      "Negative seek position %zd", pos);
498         return NULL;
499     }
500     else if (whence != 0 && pos != 0) {
501         PyErr_SetString(PyExc_OSError,
502                         "Can't do nonzero cur-relative seeks");
503         return NULL;
504     }
505 
506     /* whence = 0: offset relative to beginning of the string.
507        whence = 1: no change to current position.
508        whence = 2: change position to end of file. */
509     if (whence == 1) {
510         pos = self->pos;
511     }
512     else if (whence == 2) {
513         pos = self->string_size;
514     }
515 
516     self->pos = pos;
517 
518     return PyLong_FromSsize_t(self->pos);
519 }
520 
521 /*[clinic input]
522 _io.StringIO.write
523     s as obj: object
524     /
525 
526 Write string to file.
527 
528 Returns the number of characters written, which is always equal to
529 the length of the string.
530 [clinic start generated code]*/
531 
532 static PyObject *
_io_StringIO_write(stringio * self,PyObject * obj)533 _io_StringIO_write(stringio *self, PyObject *obj)
534 /*[clinic end generated code: output=0deaba91a15b94da input=cf96f3b16586e669]*/
535 {
536     Py_ssize_t size;
537 
538     CHECK_INITIALIZED(self);
539     if (!PyUnicode_Check(obj)) {
540         PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
541                      Py_TYPE(obj)->tp_name);
542         return NULL;
543     }
544     if (PyUnicode_READY(obj))
545         return NULL;
546     CHECK_CLOSED(self);
547     size = PyUnicode_GET_LENGTH(obj);
548 
549     if (size > 0 && write_str(self, obj) < 0)
550         return NULL;
551 
552     return PyLong_FromSsize_t(size);
553 }
554 
555 /*[clinic input]
556 _io.StringIO.close
557 
558 Close the IO object.
559 
560 Attempting any further operation after the object is closed
561 will raise a ValueError.
562 
563 This method has no effect if the file is already closed.
564 [clinic start generated code]*/
565 
566 static PyObject *
_io_StringIO_close_impl(stringio * self)567 _io_StringIO_close_impl(stringio *self)
568 /*[clinic end generated code: output=04399355cbe518f1 input=cbc10b45f35d6d46]*/
569 {
570     self->closed = 1;
571     /* Free up some memory */
572     if (resize_buffer(self, 0) < 0)
573         return NULL;
574     _PyAccu_Destroy(&self->accu);
575     Py_CLEAR(self->readnl);
576     Py_CLEAR(self->writenl);
577     Py_CLEAR(self->decoder);
578     Py_RETURN_NONE;
579 }
580 
581 static int
stringio_traverse(stringio * self,visitproc visit,void * arg)582 stringio_traverse(stringio *self, visitproc visit, void *arg)
583 {
584     Py_VISIT(self->dict);
585     return 0;
586 }
587 
588 static int
stringio_clear(stringio * self)589 stringio_clear(stringio *self)
590 {
591     Py_CLEAR(self->dict);
592     return 0;
593 }
594 
595 static void
stringio_dealloc(stringio * self)596 stringio_dealloc(stringio *self)
597 {
598     _PyObject_GC_UNTRACK(self);
599     self->ok = 0;
600     if (self->buf) {
601         PyMem_Free(self->buf);
602         self->buf = NULL;
603     }
604     _PyAccu_Destroy(&self->accu);
605     Py_CLEAR(self->readnl);
606     Py_CLEAR(self->writenl);
607     Py_CLEAR(self->decoder);
608     Py_CLEAR(self->dict);
609     if (self->weakreflist != NULL)
610         PyObject_ClearWeakRefs((PyObject *) self);
611     Py_TYPE(self)->tp_free(self);
612 }
613 
614 static PyObject *
stringio_new(PyTypeObject * type,PyObject * args,PyObject * kwds)615 stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
616 {
617     stringio *self;
618 
619     assert(type != NULL && type->tp_alloc != NULL);
620     self = (stringio *)type->tp_alloc(type, 0);
621     if (self == NULL)
622         return NULL;
623 
624     /* tp_alloc initializes all the fields to zero. So we don't have to
625        initialize them here. */
626 
627     self->buf = (Py_UCS4 *)PyMem_Malloc(0);
628     if (self->buf == NULL) {
629         Py_DECREF(self);
630         return PyErr_NoMemory();
631     }
632 
633     return (PyObject *)self;
634 }
635 
636 /*[clinic input]
637 _io.StringIO.__init__
638     initial_value as value: object(c_default="NULL") = ''
639     newline as newline_obj: object(c_default="NULL") = '\n'
640 
641 Text I/O implementation using an in-memory buffer.
642 
643 The initial_value argument sets the value of object.  The newline
644 argument is like the one of TextIOWrapper's constructor.
645 [clinic start generated code]*/
646 
647 static int
_io_StringIO___init___impl(stringio * self,PyObject * value,PyObject * newline_obj)648 _io_StringIO___init___impl(stringio *self, PyObject *value,
649                            PyObject *newline_obj)
650 /*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/
651 {
652     const char *newline = "\n";
653     Py_ssize_t value_len;
654 
655     /* Parse the newline argument. We only want to allow unicode objects or
656        None. */
657     if (newline_obj == Py_None) {
658         newline = NULL;
659     }
660     else if (newline_obj) {
661         if (!PyUnicode_Check(newline_obj)) {
662             PyErr_Format(PyExc_TypeError,
663                          "newline must be str or None, not %.200s",
664                          Py_TYPE(newline_obj)->tp_name);
665             return -1;
666         }
667         newline = PyUnicode_AsUTF8(newline_obj);
668         if (newline == NULL)
669             return -1;
670     }
671 
672     if (newline && newline[0] != '\0'
673         && !(newline[0] == '\n' && newline[1] == '\0')
674         && !(newline[0] == '\r' && newline[1] == '\0')
675         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
676         PyErr_Format(PyExc_ValueError,
677                      "illegal newline value: %R", newline_obj);
678         return -1;
679     }
680     if (value && value != Py_None && !PyUnicode_Check(value)) {
681         PyErr_Format(PyExc_TypeError,
682                      "initial_value must be str or None, not %.200s",
683                      Py_TYPE(value)->tp_name);
684         return -1;
685     }
686 
687     self->ok = 0;
688 
689     _PyAccu_Destroy(&self->accu);
690     Py_CLEAR(self->readnl);
691     Py_CLEAR(self->writenl);
692     Py_CLEAR(self->decoder);
693 
694     assert((newline != NULL && newline_obj != Py_None) ||
695            (newline == NULL && newline_obj == Py_None));
696 
697     if (newline) {
698         self->readnl = PyUnicode_FromString(newline);
699         if (self->readnl == NULL)
700             return -1;
701     }
702     self->readuniversal = (newline == NULL || newline[0] == '\0');
703     self->readtranslate = (newline == NULL);
704     /* If newline == "", we don't translate anything.
705        If newline == "\n" or newline == None, we translate to "\n", which is
706        a no-op.
707        (for newline == None, TextIOWrapper translates to os.linesep, but it
708        is pointless for StringIO)
709     */
710     if (newline != NULL && newline[0] == '\r') {
711         self->writenl = self->readnl;
712         Py_INCREF(self->writenl);
713     }
714 
715     if (self->readuniversal) {
716         self->decoder = PyObject_CallFunction(
717             (PyObject *)&PyIncrementalNewlineDecoder_Type,
718             "Oi", Py_None, (int) self->readtranslate);
719         if (self->decoder == NULL)
720             return -1;
721     }
722 
723     /* Now everything is set up, resize buffer to size of initial value,
724        and copy it */
725     self->string_size = 0;
726     if (value && value != Py_None)
727         value_len = PyUnicode_GetLength(value);
728     else
729         value_len = 0;
730     if (value_len > 0) {
731         /* This is a heuristic, for newline translation might change
732            the string length. */
733         if (resize_buffer(self, 0) < 0)
734             return -1;
735         self->state = STATE_REALIZED;
736         self->pos = 0;
737         if (write_str(self, value) < 0)
738             return -1;
739     }
740     else {
741         /* Empty stringio object, we can start by accumulating */
742         if (resize_buffer(self, 0) < 0)
743             return -1;
744         if (_PyAccu_Init(&self->accu))
745             return -1;
746         self->state = STATE_ACCUMULATING;
747     }
748     self->pos = 0;
749 
750     self->closed = 0;
751     self->ok = 1;
752     return 0;
753 }
754 
755 /* Properties and pseudo-properties */
756 
757 /*[clinic input]
758 _io.StringIO.readable
759 
760 Returns True if the IO object can be read.
761 [clinic start generated code]*/
762 
763 static PyObject *
_io_StringIO_readable_impl(stringio * self)764 _io_StringIO_readable_impl(stringio *self)
765 /*[clinic end generated code: output=b19d44dd8b1ceb99 input=39ce068b224c21ad]*/
766 {
767     CHECK_INITIALIZED(self);
768     CHECK_CLOSED(self);
769     Py_RETURN_TRUE;
770 }
771 
772 /*[clinic input]
773 _io.StringIO.writable
774 
775 Returns True if the IO object can be written.
776 [clinic start generated code]*/
777 
778 static PyObject *
_io_StringIO_writable_impl(stringio * self)779 _io_StringIO_writable_impl(stringio *self)
780 /*[clinic end generated code: output=13e4dd77187074ca input=7a691353aac38835]*/
781 {
782     CHECK_INITIALIZED(self);
783     CHECK_CLOSED(self);
784     Py_RETURN_TRUE;
785 }
786 
787 /*[clinic input]
788 _io.StringIO.seekable
789 
790 Returns True if the IO object can be seeked.
791 [clinic start generated code]*/
792 
793 static PyObject *
_io_StringIO_seekable_impl(stringio * self)794 _io_StringIO_seekable_impl(stringio *self)
795 /*[clinic end generated code: output=4d20b4641c756879 input=4c606d05b32952e6]*/
796 {
797     CHECK_INITIALIZED(self);
798     CHECK_CLOSED(self);
799     Py_RETURN_TRUE;
800 }
801 
/* Pickling support.

   The implementation of __getstate__ is similar to the one for BytesIO,
   except that we also save the newline parameter. For __setstate__, and
   unlike BytesIO, we call __init__ to restore the object's state. Doing so
   allows us to avoid decoding the complex newline state while keeping the
   object representation compact.

   See the comment in bytesio.c regarding why only pickle protocol 2 and
   onward are supported.
*/
813 
814 static PyObject *
stringio_getstate(stringio * self)815 stringio_getstate(stringio *self)
816 {
817     PyObject *initvalue = _io_StringIO_getvalue_impl(self);
818     PyObject *dict;
819     PyObject *state;
820 
821     if (initvalue == NULL)
822         return NULL;
823     if (self->dict == NULL) {
824         Py_INCREF(Py_None);
825         dict = Py_None;
826     }
827     else {
828         dict = PyDict_Copy(self->dict);
829         if (dict == NULL) {
830             Py_DECREF(initvalue);
831             return NULL;
832         }
833     }
834 
835     state = Py_BuildValue("(OOnN)", initvalue,
836                           self->readnl ? self->readnl : Py_None,
837                           self->pos, dict);
838     Py_DECREF(initvalue);
839     return state;
840 }
841 
842 static PyObject *
stringio_setstate(stringio * self,PyObject * state)843 stringio_setstate(stringio *self, PyObject *state)
844 {
845     PyObject *initarg;
846     PyObject *position_obj;
847     PyObject *dict;
848     Py_ssize_t pos;
849 
850     assert(state != NULL);
851     CHECK_CLOSED(self);
852 
853     /* We allow the state tuple to be longer than 4, because we may need
854        someday to extend the object's state without breaking
855        backward-compatibility. */
856     if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) {
857         PyErr_Format(PyExc_TypeError,
858                      "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
859                      Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
860         return NULL;
861     }
862 
863     /* Initialize the object's state. */
864     initarg = PyTuple_GetSlice(state, 0, 2);
865     if (initarg == NULL)
866         return NULL;
867     if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) {
868         Py_DECREF(initarg);
869         return NULL;
870     }
871     Py_DECREF(initarg);
872 
873     /* Restore the buffer state. Even if __init__ did initialize the buffer,
874        we have to initialize it again since __init__ may translate the
875        newlines in the initial_value string. We clearly do not want that
876        because the string value in the state tuple has already been translated
877        once by __init__. So we do not take any chance and replace object's
878        buffer completely. */
879     {
880         PyObject *item;
881         Py_UCS4 *buf;
882         Py_ssize_t bufsize;
883 
884         item = PyTuple_GET_ITEM(state, 0);
885         buf = PyUnicode_AsUCS4Copy(item);
886         if (buf == NULL)
887             return NULL;
888         bufsize = PyUnicode_GET_LENGTH(item);
889 
890         if (resize_buffer(self, bufsize) < 0) {
891             PyMem_Free(buf);
892             return NULL;
893         }
894         memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
895         PyMem_Free(buf);
896         self->string_size = bufsize;
897     }
898 
899     /* Set carefully the position value. Alternatively, we could use the seek
900        method instead of modifying self->pos directly to better protect the
901        object internal state against errneous (or malicious) inputs. */
902     position_obj = PyTuple_GET_ITEM(state, 2);
903     if (!PyLong_Check(position_obj)) {
904         PyErr_Format(PyExc_TypeError,
905                      "third item of state must be an integer, got %.200s",
906                      Py_TYPE(position_obj)->tp_name);
907         return NULL;
908     }
909     pos = PyLong_AsSsize_t(position_obj);
910     if (pos == -1 && PyErr_Occurred())
911         return NULL;
912     if (pos < 0) {
913         PyErr_SetString(PyExc_ValueError,
914                         "position value cannot be negative");
915         return NULL;
916     }
917     self->pos = pos;
918 
919     /* Set the dictionary of the instance variables. */
920     dict = PyTuple_GET_ITEM(state, 3);
921     if (dict != Py_None) {
922         if (!PyDict_Check(dict)) {
923             PyErr_Format(PyExc_TypeError,
924                          "fourth item of state should be a dict, got a %.200s",
925                          Py_TYPE(dict)->tp_name);
926             return NULL;
927         }
928         if (self->dict) {
929             /* Alternatively, we could replace the internal dictionary
930                completely. However, it seems more practical to just update it. */
931             if (PyDict_Update(self->dict, dict) < 0)
932                 return NULL;
933         }
934         else {
935             Py_INCREF(dict);
936             self->dict = dict;
937         }
938     }
939 
940     Py_RETURN_NONE;
941 }
942 
943 
944 static PyObject *
stringio_closed(stringio * self,void * context)945 stringio_closed(stringio *self, void *context)
946 {
947     CHECK_INITIALIZED(self);
948     return PyBool_FromLong(self->closed);
949 }
950 
951 static PyObject *
stringio_line_buffering(stringio * self,void * context)952 stringio_line_buffering(stringio *self, void *context)
953 {
954     CHECK_INITIALIZED(self);
955     CHECK_CLOSED(self);
956     Py_RETURN_FALSE;
957 }
958 
959 static PyObject *
stringio_newlines(stringio * self,void * context)960 stringio_newlines(stringio *self, void *context)
961 {
962     CHECK_INITIALIZED(self);
963     CHECK_CLOSED(self);
964     if (self->decoder == NULL)
965         Py_RETURN_NONE;
966     return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
967 }
968 
969 #include "clinic/stringio.c.h"
970 
971 static struct PyMethodDef stringio_methods[] = {
972     _IO_STRINGIO_CLOSE_METHODDEF
973     _IO_STRINGIO_GETVALUE_METHODDEF
974     _IO_STRINGIO_READ_METHODDEF
975     _IO_STRINGIO_READLINE_METHODDEF
976     _IO_STRINGIO_TELL_METHODDEF
977     _IO_STRINGIO_TRUNCATE_METHODDEF
978     _IO_STRINGIO_SEEK_METHODDEF
979     _IO_STRINGIO_WRITE_METHODDEF
980 
981     _IO_STRINGIO_SEEKABLE_METHODDEF
982     _IO_STRINGIO_READABLE_METHODDEF
983     _IO_STRINGIO_WRITABLE_METHODDEF
984 
985     {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
986     {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
987     {NULL, NULL}        /* sentinel */
988 };
989 
990 static PyGetSetDef stringio_getset[] = {
991     {"closed",         (getter)stringio_closed,         NULL, NULL},
992     {"newlines",       (getter)stringio_newlines,       NULL, NULL},
993     /*  (following comments straight off of the original Python wrapper:)
994         XXX Cruft to support the TextIOWrapper API. This would only
995         be meaningful if StringIO supported the buffer attribute.
996         Hopefully, a better solution, than adding these pseudo-attributes,
997         will be found.
998     */
999     {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
1000     {NULL}
1001 };
1002 
1003 PyTypeObject PyStringIO_Type = {
1004     PyVarObject_HEAD_INIT(NULL, 0)
1005     "_io.StringIO",                            /*tp_name*/
1006     sizeof(stringio),                    /*tp_basicsize*/
1007     0,                                         /*tp_itemsize*/
1008     (destructor)stringio_dealloc,              /*tp_dealloc*/
1009     0,                                         /*tp_print*/
1010     0,                                         /*tp_getattr*/
1011     0,                                         /*tp_setattr*/
1012     0,                                         /*tp_reserved*/
1013     0,                                         /*tp_repr*/
1014     0,                                         /*tp_as_number*/
1015     0,                                         /*tp_as_sequence*/
1016     0,                                         /*tp_as_mapping*/
1017     0,                                         /*tp_hash*/
1018     0,                                         /*tp_call*/
1019     0,                                         /*tp_str*/
1020     0,                                         /*tp_getattro*/
1021     0,                                         /*tp_setattro*/
1022     0,                                         /*tp_as_buffer*/
1023     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
1024                        | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
1025     _io_StringIO___init____doc__,              /*tp_doc*/
1026     (traverseproc)stringio_traverse,           /*tp_traverse*/
1027     (inquiry)stringio_clear,                   /*tp_clear*/
1028     0,                                         /*tp_richcompare*/
1029     offsetof(stringio, weakreflist),            /*tp_weaklistoffset*/
1030     0,                                         /*tp_iter*/
1031     (iternextfunc)stringio_iternext,           /*tp_iternext*/
1032     stringio_methods,                          /*tp_methods*/
1033     0,                                         /*tp_members*/
1034     stringio_getset,                           /*tp_getset*/
1035     0,                                         /*tp_base*/
1036     0,                                         /*tp_dict*/
1037     0,                                         /*tp_descr_get*/
1038     0,                                         /*tp_descr_set*/
1039     offsetof(stringio, dict),                  /*tp_dictoffset*/
1040     _io_StringIO___init__,                     /*tp_init*/
1041     0,                                         /*tp_alloc*/
1042     stringio_new,                              /*tp_new*/
1043 };
1044