• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 #include <stddef.h>               // offsetof()
3 #include "pycore_object.h"
4 #include "_iomodule.h"
5 
6 /* Implementation note: the buffer is always at least one character longer
7    than the enclosed string, for proper functioning of _PyIO_find_line_ending.
8 */
9 
10 #define STATE_REALIZED 1
11 #define STATE_ACCUMULATING 2
12 
13 /*[clinic input]
14 module _io
15 class _io.StringIO "stringio *" "clinic_state()->PyStringIO_Type"
16 [clinic start generated code]*/
17 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2693eada0658d470]*/
18 
19 typedef struct {
20     PyObject_HEAD
21     Py_UCS4 *buf;
22     Py_ssize_t pos;
23     Py_ssize_t string_size;
24     size_t buf_size;
25 
26     /* The stringio object can be in two states: accumulating or realized.
27        In accumulating state, the internal buffer contains nothing and
28        the contents are given by the embedded _PyUnicodeWriter structure.
29        In realized state, the internal buffer is meaningful and the
30        _PyUnicodeWriter is destroyed.
31     */
32     int state;
33     _PyUnicodeWriter writer;
34 
35     char ok; /* initialized? */
36     char closed;
37     char readuniversal;
38     char readtranslate;
39     PyObject *decoder;
40     PyObject *readnl;
41     PyObject *writenl;
42 
43     PyObject *dict;
44     PyObject *weakreflist;
45     _PyIO_State *module_state;
46 } stringio;
47 
48 #define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
49 #include "clinic/stringio.c.h"
50 #undef clinic_state
51 
52 static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
53 
/* Guard macros used at the top of the methods returning PyObject *.

   Each one makes the *enclosing function* return NULL when its check fails.
   They are wrapped in do { ... } while (0) so that they behave as a single
   statement (no dangling-else surprises) and must be followed by ';'. */

/* Raise ValueError and return NULL if __init__ has not completed. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

/* Raise ValueError and return NULL if the object has been closed. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)

/* Switch the object to the realized state; return NULL if that fails
   (realize() reports failure with a negative return value). */
#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)
72 
73 
74 /* Internal routine for changing the size, in terms of characters, of the
75    buffer of StringIO objects.  The caller should ensure that the 'size'
76    argument is non-negative.  Returns 0 on success, -1 otherwise. */
77 static int
resize_buffer(stringio * self,size_t size)78 resize_buffer(stringio *self, size_t size)
79 {
80     /* Here, unsigned types are used to avoid dealing with signed integer
81        overflow, which is undefined in C. */
82     size_t alloc = self->buf_size;
83     Py_UCS4 *new_buf = NULL;
84 
85     assert(self->buf != NULL);
86 
87     /* Reserve one more char for line ending detection. */
88     size = size + 1;
89     /* For simplicity, stay in the range of the signed type. Anyway, Python
90        doesn't allow strings to be longer than this. */
91     if (size > PY_SSIZE_T_MAX)
92         goto overflow;
93 
94     if (size < alloc / 2) {
95         /* Major downsize; resize down to exact size. */
96         alloc = size + 1;
97     }
98     else if (size < alloc) {
99         /* Within allocated size; quick exit */
100         return 0;
101     }
102     else if (size <= alloc * 1.125) {
103         /* Moderate upsize; overallocate similar to list_resize() */
104         alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
105     }
106     else {
107         /* Major upsize; resize up to exact size */
108         alloc = size + 1;
109     }
110 
111     if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
112         goto overflow;
113     new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
114     if (new_buf == NULL) {
115         PyErr_NoMemory();
116         return -1;
117     }
118     self->buf_size = alloc;
119     self->buf = new_buf;
120 
121     return 0;
122 
123   overflow:
124     PyErr_SetString(PyExc_OverflowError,
125                     "new buffer size too large");
126     return -1;
127 }
128 
129 static PyObject *
make_intermediate(stringio * self)130 make_intermediate(stringio *self)
131 {
132     PyObject *intermediate = _PyUnicodeWriter_Finish(&self->writer);
133     self->state = STATE_REALIZED;
134     if (intermediate == NULL)
135         return NULL;
136 
137     _PyUnicodeWriter_Init(&self->writer);
138     self->writer.overallocate = 1;
139     if (_PyUnicodeWriter_WriteStr(&self->writer, intermediate)) {
140         Py_DECREF(intermediate);
141         return NULL;
142     }
143     self->state = STATE_ACCUMULATING;
144     return intermediate;
145 }
146 
147 static int
realize(stringio * self)148 realize(stringio *self)
149 {
150     Py_ssize_t len;
151     PyObject *intermediate;
152 
153     if (self->state == STATE_REALIZED)
154         return 0;
155     assert(self->state == STATE_ACCUMULATING);
156     self->state = STATE_REALIZED;
157 
158     intermediate = _PyUnicodeWriter_Finish(&self->writer);
159     if (intermediate == NULL)
160         return -1;
161 
162     /* Append the intermediate string to the internal buffer.
163        The length should be equal to the current cursor position.
164      */
165     len = PyUnicode_GET_LENGTH(intermediate);
166     if (resize_buffer(self, len) < 0) {
167         Py_DECREF(intermediate);
168         return -1;
169     }
170     if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
171         Py_DECREF(intermediate);
172         return -1;
173     }
174 
175     Py_DECREF(intermediate);
176     return 0;
177 }
178 
179 /* Internal routine for writing a whole PyUnicode object to the buffer of a
180    StringIO object. Returns 0 on success, or -1 on error. */
181 static Py_ssize_t
write_str(stringio * self,PyObject * obj)182 write_str(stringio *self, PyObject *obj)
183 {
184     Py_ssize_t len;
185     PyObject *decoded = NULL;
186 
187     assert(self->buf != NULL);
188     assert(self->pos >= 0);
189 
190     if (self->decoder != NULL) {
191         decoded = _PyIncrementalNewlineDecoder_decode(
192             self->decoder, obj, 1 /* always final */);
193     }
194     else {
195         decoded = Py_NewRef(obj);
196     }
197     if (self->writenl) {
198         PyObject *translated = PyUnicode_Replace(
199             decoded, _Py_LATIN1_CHR('\n'), self->writenl, -1);
200         Py_SETREF(decoded, translated);
201     }
202     if (decoded == NULL)
203         return -1;
204 
205     assert(PyUnicode_Check(decoded));
206     len = PyUnicode_GET_LENGTH(decoded);
207     assert(len >= 0);
208 
209     /* This overflow check is not strictly necessary. However, it avoids us to
210        deal with funky things like comparing an unsigned and a signed
211        integer. */
212     if (self->pos > PY_SSIZE_T_MAX - len) {
213         PyErr_SetString(PyExc_OverflowError,
214                         "new position too large");
215         goto fail;
216     }
217 
218     if (self->state == STATE_ACCUMULATING) {
219         if (self->string_size == self->pos) {
220             if (_PyUnicodeWriter_WriteStr(&self->writer, decoded))
221                 goto fail;
222             goto success;
223         }
224         if (realize(self))
225             goto fail;
226     }
227 
228     if (self->pos + len > self->string_size) {
229         if (resize_buffer(self, self->pos + len) < 0)
230             goto fail;
231     }
232 
233     if (self->pos > self->string_size) {
234         /* In case of overseek, pad with null bytes the buffer region between
235            the end of stream and the current position.
236 
237           0   lo      string_size                           hi
238           |   |<---used--->|<----------available----------->|
239           |   |            <--to pad-->|<---to write--->    |
240           0   buf                   position
241 
242         */
243         memset(self->buf + self->string_size, '\0',
244                (self->pos - self->string_size) * sizeof(Py_UCS4));
245     }
246 
247     /* Copy the data to the internal buffer, overwriting some of the
248        existing data if self->pos < self->string_size. */
249     if (!PyUnicode_AsUCS4(decoded,
250                           self->buf + self->pos,
251                           self->buf_size - self->pos,
252                           0))
253         goto fail;
254 
255 success:
256     /* Set the new length of the internal string if it has changed. */
257     self->pos += len;
258     if (self->string_size < self->pos)
259         self->string_size = self->pos;
260 
261     Py_DECREF(decoded);
262     return 0;
263 
264 fail:
265     Py_XDECREF(decoded);
266     return -1;
267 }
268 
269 /*[clinic input]
270 @critical_section
271 _io.StringIO.getvalue
272 
273 Retrieve the entire contents of the object.
274 [clinic start generated code]*/
275 
276 static PyObject *
_io_StringIO_getvalue_impl(stringio * self)277 _io_StringIO_getvalue_impl(stringio *self)
278 /*[clinic end generated code: output=27b6a7bfeaebce01 input=fb5dee06b8d467f3]*/
279 {
280     CHECK_INITIALIZED(self);
281     CHECK_CLOSED(self);
282     if (self->state == STATE_ACCUMULATING)
283         return make_intermediate(self);
284     return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
285                                      self->string_size);
286 }
287 
288 /*[clinic input]
289 @critical_section
290 _io.StringIO.tell
291 
292 Tell the current file position.
293 [clinic start generated code]*/
294 
295 static PyObject *
_io_StringIO_tell_impl(stringio * self)296 _io_StringIO_tell_impl(stringio *self)
297 /*[clinic end generated code: output=2e87ac67b116c77b input=98a08f3e2dae3550]*/
298 {
299     CHECK_INITIALIZED(self);
300     CHECK_CLOSED(self);
301     return PyLong_FromSsize_t(self->pos);
302 }
303 
304 /*[clinic input]
305 @critical_section
306 _io.StringIO.read
307     size: Py_ssize_t(accept={int, NoneType}) = -1
308     /
309 
310 Read at most size characters, returned as a string.
311 
312 If the argument is negative or omitted, read until EOF
313 is reached. Return an empty string at EOF.
314 [clinic start generated code]*/
315 
316 static PyObject *
_io_StringIO_read_impl(stringio * self,Py_ssize_t size)317 _io_StringIO_read_impl(stringio *self, Py_ssize_t size)
318 /*[clinic end generated code: output=ae8cf6002f71626c input=9fbef45d8aece8e7]*/
319 {
320     Py_ssize_t n;
321     Py_UCS4 *output;
322 
323     CHECK_INITIALIZED(self);
324     CHECK_CLOSED(self);
325 
326     /* adjust invalid sizes */
327     n = self->string_size - self->pos;
328     if (size < 0 || size > n) {
329         size = n;
330         if (size < 0)
331             size = 0;
332     }
333 
334     /* Optimization for seek(0); read() */
335     if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
336         PyObject *result = make_intermediate(self);
337         self->pos = self->string_size;
338         return result;
339     }
340 
341     ENSURE_REALIZED(self);
342     output = self->buf + self->pos;
343     self->pos += size;
344     return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
345 }
346 
347 /* Internal helper, used by stringio_readline and stringio_iternext */
348 static PyObject *
_stringio_readline(stringio * self,Py_ssize_t limit)349 _stringio_readline(stringio *self, Py_ssize_t limit)
350 {
351     Py_UCS4 *start, *end, old_char;
352     Py_ssize_t len, consumed;
353 
354     /* In case of overseek, return the empty string */
355     if (self->pos >= self->string_size)
356         return PyUnicode_New(0, 0);
357 
358     start = self->buf + self->pos;
359     if (limit < 0 || limit > self->string_size - self->pos)
360         limit = self->string_size - self->pos;
361 
362     end = start + limit;
363     old_char = *end;
364     *end = '\0';
365     len = _PyIO_find_line_ending(
366         self->readtranslate, self->readuniversal, self->readnl,
367         PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
368     *end = old_char;
369     /* If we haven't found any line ending, we just return everything
370        (`consumed` is ignored). */
371     if (len < 0)
372         len = limit;
373     self->pos += len;
374     return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
375 }
376 
377 /*[clinic input]
378 @critical_section
379 _io.StringIO.readline
380     size: Py_ssize_t(accept={int, NoneType}) = -1
381     /
382 
383 Read until newline or EOF.
384 
385 Returns an empty string if EOF is hit immediately.
386 [clinic start generated code]*/
387 
388 static PyObject *
_io_StringIO_readline_impl(stringio * self,Py_ssize_t size)389 _io_StringIO_readline_impl(stringio *self, Py_ssize_t size)
390 /*[clinic end generated code: output=cabd6452f1b7e85d input=4d14b8495dea1d98]*/
391 {
392     CHECK_INITIALIZED(self);
393     CHECK_CLOSED(self);
394     ENSURE_REALIZED(self);
395 
396     return _stringio_readline(self, size);
397 }
398 
399 static PyObject *
stringio_iternext(stringio * self)400 stringio_iternext(stringio *self)
401 {
402     PyObject *line;
403 
404     CHECK_INITIALIZED(self);
405     CHECK_CLOSED(self);
406     ENSURE_REALIZED(self);
407 
408     if (Py_IS_TYPE(self, self->module_state->PyStringIO_Type)) {
409         /* Skip method call overhead for speed */
410         line = _stringio_readline(self, -1);
411     }
412     else {
413         /* XXX is subclassing StringIO really supported? */
414         line = PyObject_CallMethodNoArgs((PyObject *)self,
415                                              &_Py_ID(readline));
416         if (line && !PyUnicode_Check(line)) {
417             PyErr_Format(PyExc_OSError,
418                          "readline() should have returned a str object, "
419                          "not '%.200s'", Py_TYPE(line)->tp_name);
420             Py_DECREF(line);
421             return NULL;
422         }
423     }
424 
425     if (line == NULL)
426         return NULL;
427 
428     if (PyUnicode_GET_LENGTH(line) == 0) {
429         /* Reached EOF */
430         Py_DECREF(line);
431         return NULL;
432     }
433 
434     return line;
435 }
436 
437 /*[clinic input]
438 @critical_section
439 _io.StringIO.truncate
440     pos as size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None
441     /
442 
443 Truncate size to pos.
444 
445 The pos argument defaults to the current file position, as
446 returned by tell().  The current file position is unchanged.
447 Returns the new absolute position.
448 [clinic start generated code]*/
449 
450 static PyObject *
_io_StringIO_truncate_impl(stringio * self,Py_ssize_t size)451 _io_StringIO_truncate_impl(stringio *self, Py_ssize_t size)
452 /*[clinic end generated code: output=eb3aef8e06701365 input=461b872dce238452]*/
453 {
454     CHECK_INITIALIZED(self);
455     CHECK_CLOSED(self);
456 
457     if (size < 0) {
458         PyErr_Format(PyExc_ValueError,
459                      "Negative size value %zd", size);
460         return NULL;
461     }
462 
463     if (size < self->string_size) {
464         ENSURE_REALIZED(self);
465         if (resize_buffer(self, size) < 0)
466             return NULL;
467         self->string_size = size;
468     }
469 
470     return PyLong_FromSsize_t(size);
471 }
472 
473 /*[clinic input]
474 @critical_section
475 _io.StringIO.seek
476     pos: Py_ssize_t
477     whence: int = 0
478     /
479 
480 Change stream position.
481 
482 Seek to character offset pos relative to position indicated by whence:
483     0  Start of stream (the default).  pos should be >= 0;
484     1  Current position - pos must be 0;
485     2  End of stream - pos must be 0.
486 Returns the new absolute position.
487 [clinic start generated code]*/
488 
489 static PyObject *
_io_StringIO_seek_impl(stringio * self,Py_ssize_t pos,int whence)490 _io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence)
491 /*[clinic end generated code: output=e9e0ac9a8ae71c25 input=c75ced09343a00d7]*/
492 {
493     CHECK_INITIALIZED(self);
494     CHECK_CLOSED(self);
495 
496     if (whence != 0 && whence != 1 && whence != 2) {
497         PyErr_Format(PyExc_ValueError,
498                      "Invalid whence (%i, should be 0, 1 or 2)", whence);
499         return NULL;
500     }
501     else if (pos < 0 && whence == 0) {
502         PyErr_Format(PyExc_ValueError,
503                      "Negative seek position %zd", pos);
504         return NULL;
505     }
506     else if (whence != 0 && pos != 0) {
507         PyErr_SetString(PyExc_OSError,
508                         "Can't do nonzero cur-relative seeks");
509         return NULL;
510     }
511 
512     /* whence = 0: offset relative to beginning of the string.
513        whence = 1: no change to current position.
514        whence = 2: change position to end of file. */
515     if (whence == 1) {
516         pos = self->pos;
517     }
518     else if (whence == 2) {
519         pos = self->string_size;
520     }
521 
522     self->pos = pos;
523 
524     return PyLong_FromSsize_t(self->pos);
525 }
526 
527 /*[clinic input]
528 @critical_section
529 _io.StringIO.write
530     s as obj: object
531     /
532 
533 Write string to file.
534 
535 Returns the number of characters written, which is always equal to
536 the length of the string.
537 [clinic start generated code]*/
538 
539 static PyObject *
_io_StringIO_write_impl(stringio * self,PyObject * obj)540 _io_StringIO_write_impl(stringio *self, PyObject *obj)
541 /*[clinic end generated code: output=d53b1d841d7db288 input=1561272c0da4651f]*/
542 {
543     Py_ssize_t size;
544 
545     CHECK_INITIALIZED(self);
546     if (!PyUnicode_Check(obj)) {
547         PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
548                      Py_TYPE(obj)->tp_name);
549         return NULL;
550     }
551     CHECK_CLOSED(self);
552     size = PyUnicode_GET_LENGTH(obj);
553 
554     if (size > 0 && write_str(self, obj) < 0)
555         return NULL;
556 
557     return PyLong_FromSsize_t(size);
558 }
559 
560 /*[clinic input]
561 @critical_section
562 _io.StringIO.close
563 
564 Close the IO object.
565 
566 Attempting any further operation after the object is closed
567 will raise a ValueError.
568 
569 This method has no effect if the file is already closed.
570 [clinic start generated code]*/
571 
572 static PyObject *
_io_StringIO_close_impl(stringio * self)573 _io_StringIO_close_impl(stringio *self)
574 /*[clinic end generated code: output=04399355cbe518f1 input=305d19aa29cc40b9]*/
575 {
576     self->closed = 1;
577     /* Free up some memory */
578     if (resize_buffer(self, 0) < 0)
579         return NULL;
580     _PyUnicodeWriter_Dealloc(&self->writer);
581     Py_CLEAR(self->readnl);
582     Py_CLEAR(self->writenl);
583     Py_CLEAR(self->decoder);
584     Py_RETURN_NONE;
585 }
586 
587 static int
stringio_traverse(stringio * self,visitproc visit,void * arg)588 stringio_traverse(stringio *self, visitproc visit, void *arg)
589 {
590     Py_VISIT(Py_TYPE(self));
591     Py_VISIT(self->readnl);
592     Py_VISIT(self->writenl);
593     Py_VISIT(self->decoder);
594     Py_VISIT(self->dict);
595     return 0;
596 }
597 
598 static int
stringio_clear(stringio * self)599 stringio_clear(stringio *self)
600 {
601     Py_CLEAR(self->readnl);
602     Py_CLEAR(self->writenl);
603     Py_CLEAR(self->decoder);
604     Py_CLEAR(self->dict);
605     return 0;
606 }
607 
608 static void
stringio_dealloc(stringio * self)609 stringio_dealloc(stringio *self)
610 {
611     PyTypeObject *tp = Py_TYPE(self);
612     _PyObject_GC_UNTRACK(self);
613     self->ok = 0;
614     if (self->buf) {
615         PyMem_Free(self->buf);
616         self->buf = NULL;
617     }
618     _PyUnicodeWriter_Dealloc(&self->writer);
619     (void)stringio_clear(self);
620     if (self->weakreflist != NULL) {
621         PyObject_ClearWeakRefs((PyObject *) self);
622     }
623     tp->tp_free(self);
624     Py_DECREF(tp);
625 }
626 
627 static PyObject *
stringio_new(PyTypeObject * type,PyObject * args,PyObject * kwds)628 stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
629 {
630     stringio *self;
631 
632     assert(type != NULL && type->tp_alloc != NULL);
633     self = (stringio *)type->tp_alloc(type, 0);
634     if (self == NULL)
635         return NULL;
636 
637     /* tp_alloc initializes all the fields to zero. So we don't have to
638        initialize them here. */
639 
640     self->buf = (Py_UCS4 *)PyMem_Malloc(0);
641     if (self->buf == NULL) {
642         Py_DECREF(self);
643         return PyErr_NoMemory();
644     }
645 
646     return (PyObject *)self;
647 }
648 
649 /*[clinic input]
650 _io.StringIO.__init__
651     initial_value as value: object(c_default="NULL") = ''
652     newline as newline_obj: object(c_default="NULL") = '\n'
653 
654 Text I/O implementation using an in-memory buffer.
655 
656 The initial_value argument sets the value of object.  The newline
657 argument is like the one of TextIOWrapper's constructor.
658 [clinic start generated code]*/
659 
660 static int
_io_StringIO___init___impl(stringio * self,PyObject * value,PyObject * newline_obj)661 _io_StringIO___init___impl(stringio *self, PyObject *value,
662                            PyObject *newline_obj)
663 /*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/
664 {
665     const char *newline = "\n";
666     Py_ssize_t value_len;
667 
668     /* Parse the newline argument. We only want to allow unicode objects or
669        None. */
670     if (newline_obj == Py_None) {
671         newline = NULL;
672     }
673     else if (newline_obj) {
674         if (!PyUnicode_Check(newline_obj)) {
675             PyErr_Format(PyExc_TypeError,
676                          "newline must be str or None, not %.200s",
677                          Py_TYPE(newline_obj)->tp_name);
678             return -1;
679         }
680         newline = PyUnicode_AsUTF8(newline_obj);
681         if (newline == NULL)
682             return -1;
683     }
684 
685     if (newline && newline[0] != '\0'
686         && !(newline[0] == '\n' && newline[1] == '\0')
687         && !(newline[0] == '\r' && newline[1] == '\0')
688         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
689         PyErr_Format(PyExc_ValueError,
690                      "illegal newline value: %R", newline_obj);
691         return -1;
692     }
693     if (value && value != Py_None && !PyUnicode_Check(value)) {
694         PyErr_Format(PyExc_TypeError,
695                      "initial_value must be str or None, not %.200s",
696                      Py_TYPE(value)->tp_name);
697         return -1;
698     }
699 
700     self->ok = 0;
701 
702     _PyUnicodeWriter_Dealloc(&self->writer);
703     Py_CLEAR(self->readnl);
704     Py_CLEAR(self->writenl);
705     Py_CLEAR(self->decoder);
706 
707     assert((newline != NULL && newline_obj != Py_None) ||
708            (newline == NULL && newline_obj == Py_None));
709 
710     if (newline) {
711         self->readnl = PyUnicode_FromString(newline);
712         if (self->readnl == NULL)
713             return -1;
714     }
715     self->readuniversal = (newline == NULL || newline[0] == '\0');
716     self->readtranslate = (newline == NULL);
717     /* If newline == "", we don't translate anything.
718        If newline == "\n" or newline == None, we translate to "\n", which is
719        a no-op.
720        (for newline == None, TextIOWrapper translates to os.linesep, but it
721        is pointless for StringIO)
722     */
723     if (newline != NULL && newline[0] == '\r') {
724         self->writenl = Py_NewRef(self->readnl);
725     }
726 
727     _PyIO_State *module_state = find_io_state_by_def(Py_TYPE(self));
728     if (self->readuniversal) {
729         self->decoder = PyObject_CallFunctionObjArgs(
730             (PyObject *)module_state->PyIncrementalNewlineDecoder_Type,
731             Py_None, self->readtranslate ? Py_True : Py_False, NULL);
732         if (self->decoder == NULL)
733             return -1;
734     }
735 
736     /* Now everything is set up, resize buffer to size of initial value,
737        and copy it */
738     self->string_size = 0;
739     if (value && value != Py_None)
740         value_len = PyUnicode_GetLength(value);
741     else
742         value_len = 0;
743     if (value_len > 0) {
744         /* This is a heuristic, for newline translation might change
745            the string length. */
746         if (resize_buffer(self, 0) < 0)
747             return -1;
748         self->state = STATE_REALIZED;
749         self->pos = 0;
750         if (write_str(self, value) < 0)
751             return -1;
752     }
753     else {
754         /* Empty stringio object, we can start by accumulating */
755         if (resize_buffer(self, 0) < 0)
756             return -1;
757         _PyUnicodeWriter_Init(&self->writer);
758         self->writer.overallocate = 1;
759         self->state = STATE_ACCUMULATING;
760     }
761     self->pos = 0;
762     self->module_state = module_state;
763     self->closed = 0;
764     self->ok = 1;
765     return 0;
766 }
767 
768 /* Properties and pseudo-properties */
769 
770 /*[clinic input]
771 @critical_section
772 _io.StringIO.readable
773 
774 Returns True if the IO object can be read.
775 [clinic start generated code]*/
776 
777 static PyObject *
_io_StringIO_readable_impl(stringio * self)778 _io_StringIO_readable_impl(stringio *self)
779 /*[clinic end generated code: output=b19d44dd8b1ceb99 input=6cd2ffd65a8e8763]*/
780 {
781     CHECK_INITIALIZED(self);
782     CHECK_CLOSED(self);
783     Py_RETURN_TRUE;
784 }
785 
786 /*[clinic input]
787 @critical_section
788 _io.StringIO.writable
789 
790 Returns True if the IO object can be written.
791 [clinic start generated code]*/
792 
793 static PyObject *
_io_StringIO_writable_impl(stringio * self)794 _io_StringIO_writable_impl(stringio *self)
795 /*[clinic end generated code: output=13e4dd77187074ca input=1b3c63dbaa761c69]*/
796 {
797     CHECK_INITIALIZED(self);
798     CHECK_CLOSED(self);
799     Py_RETURN_TRUE;
800 }
801 
802 /*[clinic input]
803 @critical_section
804 _io.StringIO.seekable
805 
806 Returns True if the IO object can be seeked.
807 [clinic start generated code]*/
808 
809 static PyObject *
_io_StringIO_seekable_impl(stringio * self)810 _io_StringIO_seekable_impl(stringio *self)
811 /*[clinic end generated code: output=4d20b4641c756879 input=a820fad2cf085fc3]*/
812 {
813     CHECK_INITIALIZED(self);
814     CHECK_CLOSED(self);
815     Py_RETURN_TRUE;
816 }
817 
818 /* Pickling support.
819 
820    The implementation of __getstate__ is similar to the one for BytesIO,
821    except that we also save the newline parameter. For __setstate__ and unlike
822    BytesIO, we call __init__ to restore the object's state. Doing so allows us
823    to avoid decoding the complex newline state while keeping the object
824    representation compact.
825 
826    See comment in bytesio.c regarding why only pickle protocol 2 and onward
827    is supported.
828 */
829 
830 /*[clinic input]
831 @critical_section
832 _io.StringIO.__getstate__
833 
834 [clinic start generated code]*/
835 
836 static PyObject *
_io_StringIO___getstate___impl(stringio * self)837 _io_StringIO___getstate___impl(stringio *self)
838 /*[clinic end generated code: output=780be4a996410199 input=76f27255ef83bb92]*/
839 {
840     PyObject *initvalue = _io_StringIO_getvalue_impl(self);
841     PyObject *dict;
842     PyObject *state;
843 
844     if (initvalue == NULL)
845         return NULL;
846     if (self->dict == NULL) {
847         dict = Py_NewRef(Py_None);
848     }
849     else {
850         dict = PyDict_Copy(self->dict);
851         if (dict == NULL) {
852             Py_DECREF(initvalue);
853             return NULL;
854         }
855     }
856 
857     state = Py_BuildValue("(OOnN)", initvalue,
858                           self->readnl ? self->readnl : Py_None,
859                           self->pos, dict);
860     Py_DECREF(initvalue);
861     return state;
862 }
863 
864 /*[clinic input]
865 @critical_section
866 _io.StringIO.__setstate__
867 
868     state: object
869     /
870 [clinic start generated code]*/
871 
872 static PyObject *
_io_StringIO___setstate___impl(stringio * self,PyObject * state)873 _io_StringIO___setstate___impl(stringio *self, PyObject *state)
874 /*[clinic end generated code: output=cb3962bc6d5c5609 input=8a27784b11b82e47]*/
875 {
876     PyObject *initarg;
877     PyObject *position_obj;
878     PyObject *dict;
879     Py_ssize_t pos;
880 
881     assert(state != NULL);
882     CHECK_CLOSED(self);
883 
884     /* We allow the state tuple to be longer than 4, because we may need
885        someday to extend the object's state without breaking
886        backward-compatibility. */
887     if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) {
888         PyErr_Format(PyExc_TypeError,
889                      "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
890                      Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
891         return NULL;
892     }
893 
894     /* Initialize the object's state. */
895     initarg = PyTuple_GetSlice(state, 0, 2);
896     if (initarg == NULL)
897         return NULL;
898     if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) {
899         Py_DECREF(initarg);
900         return NULL;
901     }
902     Py_DECREF(initarg);
903 
904     /* Restore the buffer state. Even if __init__ did initialize the buffer,
905        we have to initialize it again since __init__ may translate the
906        newlines in the initial_value string. We clearly do not want that
907        because the string value in the state tuple has already been translated
908        once by __init__. So we do not take any chance and replace object's
909        buffer completely. */
910     {
911         PyObject *item = PyTuple_GET_ITEM(state, 0);
912         if (PyUnicode_Check(item)) {
913             Py_UCS4 *buf = PyUnicode_AsUCS4Copy(item);
914             if (buf == NULL)
915                 return NULL;
916             Py_ssize_t bufsize = PyUnicode_GET_LENGTH(item);
917 
918             if (resize_buffer(self, bufsize) < 0) {
919                 PyMem_Free(buf);
920                 return NULL;
921             }
922             memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
923             PyMem_Free(buf);
924             self->string_size = bufsize;
925         }
926         else {
927             assert(item == Py_None);
928             self->string_size = 0;
929         }
930     }
931 
932     /* Set carefully the position value. Alternatively, we could use the seek
933        method instead of modifying self->pos directly to better protect the
934        object internal state against erroneous (or malicious) inputs. */
935     position_obj = PyTuple_GET_ITEM(state, 2);
936     if (!PyLong_Check(position_obj)) {
937         PyErr_Format(PyExc_TypeError,
938                      "third item of state must be an integer, got %.200s",
939                      Py_TYPE(position_obj)->tp_name);
940         return NULL;
941     }
942     pos = PyLong_AsSsize_t(position_obj);
943     if (pos == -1 && PyErr_Occurred())
944         return NULL;
945     if (pos < 0) {
946         PyErr_SetString(PyExc_ValueError,
947                         "position value cannot be negative");
948         return NULL;
949     }
950     self->pos = pos;
951 
952     /* Set the dictionary of the instance variables. */
953     dict = PyTuple_GET_ITEM(state, 3);
954     if (dict != Py_None) {
955         if (!PyDict_Check(dict)) {
956             PyErr_Format(PyExc_TypeError,
957                          "fourth item of state should be a dict, got a %.200s",
958                          Py_TYPE(dict)->tp_name);
959             return NULL;
960         }
961         if (self->dict) {
962             /* Alternatively, we could replace the internal dictionary
963                completely. However, it seems more practical to just update it. */
964             if (PyDict_Update(self->dict, dict) < 0)
965                 return NULL;
966         }
967         else {
968             self->dict = Py_NewRef(dict);
969         }
970     }
971 
972     Py_RETURN_NONE;
973 }
974 
975 /*[clinic input]
976 @critical_section
977 @getter
978 _io.StringIO.closed
979 [clinic start generated code]*/
980 
981 static PyObject *
_io_StringIO_closed_get_impl(stringio * self)982 _io_StringIO_closed_get_impl(stringio *self)
983 /*[clinic end generated code: output=531ddca7954331d6 input=178d2ef24395fd49]*/
984 {
985     CHECK_INITIALIZED(self);
986     return PyBool_FromLong(self->closed);
987 }
988 
989 /*[clinic input]
990 @critical_section
991 @getter
992 _io.StringIO.line_buffering
993 [clinic start generated code]*/
994 
995 static PyObject *
_io_StringIO_line_buffering_get_impl(stringio * self)996 _io_StringIO_line_buffering_get_impl(stringio *self)
997 /*[clinic end generated code: output=360710e0112966ae input=6a7634e7f890745e]*/
998 {
999     CHECK_INITIALIZED(self);
1000     CHECK_CLOSED(self);
1001     Py_RETURN_FALSE;
1002 }
1003 
1004 /*[clinic input]
1005 @critical_section
1006 @getter
1007 _io.StringIO.newlines
1008 [clinic start generated code]*/
1009 
1010 static PyObject *
_io_StringIO_newlines_get_impl(stringio * self)1011 _io_StringIO_newlines_get_impl(stringio *self)
1012 /*[clinic end generated code: output=35d7c0b66d7e0160 input=092a14586718244b]*/
1013 {
1014     CHECK_INITIALIZED(self);
1015     CHECK_CLOSED(self);
1016     if (self->decoder == NULL) {
1017         Py_RETURN_NONE;
1018     }
1019     return PyObject_GetAttr(self->decoder, &_Py_ID(newlines));
1020 }
1021 
/* Method table installed on the StringIO type through the Py_tp_methods slot.
   The *_METHODDEF entries are macros, presumably supplied by the Argument
   Clinic header "clinic/stringio.c.h" included near the top of this file;
   each one expands to a complete PyMethodDef initializer (including its
   trailing comma), which is why no commas appear between them here. */
static struct PyMethodDef stringio_methods[] = {
    /* Core stream operations. */
    _IO_STRINGIO_CLOSE_METHODDEF
    _IO_STRINGIO_GETVALUE_METHODDEF
    _IO_STRINGIO_READ_METHODDEF
    _IO_STRINGIO_READLINE_METHODDEF
    _IO_STRINGIO_TELL_METHODDEF
    _IO_STRINGIO_TRUNCATE_METHODDEF
    _IO_STRINGIO_SEEK_METHODDEF
    _IO_STRINGIO_WRITE_METHODDEF

    /* Capability queries. */
    _IO_STRINGIO_SEEKABLE_METHODDEF
    _IO_STRINGIO_READABLE_METHODDEF
    _IO_STRINGIO_WRITABLE_METHODDEF

    /* Pickling support. */
    _IO_STRINGIO___GETSTATE___METHODDEF
    _IO_STRINGIO___SETSTATE___METHODDEF
    {NULL, NULL}        /* sentinel */
};
1040 
/* Computed-attribute table installed through the Py_tp_getset slot.  The
   *_GETSETDEF macros pair each attribute name with its getter; the getters
   for closed, line_buffering and newlines are the *_get_impl functions
   defined earlier in this file (wrapped by Argument Clinic). */
static PyGetSetDef stringio_getset[] = {
    _IO_STRINGIO_CLOSED_GETSETDEF
    _IO_STRINGIO_NEWLINES_GETSETDEF
    /*  (The following comment is taken straight from the original Python
        wrapper:)
        XXX Cruft to support the TextIOWrapper API. This would only
        be meaningful if StringIO supported the buffer attribute.
        Hopefully, a better solution than adding these pseudo-attributes
        will be found.
    */
    _IO_STRINGIO_LINE_BUFFERING_GETSETDEF
    {NULL}  /* sentinel */
};
1053 
/* Member table installed through the Py_tp_members slot.  It contains only
   the two special names understood by the type-creation machinery: they
   report where, inside the stringio struct, the weak-reference list and the
   instance dictionary pointers live, so that instances support weakrefs and
   arbitrary attributes. */
static struct PyMemberDef stringio_members[] = {
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(stringio, weakreflist), Py_READONLY},
    {"__dictoffset__", Py_T_PYSSIZET, offsetof(stringio, dict), Py_READONLY},
    {NULL},  /* sentinel */
};
1059 
/* Slot table for the heap type described by stringio_spec.  It wires the
   lifecycle, GC, iteration and attribute/method tables defined in this file
   into the type. */
static PyType_Slot stringio_slots[] = {
    /* Destruction and cyclic-GC support. */
    {Py_tp_dealloc, stringio_dealloc},
    /* The type's docstring is the one generated for __init__. */
    {Py_tp_doc, (void *)_io_StringIO___init____doc__},
    {Py_tp_traverse, stringio_traverse},
    {Py_tp_clear, stringio_clear},
    /* Iteration protocol (__next__). */
    {Py_tp_iternext, stringio_iternext},
    /* Method, member and getset tables defined above. */
    {Py_tp_methods, stringio_methods},
    {Py_tp_members, stringio_members},
    {Py_tp_getset, stringio_getset},
    /* Construction. */
    {Py_tp_init, _io_StringIO___init__},
    {Py_tp_new, stringio_new},
    {0, NULL},  /* sentinel */
};
1073 
/* Type specification for _io.StringIO.  Not static: it has external linkage
   so that code outside this file can create the type from it.
   Flags: the type may be subclassed (BASETYPE), its instances take part in
   cyclic garbage collection (HAVE_GC, matching the traverse/clear slots),
   and the type object's attributes cannot be modified once it is created
   (IMMUTABLETYPE). */
PyType_Spec stringio_spec = {
    .name = "_io.StringIO",
    .basicsize = sizeof(stringio),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = stringio_slots,
};
1081