1 #include "Python.h"
2 #include <stddef.h> // offsetof()
3 #include "pycore_object.h"
4 #include "_iomodule.h"
5
6 /* Implementation note: the buffer is always at least one character longer
7 than the enclosed string, for proper functioning of _PyIO_find_line_ending.
8 */
9
10 #define STATE_REALIZED 1
11 #define STATE_ACCUMULATING 2
12
13 /*[clinic input]
14 module _io
15 class _io.StringIO "stringio *" "clinic_state()->PyStringIO_Type"
16 [clinic start generated code]*/
17 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2693eada0658d470]*/
18
19 typedef struct {
20 PyObject_HEAD
21 Py_UCS4 *buf;
22 Py_ssize_t pos;
23 Py_ssize_t string_size;
24 size_t buf_size;
25
26 /* The stringio object can be in two states: accumulating or realized.
27 In accumulating state, the internal buffer contains nothing and
28 the contents are given by the embedded _PyUnicodeWriter structure.
29 In realized state, the internal buffer is meaningful and the
30 _PyUnicodeWriter is destroyed.
31 */
32 int state;
33 _PyUnicodeWriter writer;
34
35 char ok; /* initialized? */
36 char closed;
37 char readuniversal;
38 char readtranslate;
39 PyObject *decoder;
40 PyObject *readnl;
41 PyObject *writenl;
42
43 PyObject *dict;
44 PyObject *weakreflist;
45 _PyIO_State *module_state;
46 } stringio;
47
48 #define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
49 #include "clinic/stringio.c.h"
50 #undef clinic_state
51
52 static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
53
/* Make the calling function return NULL with a ValueError set when `self`
   has not been successfully initialized (self->ok <= 0).
   Wrapped in do { } while (0) so the macro behaves as a single statement
   (safe inside an unbraced if/else); must be followed by a semicolon. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
60
/* Make the calling function return NULL with a ValueError set when `self`
   has been closed.
   Wrapped in do { } while (0) so the macro behaves as a single statement
   (safe inside an unbraced if/else); must be followed by a semicolon. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)
67
/* Switch `self` to the realized state via realize(); make the calling
   function return NULL if that fails.
   Wrapped in do { } while (0) so the macro behaves as a single statement
   (safe inside an unbraced if/else); must be followed by a semicolon. */
#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)
72
73
74 /* Internal routine for changing the size, in terms of characters, of the
75 buffer of StringIO objects. The caller should ensure that the 'size'
76 argument is non-negative. Returns 0 on success, -1 otherwise. */
77 static int
resize_buffer(stringio * self,size_t size)78 resize_buffer(stringio *self, size_t size)
79 {
80 /* Here, unsigned types are used to avoid dealing with signed integer
81 overflow, which is undefined in C. */
82 size_t alloc = self->buf_size;
83 Py_UCS4 *new_buf = NULL;
84
85 assert(self->buf != NULL);
86
87 /* Reserve one more char for line ending detection. */
88 size = size + 1;
89 /* For simplicity, stay in the range of the signed type. Anyway, Python
90 doesn't allow strings to be longer than this. */
91 if (size > PY_SSIZE_T_MAX)
92 goto overflow;
93
94 if (size < alloc / 2) {
95 /* Major downsize; resize down to exact size. */
96 alloc = size + 1;
97 }
98 else if (size < alloc) {
99 /* Within allocated size; quick exit */
100 return 0;
101 }
102 else if (size <= alloc * 1.125) {
103 /* Moderate upsize; overallocate similar to list_resize() */
104 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
105 }
106 else {
107 /* Major upsize; resize up to exact size */
108 alloc = size + 1;
109 }
110
111 if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
112 goto overflow;
113 new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
114 if (new_buf == NULL) {
115 PyErr_NoMemory();
116 return -1;
117 }
118 self->buf_size = alloc;
119 self->buf = new_buf;
120
121 return 0;
122
123 overflow:
124 PyErr_SetString(PyExc_OverflowError,
125 "new buffer size too large");
126 return -1;
127 }
128
129 static PyObject *
make_intermediate(stringio * self)130 make_intermediate(stringio *self)
131 {
132 PyObject *intermediate = _PyUnicodeWriter_Finish(&self->writer);
133 self->state = STATE_REALIZED;
134 if (intermediate == NULL)
135 return NULL;
136
137 _PyUnicodeWriter_Init(&self->writer);
138 self->writer.overallocate = 1;
139 if (_PyUnicodeWriter_WriteStr(&self->writer, intermediate)) {
140 Py_DECREF(intermediate);
141 return NULL;
142 }
143 self->state = STATE_ACCUMULATING;
144 return intermediate;
145 }
146
147 static int
realize(stringio * self)148 realize(stringio *self)
149 {
150 Py_ssize_t len;
151 PyObject *intermediate;
152
153 if (self->state == STATE_REALIZED)
154 return 0;
155 assert(self->state == STATE_ACCUMULATING);
156 self->state = STATE_REALIZED;
157
158 intermediate = _PyUnicodeWriter_Finish(&self->writer);
159 if (intermediate == NULL)
160 return -1;
161
162 /* Append the intermediate string to the internal buffer.
163 The length should be equal to the current cursor position.
164 */
165 len = PyUnicode_GET_LENGTH(intermediate);
166 if (resize_buffer(self, len) < 0) {
167 Py_DECREF(intermediate);
168 return -1;
169 }
170 if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
171 Py_DECREF(intermediate);
172 return -1;
173 }
174
175 Py_DECREF(intermediate);
176 return 0;
177 }
178
179 /* Internal routine for writing a whole PyUnicode object to the buffer of a
180 StringIO object. Returns 0 on success, or -1 on error. */
181 static Py_ssize_t
write_str(stringio * self,PyObject * obj)182 write_str(stringio *self, PyObject *obj)
183 {
184 Py_ssize_t len;
185 PyObject *decoded = NULL;
186
187 assert(self->buf != NULL);
188 assert(self->pos >= 0);
189
190 if (self->decoder != NULL) {
191 decoded = _PyIncrementalNewlineDecoder_decode(
192 self->decoder, obj, 1 /* always final */);
193 }
194 else {
195 decoded = Py_NewRef(obj);
196 }
197 if (self->writenl) {
198 PyObject *translated = PyUnicode_Replace(
199 decoded, _Py_LATIN1_CHR('\n'), self->writenl, -1);
200 Py_SETREF(decoded, translated);
201 }
202 if (decoded == NULL)
203 return -1;
204
205 assert(PyUnicode_Check(decoded));
206 len = PyUnicode_GET_LENGTH(decoded);
207 assert(len >= 0);
208
209 /* This overflow check is not strictly necessary. However, it avoids us to
210 deal with funky things like comparing an unsigned and a signed
211 integer. */
212 if (self->pos > PY_SSIZE_T_MAX - len) {
213 PyErr_SetString(PyExc_OverflowError,
214 "new position too large");
215 goto fail;
216 }
217
218 if (self->state == STATE_ACCUMULATING) {
219 if (self->string_size == self->pos) {
220 if (_PyUnicodeWriter_WriteStr(&self->writer, decoded))
221 goto fail;
222 goto success;
223 }
224 if (realize(self))
225 goto fail;
226 }
227
228 if (self->pos + len > self->string_size) {
229 if (resize_buffer(self, self->pos + len) < 0)
230 goto fail;
231 }
232
233 if (self->pos > self->string_size) {
234 /* In case of overseek, pad with null bytes the buffer region between
235 the end of stream and the current position.
236
237 0 lo string_size hi
238 | |<---used--->|<----------available----------->|
239 | | <--to pad-->|<---to write---> |
240 0 buf position
241
242 */
243 memset(self->buf + self->string_size, '\0',
244 (self->pos - self->string_size) * sizeof(Py_UCS4));
245 }
246
247 /* Copy the data to the internal buffer, overwriting some of the
248 existing data if self->pos < self->string_size. */
249 if (!PyUnicode_AsUCS4(decoded,
250 self->buf + self->pos,
251 self->buf_size - self->pos,
252 0))
253 goto fail;
254
255 success:
256 /* Set the new length of the internal string if it has changed. */
257 self->pos += len;
258 if (self->string_size < self->pos)
259 self->string_size = self->pos;
260
261 Py_DECREF(decoded);
262 return 0;
263
264 fail:
265 Py_XDECREF(decoded);
266 return -1;
267 }
268
269 /*[clinic input]
270 @critical_section
271 _io.StringIO.getvalue
272
273 Retrieve the entire contents of the object.
274 [clinic start generated code]*/
275
276 static PyObject *
_io_StringIO_getvalue_impl(stringio * self)277 _io_StringIO_getvalue_impl(stringio *self)
278 /*[clinic end generated code: output=27b6a7bfeaebce01 input=fb5dee06b8d467f3]*/
279 {
280 CHECK_INITIALIZED(self);
281 CHECK_CLOSED(self);
282 if (self->state == STATE_ACCUMULATING)
283 return make_intermediate(self);
284 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
285 self->string_size);
286 }
287
288 /*[clinic input]
289 @critical_section
290 _io.StringIO.tell
291
292 Tell the current file position.
293 [clinic start generated code]*/
294
295 static PyObject *
_io_StringIO_tell_impl(stringio * self)296 _io_StringIO_tell_impl(stringio *self)
297 /*[clinic end generated code: output=2e87ac67b116c77b input=98a08f3e2dae3550]*/
298 {
299 CHECK_INITIALIZED(self);
300 CHECK_CLOSED(self);
301 return PyLong_FromSsize_t(self->pos);
302 }
303
304 /*[clinic input]
305 @critical_section
306 _io.StringIO.read
307 size: Py_ssize_t(accept={int, NoneType}) = -1
308 /
309
310 Read at most size characters, returned as a string.
311
312 If the argument is negative or omitted, read until EOF
313 is reached. Return an empty string at EOF.
314 [clinic start generated code]*/
315
316 static PyObject *
_io_StringIO_read_impl(stringio * self,Py_ssize_t size)317 _io_StringIO_read_impl(stringio *self, Py_ssize_t size)
318 /*[clinic end generated code: output=ae8cf6002f71626c input=9fbef45d8aece8e7]*/
319 {
320 Py_ssize_t n;
321 Py_UCS4 *output;
322
323 CHECK_INITIALIZED(self);
324 CHECK_CLOSED(self);
325
326 /* adjust invalid sizes */
327 n = self->string_size - self->pos;
328 if (size < 0 || size > n) {
329 size = n;
330 if (size < 0)
331 size = 0;
332 }
333
334 /* Optimization for seek(0); read() */
335 if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
336 PyObject *result = make_intermediate(self);
337 self->pos = self->string_size;
338 return result;
339 }
340
341 ENSURE_REALIZED(self);
342 output = self->buf + self->pos;
343 self->pos += size;
344 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
345 }
346
347 /* Internal helper, used by stringio_readline and stringio_iternext */
348 static PyObject *
_stringio_readline(stringio * self,Py_ssize_t limit)349 _stringio_readline(stringio *self, Py_ssize_t limit)
350 {
351 Py_UCS4 *start, *end, old_char;
352 Py_ssize_t len, consumed;
353
354 /* In case of overseek, return the empty string */
355 if (self->pos >= self->string_size)
356 return PyUnicode_New(0, 0);
357
358 start = self->buf + self->pos;
359 if (limit < 0 || limit > self->string_size - self->pos)
360 limit = self->string_size - self->pos;
361
362 end = start + limit;
363 old_char = *end;
364 *end = '\0';
365 len = _PyIO_find_line_ending(
366 self->readtranslate, self->readuniversal, self->readnl,
367 PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
368 *end = old_char;
369 /* If we haven't found any line ending, we just return everything
370 (`consumed` is ignored). */
371 if (len < 0)
372 len = limit;
373 self->pos += len;
374 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
375 }
376
377 /*[clinic input]
378 @critical_section
379 _io.StringIO.readline
380 size: Py_ssize_t(accept={int, NoneType}) = -1
381 /
382
383 Read until newline or EOF.
384
385 Returns an empty string if EOF is hit immediately.
386 [clinic start generated code]*/
387
388 static PyObject *
_io_StringIO_readline_impl(stringio * self,Py_ssize_t size)389 _io_StringIO_readline_impl(stringio *self, Py_ssize_t size)
390 /*[clinic end generated code: output=cabd6452f1b7e85d input=4d14b8495dea1d98]*/
391 {
392 CHECK_INITIALIZED(self);
393 CHECK_CLOSED(self);
394 ENSURE_REALIZED(self);
395
396 return _stringio_readline(self, size);
397 }
398
399 static PyObject *
stringio_iternext(stringio * self)400 stringio_iternext(stringio *self)
401 {
402 PyObject *line;
403
404 CHECK_INITIALIZED(self);
405 CHECK_CLOSED(self);
406 ENSURE_REALIZED(self);
407
408 if (Py_IS_TYPE(self, self->module_state->PyStringIO_Type)) {
409 /* Skip method call overhead for speed */
410 line = _stringio_readline(self, -1);
411 }
412 else {
413 /* XXX is subclassing StringIO really supported? */
414 line = PyObject_CallMethodNoArgs((PyObject *)self,
415 &_Py_ID(readline));
416 if (line && !PyUnicode_Check(line)) {
417 PyErr_Format(PyExc_OSError,
418 "readline() should have returned a str object, "
419 "not '%.200s'", Py_TYPE(line)->tp_name);
420 Py_DECREF(line);
421 return NULL;
422 }
423 }
424
425 if (line == NULL)
426 return NULL;
427
428 if (PyUnicode_GET_LENGTH(line) == 0) {
429 /* Reached EOF */
430 Py_DECREF(line);
431 return NULL;
432 }
433
434 return line;
435 }
436
437 /*[clinic input]
438 @critical_section
439 _io.StringIO.truncate
440 pos as size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None
441 /
442
443 Truncate size to pos.
444
445 The pos argument defaults to the current file position, as
446 returned by tell(). The current file position is unchanged.
447 Returns the new absolute position.
448 [clinic start generated code]*/
449
450 static PyObject *
_io_StringIO_truncate_impl(stringio * self,Py_ssize_t size)451 _io_StringIO_truncate_impl(stringio *self, Py_ssize_t size)
452 /*[clinic end generated code: output=eb3aef8e06701365 input=461b872dce238452]*/
453 {
454 CHECK_INITIALIZED(self);
455 CHECK_CLOSED(self);
456
457 if (size < 0) {
458 PyErr_Format(PyExc_ValueError,
459 "Negative size value %zd", size);
460 return NULL;
461 }
462
463 if (size < self->string_size) {
464 ENSURE_REALIZED(self);
465 if (resize_buffer(self, size) < 0)
466 return NULL;
467 self->string_size = size;
468 }
469
470 return PyLong_FromSsize_t(size);
471 }
472
473 /*[clinic input]
474 @critical_section
475 _io.StringIO.seek
476 pos: Py_ssize_t
477 whence: int = 0
478 /
479
480 Change stream position.
481
482 Seek to character offset pos relative to position indicated by whence:
483 0 Start of stream (the default). pos should be >= 0;
484 1 Current position - pos must be 0;
485 2 End of stream - pos must be 0.
486 Returns the new absolute position.
487 [clinic start generated code]*/
488
489 static PyObject *
_io_StringIO_seek_impl(stringio * self,Py_ssize_t pos,int whence)490 _io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence)
491 /*[clinic end generated code: output=e9e0ac9a8ae71c25 input=c75ced09343a00d7]*/
492 {
493 CHECK_INITIALIZED(self);
494 CHECK_CLOSED(self);
495
496 if (whence != 0 && whence != 1 && whence != 2) {
497 PyErr_Format(PyExc_ValueError,
498 "Invalid whence (%i, should be 0, 1 or 2)", whence);
499 return NULL;
500 }
501 else if (pos < 0 && whence == 0) {
502 PyErr_Format(PyExc_ValueError,
503 "Negative seek position %zd", pos);
504 return NULL;
505 }
506 else if (whence != 0 && pos != 0) {
507 PyErr_SetString(PyExc_OSError,
508 "Can't do nonzero cur-relative seeks");
509 return NULL;
510 }
511
512 /* whence = 0: offset relative to beginning of the string.
513 whence = 1: no change to current position.
514 whence = 2: change position to end of file. */
515 if (whence == 1) {
516 pos = self->pos;
517 }
518 else if (whence == 2) {
519 pos = self->string_size;
520 }
521
522 self->pos = pos;
523
524 return PyLong_FromSsize_t(self->pos);
525 }
526
527 /*[clinic input]
528 @critical_section
529 _io.StringIO.write
530 s as obj: object
531 /
532
533 Write string to file.
534
535 Returns the number of characters written, which is always equal to
536 the length of the string.
537 [clinic start generated code]*/
538
539 static PyObject *
_io_StringIO_write_impl(stringio * self,PyObject * obj)540 _io_StringIO_write_impl(stringio *self, PyObject *obj)
541 /*[clinic end generated code: output=d53b1d841d7db288 input=1561272c0da4651f]*/
542 {
543 Py_ssize_t size;
544
545 CHECK_INITIALIZED(self);
546 if (!PyUnicode_Check(obj)) {
547 PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
548 Py_TYPE(obj)->tp_name);
549 return NULL;
550 }
551 CHECK_CLOSED(self);
552 size = PyUnicode_GET_LENGTH(obj);
553
554 if (size > 0 && write_str(self, obj) < 0)
555 return NULL;
556
557 return PyLong_FromSsize_t(size);
558 }
559
560 /*[clinic input]
561 @critical_section
562 _io.StringIO.close
563
564 Close the IO object.
565
566 Attempting any further operation after the object is closed
567 will raise a ValueError.
568
569 This method has no effect if the file is already closed.
570 [clinic start generated code]*/
571
572 static PyObject *
_io_StringIO_close_impl(stringio * self)573 _io_StringIO_close_impl(stringio *self)
574 /*[clinic end generated code: output=04399355cbe518f1 input=305d19aa29cc40b9]*/
575 {
576 self->closed = 1;
577 /* Free up some memory */
578 if (resize_buffer(self, 0) < 0)
579 return NULL;
580 _PyUnicodeWriter_Dealloc(&self->writer);
581 Py_CLEAR(self->readnl);
582 Py_CLEAR(self->writenl);
583 Py_CLEAR(self->decoder);
584 Py_RETURN_NONE;
585 }
586
587 static int
stringio_traverse(stringio * self,visitproc visit,void * arg)588 stringio_traverse(stringio *self, visitproc visit, void *arg)
589 {
590 Py_VISIT(Py_TYPE(self));
591 Py_VISIT(self->readnl);
592 Py_VISIT(self->writenl);
593 Py_VISIT(self->decoder);
594 Py_VISIT(self->dict);
595 return 0;
596 }
597
598 static int
stringio_clear(stringio * self)599 stringio_clear(stringio *self)
600 {
601 Py_CLEAR(self->readnl);
602 Py_CLEAR(self->writenl);
603 Py_CLEAR(self->decoder);
604 Py_CLEAR(self->dict);
605 return 0;
606 }
607
608 static void
stringio_dealloc(stringio * self)609 stringio_dealloc(stringio *self)
610 {
611 PyTypeObject *tp = Py_TYPE(self);
612 _PyObject_GC_UNTRACK(self);
613 self->ok = 0;
614 if (self->buf) {
615 PyMem_Free(self->buf);
616 self->buf = NULL;
617 }
618 _PyUnicodeWriter_Dealloc(&self->writer);
619 (void)stringio_clear(self);
620 if (self->weakreflist != NULL) {
621 PyObject_ClearWeakRefs((PyObject *) self);
622 }
623 tp->tp_free(self);
624 Py_DECREF(tp);
625 }
626
627 static PyObject *
stringio_new(PyTypeObject * type,PyObject * args,PyObject * kwds)628 stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
629 {
630 stringio *self;
631
632 assert(type != NULL && type->tp_alloc != NULL);
633 self = (stringio *)type->tp_alloc(type, 0);
634 if (self == NULL)
635 return NULL;
636
637 /* tp_alloc initializes all the fields to zero. So we don't have to
638 initialize them here. */
639
640 self->buf = (Py_UCS4 *)PyMem_Malloc(0);
641 if (self->buf == NULL) {
642 Py_DECREF(self);
643 return PyErr_NoMemory();
644 }
645
646 return (PyObject *)self;
647 }
648
649 /*[clinic input]
650 _io.StringIO.__init__
651 initial_value as value: object(c_default="NULL") = ''
652 newline as newline_obj: object(c_default="NULL") = '\n'
653
654 Text I/O implementation using an in-memory buffer.
655
656 The initial_value argument sets the value of object. The newline
657 argument is like the one of TextIOWrapper's constructor.
658 [clinic start generated code]*/
659
660 static int
_io_StringIO___init___impl(stringio * self,PyObject * value,PyObject * newline_obj)661 _io_StringIO___init___impl(stringio *self, PyObject *value,
662 PyObject *newline_obj)
663 /*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/
664 {
665 const char *newline = "\n";
666 Py_ssize_t value_len;
667
668 /* Parse the newline argument. We only want to allow unicode objects or
669 None. */
670 if (newline_obj == Py_None) {
671 newline = NULL;
672 }
673 else if (newline_obj) {
674 if (!PyUnicode_Check(newline_obj)) {
675 PyErr_Format(PyExc_TypeError,
676 "newline must be str or None, not %.200s",
677 Py_TYPE(newline_obj)->tp_name);
678 return -1;
679 }
680 newline = PyUnicode_AsUTF8(newline_obj);
681 if (newline == NULL)
682 return -1;
683 }
684
685 if (newline && newline[0] != '\0'
686 && !(newline[0] == '\n' && newline[1] == '\0')
687 && !(newline[0] == '\r' && newline[1] == '\0')
688 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
689 PyErr_Format(PyExc_ValueError,
690 "illegal newline value: %R", newline_obj);
691 return -1;
692 }
693 if (value && value != Py_None && !PyUnicode_Check(value)) {
694 PyErr_Format(PyExc_TypeError,
695 "initial_value must be str or None, not %.200s",
696 Py_TYPE(value)->tp_name);
697 return -1;
698 }
699
700 self->ok = 0;
701
702 _PyUnicodeWriter_Dealloc(&self->writer);
703 Py_CLEAR(self->readnl);
704 Py_CLEAR(self->writenl);
705 Py_CLEAR(self->decoder);
706
707 assert((newline != NULL && newline_obj != Py_None) ||
708 (newline == NULL && newline_obj == Py_None));
709
710 if (newline) {
711 self->readnl = PyUnicode_FromString(newline);
712 if (self->readnl == NULL)
713 return -1;
714 }
715 self->readuniversal = (newline == NULL || newline[0] == '\0');
716 self->readtranslate = (newline == NULL);
717 /* If newline == "", we don't translate anything.
718 If newline == "\n" or newline == None, we translate to "\n", which is
719 a no-op.
720 (for newline == None, TextIOWrapper translates to os.linesep, but it
721 is pointless for StringIO)
722 */
723 if (newline != NULL && newline[0] == '\r') {
724 self->writenl = Py_NewRef(self->readnl);
725 }
726
727 _PyIO_State *module_state = find_io_state_by_def(Py_TYPE(self));
728 if (self->readuniversal) {
729 self->decoder = PyObject_CallFunctionObjArgs(
730 (PyObject *)module_state->PyIncrementalNewlineDecoder_Type,
731 Py_None, self->readtranslate ? Py_True : Py_False, NULL);
732 if (self->decoder == NULL)
733 return -1;
734 }
735
736 /* Now everything is set up, resize buffer to size of initial value,
737 and copy it */
738 self->string_size = 0;
739 if (value && value != Py_None)
740 value_len = PyUnicode_GetLength(value);
741 else
742 value_len = 0;
743 if (value_len > 0) {
744 /* This is a heuristic, for newline translation might change
745 the string length. */
746 if (resize_buffer(self, 0) < 0)
747 return -1;
748 self->state = STATE_REALIZED;
749 self->pos = 0;
750 if (write_str(self, value) < 0)
751 return -1;
752 }
753 else {
754 /* Empty stringio object, we can start by accumulating */
755 if (resize_buffer(self, 0) < 0)
756 return -1;
757 _PyUnicodeWriter_Init(&self->writer);
758 self->writer.overallocate = 1;
759 self->state = STATE_ACCUMULATING;
760 }
761 self->pos = 0;
762 self->module_state = module_state;
763 self->closed = 0;
764 self->ok = 1;
765 return 0;
766 }
767
768 /* Properties and pseudo-properties */
769
770 /*[clinic input]
771 @critical_section
772 _io.StringIO.readable
773
774 Returns True if the IO object can be read.
775 [clinic start generated code]*/
776
777 static PyObject *
_io_StringIO_readable_impl(stringio * self)778 _io_StringIO_readable_impl(stringio *self)
779 /*[clinic end generated code: output=b19d44dd8b1ceb99 input=6cd2ffd65a8e8763]*/
780 {
781 CHECK_INITIALIZED(self);
782 CHECK_CLOSED(self);
783 Py_RETURN_TRUE;
784 }
785
786 /*[clinic input]
787 @critical_section
788 _io.StringIO.writable
789
790 Returns True if the IO object can be written.
791 [clinic start generated code]*/
792
793 static PyObject *
_io_StringIO_writable_impl(stringio * self)794 _io_StringIO_writable_impl(stringio *self)
795 /*[clinic end generated code: output=13e4dd77187074ca input=1b3c63dbaa761c69]*/
796 {
797 CHECK_INITIALIZED(self);
798 CHECK_CLOSED(self);
799 Py_RETURN_TRUE;
800 }
801
802 /*[clinic input]
803 @critical_section
804 _io.StringIO.seekable
805
806 Returns True if the IO object can be seeked.
807 [clinic start generated code]*/
808
809 static PyObject *
_io_StringIO_seekable_impl(stringio * self)810 _io_StringIO_seekable_impl(stringio *self)
811 /*[clinic end generated code: output=4d20b4641c756879 input=a820fad2cf085fc3]*/
812 {
813 CHECK_INITIALIZED(self);
814 CHECK_CLOSED(self);
815 Py_RETURN_TRUE;
816 }
817
818 /* Pickling support.
819
820 The implementation of __getstate__ is similar to the one for BytesIO,
821 except that we also save the newline parameter. For __setstate__ and unlike
822 BytesIO, we call __init__ to restore the object's state. Doing so allows us
823 to avoid decoding the complex newline state while keeping the object
824 representation compact.
825
   See comment in bytesio.c regarding why only pickle protocol 2 and onward
   are supported.
828 */
829
830 /*[clinic input]
831 @critical_section
832 _io.StringIO.__getstate__
833
834 [clinic start generated code]*/
835
836 static PyObject *
_io_StringIO___getstate___impl(stringio * self)837 _io_StringIO___getstate___impl(stringio *self)
838 /*[clinic end generated code: output=780be4a996410199 input=76f27255ef83bb92]*/
839 {
840 PyObject *initvalue = _io_StringIO_getvalue_impl(self);
841 PyObject *dict;
842 PyObject *state;
843
844 if (initvalue == NULL)
845 return NULL;
846 if (self->dict == NULL) {
847 dict = Py_NewRef(Py_None);
848 }
849 else {
850 dict = PyDict_Copy(self->dict);
851 if (dict == NULL) {
852 Py_DECREF(initvalue);
853 return NULL;
854 }
855 }
856
857 state = Py_BuildValue("(OOnN)", initvalue,
858 self->readnl ? self->readnl : Py_None,
859 self->pos, dict);
860 Py_DECREF(initvalue);
861 return state;
862 }
863
864 /*[clinic input]
865 @critical_section
866 _io.StringIO.__setstate__
867
868 state: object
869 /
870 [clinic start generated code]*/
871
872 static PyObject *
_io_StringIO___setstate___impl(stringio * self,PyObject * state)873 _io_StringIO___setstate___impl(stringio *self, PyObject *state)
874 /*[clinic end generated code: output=cb3962bc6d5c5609 input=8a27784b11b82e47]*/
875 {
876 PyObject *initarg;
877 PyObject *position_obj;
878 PyObject *dict;
879 Py_ssize_t pos;
880
881 assert(state != NULL);
882 CHECK_CLOSED(self);
883
884 /* We allow the state tuple to be longer than 4, because we may need
885 someday to extend the object's state without breaking
886 backward-compatibility. */
887 if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) {
888 PyErr_Format(PyExc_TypeError,
889 "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
890 Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
891 return NULL;
892 }
893
894 /* Initialize the object's state. */
895 initarg = PyTuple_GetSlice(state, 0, 2);
896 if (initarg == NULL)
897 return NULL;
898 if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) {
899 Py_DECREF(initarg);
900 return NULL;
901 }
902 Py_DECREF(initarg);
903
904 /* Restore the buffer state. Even if __init__ did initialize the buffer,
905 we have to initialize it again since __init__ may translate the
906 newlines in the initial_value string. We clearly do not want that
907 because the string value in the state tuple has already been translated
908 once by __init__. So we do not take any chance and replace object's
909 buffer completely. */
910 {
911 PyObject *item = PyTuple_GET_ITEM(state, 0);
912 if (PyUnicode_Check(item)) {
913 Py_UCS4 *buf = PyUnicode_AsUCS4Copy(item);
914 if (buf == NULL)
915 return NULL;
916 Py_ssize_t bufsize = PyUnicode_GET_LENGTH(item);
917
918 if (resize_buffer(self, bufsize) < 0) {
919 PyMem_Free(buf);
920 return NULL;
921 }
922 memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
923 PyMem_Free(buf);
924 self->string_size = bufsize;
925 }
926 else {
927 assert(item == Py_None);
928 self->string_size = 0;
929 }
930 }
931
932 /* Set carefully the position value. Alternatively, we could use the seek
933 method instead of modifying self->pos directly to better protect the
934 object internal state against erroneous (or malicious) inputs. */
935 position_obj = PyTuple_GET_ITEM(state, 2);
936 if (!PyLong_Check(position_obj)) {
937 PyErr_Format(PyExc_TypeError,
938 "third item of state must be an integer, got %.200s",
939 Py_TYPE(position_obj)->tp_name);
940 return NULL;
941 }
942 pos = PyLong_AsSsize_t(position_obj);
943 if (pos == -1 && PyErr_Occurred())
944 return NULL;
945 if (pos < 0) {
946 PyErr_SetString(PyExc_ValueError,
947 "position value cannot be negative");
948 return NULL;
949 }
950 self->pos = pos;
951
952 /* Set the dictionary of the instance variables. */
953 dict = PyTuple_GET_ITEM(state, 3);
954 if (dict != Py_None) {
955 if (!PyDict_Check(dict)) {
956 PyErr_Format(PyExc_TypeError,
957 "fourth item of state should be a dict, got a %.200s",
958 Py_TYPE(dict)->tp_name);
959 return NULL;
960 }
961 if (self->dict) {
962 /* Alternatively, we could replace the internal dictionary
963 completely. However, it seems more practical to just update it. */
964 if (PyDict_Update(self->dict, dict) < 0)
965 return NULL;
966 }
967 else {
968 self->dict = Py_NewRef(dict);
969 }
970 }
971
972 Py_RETURN_NONE;
973 }
974
975 /*[clinic input]
976 @critical_section
977 @getter
978 _io.StringIO.closed
979 [clinic start generated code]*/
980
981 static PyObject *
_io_StringIO_closed_get_impl(stringio * self)982 _io_StringIO_closed_get_impl(stringio *self)
983 /*[clinic end generated code: output=531ddca7954331d6 input=178d2ef24395fd49]*/
984 {
985 CHECK_INITIALIZED(self);
986 return PyBool_FromLong(self->closed);
987 }
988
989 /*[clinic input]
990 @critical_section
991 @getter
992 _io.StringIO.line_buffering
993 [clinic start generated code]*/
994
995 static PyObject *
_io_StringIO_line_buffering_get_impl(stringio * self)996 _io_StringIO_line_buffering_get_impl(stringio *self)
997 /*[clinic end generated code: output=360710e0112966ae input=6a7634e7f890745e]*/
998 {
999 CHECK_INITIALIZED(self);
1000 CHECK_CLOSED(self);
1001 Py_RETURN_FALSE;
1002 }
1003
1004 /*[clinic input]
1005 @critical_section
1006 @getter
1007 _io.StringIO.newlines
1008 [clinic start generated code]*/
1009
1010 static PyObject *
_io_StringIO_newlines_get_impl(stringio * self)1011 _io_StringIO_newlines_get_impl(stringio *self)
1012 /*[clinic end generated code: output=35d7c0b66d7e0160 input=092a14586718244b]*/
1013 {
1014 CHECK_INITIALIZED(self);
1015 CHECK_CLOSED(self);
1016 if (self->decoder == NULL) {
1017 Py_RETURN_NONE;
1018 }
1019 return PyObject_GetAttr(self->decoder, &_Py_ID(newlines));
1020 }
1021
1022 static struct PyMethodDef stringio_methods[] = {
1023 _IO_STRINGIO_CLOSE_METHODDEF
1024 _IO_STRINGIO_GETVALUE_METHODDEF
1025 _IO_STRINGIO_READ_METHODDEF
1026 _IO_STRINGIO_READLINE_METHODDEF
1027 _IO_STRINGIO_TELL_METHODDEF
1028 _IO_STRINGIO_TRUNCATE_METHODDEF
1029 _IO_STRINGIO_SEEK_METHODDEF
1030 _IO_STRINGIO_WRITE_METHODDEF
1031
1032 _IO_STRINGIO_SEEKABLE_METHODDEF
1033 _IO_STRINGIO_READABLE_METHODDEF
1034 _IO_STRINGIO_WRITABLE_METHODDEF
1035
1036 _IO_STRINGIO___GETSTATE___METHODDEF
1037 _IO_STRINGIO___SETSTATE___METHODDEF
1038 {NULL, NULL} /* sentinel */
1039 };
1040
1041 static PyGetSetDef stringio_getset[] = {
1042 _IO_STRINGIO_CLOSED_GETSETDEF
1043 _IO_STRINGIO_NEWLINES_GETSETDEF
1044 /* (following comments straight off of the original Python wrapper:)
1045 XXX Cruft to support the TextIOWrapper API. This would only
1046 be meaningful if StringIO supported the buffer attribute.
1047 Hopefully, a better solution, than adding these pseudo-attributes,
1048 will be found.
1049 */
1050 _IO_STRINGIO_LINE_BUFFERING_GETSETDEF
1051 {NULL}
1052 };
1053
1054 static struct PyMemberDef stringio_members[] = {
1055 {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(stringio, weakreflist), Py_READONLY},
1056 {"__dictoffset__", Py_T_PYSSIZET, offsetof(stringio, dict), Py_READONLY},
1057 {NULL},
1058 };
1059
1060 static PyType_Slot stringio_slots[] = {
1061 {Py_tp_dealloc, stringio_dealloc},
1062 {Py_tp_doc, (void *)_io_StringIO___init____doc__},
1063 {Py_tp_traverse, stringio_traverse},
1064 {Py_tp_clear, stringio_clear},
1065 {Py_tp_iternext, stringio_iternext},
1066 {Py_tp_methods, stringio_methods},
1067 {Py_tp_members, stringio_members},
1068 {Py_tp_getset, stringio_getset},
1069 {Py_tp_init, _io_StringIO___init__},
1070 {Py_tp_new, stringio_new},
1071 {0, NULL},
1072 };
1073
1074 PyType_Spec stringio_spec = {
1075 .name = "_io.StringIO",
1076 .basicsize = sizeof(stringio),
1077 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1078 Py_TPFLAGS_IMMUTABLETYPE),
1079 .slots = stringio_slots,
1080 };
1081