• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2     An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3 
4     Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5 
6     Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8 
9 #include "Python.h"
10 #include "pycore_call.h"              // _PyObject_CallMethod()
11 #include "pycore_codecs.h"            // _PyCodecInfo_GetIncrementalDecoder()
12 #include "pycore_fileutils.h"         // _Py_GetLocaleEncoding()
13 #include "pycore_interp.h"            // PyInterpreterState.fs_codec
14 #include "pycore_long.h"              // _PyLong_GetZero()
15 #include "pycore_object.h"            // _PyObject_GC_UNTRACK()
16 #include "pycore_pyerrors.h"          // _PyErr_ChainExceptions1()
17 #include "pycore_pystate.h"           // _PyInterpreterState_GET()
18 
19 #include "_iomodule.h"
20 
21 /*[clinic input]
22 module _io
23 class _io.IncrementalNewlineDecoder "nldecoder_object *" "clinic_state()->PyIncrementalNewlineDecoder_Type"
24 class _io.TextIOWrapper "textio *" "clinic_state()->TextIOWrapper_Type"
25 class _io._TextIOBase "PyObject *" "&PyTextIOBase_Type"
26 [clinic start generated code]*/
27 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=8b7f24fa13bfdd7f]*/
28 
/* Forward declarations so that code appearing before the full struct
   definitions (including the generated clinic header) can name these types. */
typedef struct nldecoder_object nldecoder_object;
typedef struct textio textio;

/* clinic_state() is defined only for the duration of the generated-header
   include below; the clinic class declarations above name it in their type
   expressions. */
#define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
#include "clinic/textio.c.h"
#undef clinic_state
35 
36 /* TextIOBase */
37 
/* Docstring for _io._TextIOBase; installed through the Py_tp_doc slot. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable.\n"
    );
45 
46 static PyObject *
_unsupported(_PyIO_State * state,const char * message)47 _unsupported(_PyIO_State *state, const char *message)
48 {
49     PyErr_SetString(state->unsupported_operation, message);
50     return NULL;
51 }
52 
53 /*[clinic input]
54 _io._TextIOBase.detach
55     cls: defining_class
56     /
57 
58 Separate the underlying buffer from the TextIOBase and return it.
59 
60 After the underlying buffer has been detached, the TextIO is in an unusable state.
61 [clinic start generated code]*/
62 
63 static PyObject *
_io__TextIOBase_detach_impl(PyObject * self,PyTypeObject * cls)64 _io__TextIOBase_detach_impl(PyObject *self, PyTypeObject *cls)
65 /*[clinic end generated code: output=50915f40c609eaa4 input=987ca3640d0a3776]*/
66 {
67     _PyIO_State *state = get_io_state_by_cls(cls);
68     return _unsupported(state, "detach");
69 }
70 
71 /*[clinic input]
72 _io._TextIOBase.read
73     cls: defining_class
74     size: int(unused=True) = -1
75     /
76 
77 Read at most size characters from stream.
78 
79 Read from underlying buffer until we have size characters or we hit EOF.
80 If size is negative or omitted, read until EOF.
81 [clinic start generated code]*/
82 
83 static PyObject *
_io__TextIOBase_read_impl(PyObject * self,PyTypeObject * cls,int Py_UNUSED (size))84 _io__TextIOBase_read_impl(PyObject *self, PyTypeObject *cls,
85                           int Py_UNUSED(size))
86 /*[clinic end generated code: output=51a5178a309ce647 input=f5e37720f9fc563f]*/
87 {
88     _PyIO_State *state = get_io_state_by_cls(cls);
89     return _unsupported(state, "read");
90 }
91 
92 /*[clinic input]
93 _io._TextIOBase.readline
94     cls: defining_class
95     size: int(unused=True) = -1
96     /
97 
98 Read until newline or EOF.
99 
100 Return an empty string if EOF is hit immediately.
101 If size is specified, at most size characters will be read.
102 [clinic start generated code]*/
103 
104 static PyObject *
_io__TextIOBase_readline_impl(PyObject * self,PyTypeObject * cls,int Py_UNUSED (size))105 _io__TextIOBase_readline_impl(PyObject *self, PyTypeObject *cls,
106                               int Py_UNUSED(size))
107 /*[clinic end generated code: output=3f47d7966d6d074e input=42eafec94107fa27]*/
108 {
109     _PyIO_State *state = get_io_state_by_cls(cls);
110     return _unsupported(state, "readline");
111 }
112 
113 /*[clinic input]
114 _io._TextIOBase.write
115     cls: defining_class
116     s: str(unused=True)
117     /
118 
119 Write string s to stream.
120 
121 Return the number of characters written
122 (which is always equal to the length of the string).
123 [clinic start generated code]*/
124 
125 static PyObject *
_io__TextIOBase_write_impl(PyObject * self,PyTypeObject * cls,const char * Py_UNUSED (s))126 _io__TextIOBase_write_impl(PyObject *self, PyTypeObject *cls,
127                            const char *Py_UNUSED(s))
128 /*[clinic end generated code: output=18b28231460275de input=e9cabaa5f6732b07]*/
129 {
130     _PyIO_State *state = get_io_state_by_cls(cls);
131     return _unsupported(state, "write");
132 }
133 
134 /*[clinic input]
135 @getter
136 _io._TextIOBase.encoding
137 
138 Encoding of the text stream.
139 
140 Subclasses should override.
141 [clinic start generated code]*/
142 
static PyObject *
_io__TextIOBase_encoding_get_impl(PyObject *self)
/*[clinic end generated code: output=e0f5d8f548b92432 input=4736d7621dd38f43]*/
{
    /* Base-class getter for `encoding`: always returns None. */
    Py_RETURN_NONE;
}
149 
150 /*[clinic input]
151 @getter
152 _io._TextIOBase.newlines
153 
154 Line endings translated so far.
155 
156 Only line endings translated during reading are considered.
157 
158 Subclasses should override.
159 [clinic start generated code]*/
160 
static PyObject *
_io__TextIOBase_newlines_get_impl(PyObject *self)
/*[clinic end generated code: output=46ec147fb9f00c2a input=a5b196d076af1164]*/
{
    /* Base-class getter for `newlines`: always returns None. */
    Py_RETURN_NONE;
}
167 
168 /*[clinic input]
169 @getter
170 _io._TextIOBase.errors
171 
172 The error setting of the decoder or encoder.
173 
174 Subclasses should override.
175 [clinic start generated code]*/
176 
static PyObject *
_io__TextIOBase_errors_get_impl(PyObject *self)
/*[clinic end generated code: output=c6623d6addcd087d input=974aa52d1db93a82]*/
{
    /* Base-class getter for `errors`: always returns None. */
    Py_RETURN_NONE;
}
183 
184 
/* Method table for _io._TextIOBase.  The *_METHODDEF entries are macros
   expected to come from the generated clinic header; the {NULL, NULL}
   entry terminates the table. */
static PyMethodDef textiobase_methods[] = {
    _IO__TEXTIOBASE_DETACH_METHODDEF
    _IO__TEXTIOBASE_READ_METHODDEF
    _IO__TEXTIOBASE_READLINE_METHODDEF
    _IO__TEXTIOBASE_WRITE_METHODDEF
    {NULL, NULL}
};
192 
/* Attribute (getter) table for _io._TextIOBase: encoding, newlines and
   errors.  The {NULL} entry terminates the table. */
static PyGetSetDef textiobase_getset[] = {
    _IO__TEXTIOBASE_ENCODING_GETSETDEF
    _IO__TEXTIOBASE_NEWLINES_GETSETDEF
    _IO__TEXTIOBASE_ERRORS_GETSETDEF
    {NULL}
};
199 
/* Type slots for _io._TextIOBase: docstring, methods and getters.
   The {0, NULL} entry terminates the array. */
static PyType_Slot textiobase_slots[] = {
    {Py_tp_doc, (void *)textiobase_doc},
    {Py_tp_methods, textiobase_methods},
    {Py_tp_getset, textiobase_getset},
    {0, NULL},
};
206 
/* Type specification for _io._TextIOBase (non-static: has external linkage).
   Do not set Py_TPFLAGS_HAVE_GC so that tp_traverse and tp_clear are inherited */
PyType_Spec textiobase_spec = {
    .name = "_io._TextIOBase",
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = textiobase_slots,
};
214 
215 /* IncrementalNewlineDecoder */
216 
/* Instance layout of _io.IncrementalNewlineDecoder. */
struct nldecoder_object {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder; compared against Py_None
                                   to decide whether to call it */
    PyObject *errors;           /* set by __init__; NULL is treated as
                                   "__init__() not called" */
    unsigned int pendingcr: 1;  /* a trailing '\r' was held back from the
                                   previous non-final decode() */
    unsigned int translate: 1;  /* rewrite "\r\n" and "\r" to "\n" */
    unsigned int seennl: 3;     /* bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF */
};
225 
226 /*[clinic input]
227 _io.IncrementalNewlineDecoder.__init__
228     decoder: object
229     translate: bool
230     errors: object(c_default="NULL") = "strict"
231 
232 Codec used when reading a file in universal newlines mode.
233 
234 It wraps another incremental decoder, translating \r\n and \r into \n.
235 It also records the types of newlines encountered.  When used with
236 translate=False, it ensures that the newline sequence is returned in
237 one piece. When used with decoder=None, it expects unicode strings as
238 decode input and translates newlines without first invoking an external
239 decoder.
240 [clinic start generated code]*/
241 
242 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)243 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
244                                             PyObject *decoder, int translate,
245                                             PyObject *errors)
246 /*[clinic end generated code: output=fbd04d443e764ec2 input=ed547aa257616b0e]*/
247 {
248 
249     if (errors == NULL) {
250         errors = &_Py_ID(strict);
251     }
252     else {
253         errors = Py_NewRef(errors);
254     }
255 
256     Py_XSETREF(self->errors, errors);
257     Py_XSETREF(self->decoder, Py_NewRef(decoder));
258     self->translate = translate ? 1 : 0;
259     self->seennl = 0;
260     self->pendingcr = 0;
261 
262     return 0;
263 }
264 
static int
incrementalnewlinedecoder_traverse(nldecoder_object *self, visitproc visit,
                                   void *arg)
{
    /* GC traversal: report the type object and both owned references. */
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->decoder);
    Py_VISIT(self->errors);
    return 0;
}
274 
static int
incrementalnewlinedecoder_clear(nldecoder_object *self)
{
    /* Drop both owned references and leave the fields NULL.  With `errors`
       NULL the object reads as uninitialized to CHECK_INITIALIZED_DECODER. */
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->errors);
    return 0;
}
282 
static void
incrementalnewlinedecoder_dealloc(nldecoder_object *self)
{
    /* Keep the type pointer: it is still needed after the object memory
       has been released by tp_free. */
    PyTypeObject *tp = Py_TYPE(self);
    /* Stop GC tracking before the fields are torn down. */
    _PyObject_GC_UNTRACK(self);
    (void)incrementalnewlinedecoder_clear(self);
    tp->tp_free((PyObject *)self);
    /* Release the reference to the type object. */
    Py_DECREF(tp);
}
292 
293 static int
check_decoded(PyObject * decoded)294 check_decoded(PyObject *decoded)
295 {
296     if (decoded == NULL)
297         return -1;
298     if (!PyUnicode_Check(decoded)) {
299         PyErr_Format(PyExc_TypeError,
300                      "decoder should return a string result, not '%.200s'",
301                      Py_TYPE(decoded)->tp_name);
302         Py_DECREF(decoded);
303         return -1;
304     }
305     return 0;
306 }
307 
/* Guard for IncrementalNewlineDecoder entry points: when `errors` is still
   NULL (only __init__ assigns it), set ValueError and return NULL from the
   *calling* function.  Use as a statement: CHECK_INITIALIZED_DECODER(self);
   The do/while(0) wrapper keeps the macro safe inside unbraced if/else,
   and the argument is parenthesized so any pointer expression works. */
#define CHECK_INITIALIZED_DECODER(self) \
    do { \
        if ((self)->errors == NULL) { \
            PyErr_SetString(PyExc_ValueError, \
                            "IncrementalNewlineDecoder.__init__() not called"); \
            return NULL; \
        } \
    } while (0)
314 
/* Bit flags recorded in nldecoder_object.seennl (a 3-bit field), one per
   newline convention encountered while decoding. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
319 
/* Decode `input` through the wrapped decoder (or pass it through unchanged
 * when the wrapped decoder is None), then track and optionally translate
 * newlines.
 *
 *   myself: an nldecoder_object (cast without a type check).
 *   input:  handed to decoder.decode(); must already be a str when the
 *           wrapped decoder is None.
 *   final:  true when no more input will follow; a held-back '\r' is then
 *           flushed instead of retained.
 *
 * Returns a new reference to a str, or NULL with an exception set.
 */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    CHECK_INITIALIZED_DECODER(self);

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = Py_NewRef(input);
    }

    /* On failure check_decoded() has already released `output`. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        /* Same max char value as `output`, so both strings share one kind
           and the raw memcpy below is valid. */
        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, out, 0, '\r');
        /* `kind` is also the per-character byte width. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_SETREF(output, modified); /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_SETREF(output, modified);
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        const void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* For wider kinds a '\n' *byte* may be part of another
                   character, so the hit is confirmed per character. */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        /* NOTE(review): `i` is not checked against `len`
                           here; presumably the buffer's trailing NUL stops
                           the scan -- confirm. */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* Peek at the following character to tell "\r\n"
                       from a lone "\r". */
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            void *translated;
            /* NOTE(review): `kind` and `in_str` shadow the identically
               initialized variables of the enclosing block. */
            int kind = PyUnicode_KIND(output);
            const void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* Translation never grows the text ("\r\n" shrinks to "\n"),
               so `len` characters are always enough. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that read was past the last real
                   character. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* The old `output` is already released, so return directly
               instead of jumping to `error`. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
513 
514 /*[clinic input]
515 _io.IncrementalNewlineDecoder.decode
516     input: object
517     final: bool = False
518 [clinic start generated code]*/
519 
520 static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object * self,PyObject * input,int final)521 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
522                                           PyObject *input, int final)
523 /*[clinic end generated code: output=0d486755bb37a66e input=90e223c70322c5cd]*/
524 {
525     return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
526 }
527 
528 /*[clinic input]
529 _io.IncrementalNewlineDecoder.getstate
530 [clinic start generated code]*/
531 
532 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)533 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
534 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
535 {
536     PyObject *buffer;
537     unsigned long long flag;
538 
539     CHECK_INITIALIZED_DECODER(self);
540 
541     if (self->decoder != Py_None) {
542         PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
543            &_Py_ID(getstate));
544         if (state == NULL)
545             return NULL;
546         if (!PyTuple_Check(state)) {
547             PyErr_SetString(PyExc_TypeError,
548                             "illegal decoder state");
549             Py_DECREF(state);
550             return NULL;
551         }
552         if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
553                               &buffer, &flag))
554         {
555             Py_DECREF(state);
556             return NULL;
557         }
558         Py_INCREF(buffer);
559         Py_DECREF(state);
560     }
561     else {
562         buffer = PyBytes_FromString("");
563         flag = 0;
564     }
565     flag <<= 1;
566     if (self->pendingcr)
567         flag |= 1;
568     return Py_BuildValue("NK", buffer, flag);
569 }
570 
571 /*[clinic input]
572 _io.IncrementalNewlineDecoder.setstate
573     state: object
574     /
575 [clinic start generated code]*/
576 
577 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)578 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
579                                        PyObject *state)
580 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
581 {
582     PyObject *buffer;
583     unsigned long long flag;
584 
585     CHECK_INITIALIZED_DECODER(self);
586 
587     if (!PyTuple_Check(state)) {
588         PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
589         return NULL;
590     }
591     if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
592                           &buffer, &flag))
593     {
594         return NULL;
595     }
596 
597     self->pendingcr = (int) (flag & 1);
598     flag >>= 1;
599 
600     if (self->decoder != Py_None) {
601         return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
602                                     "((OK))", buffer, flag);
603     }
604     else {
605         Py_RETURN_NONE;
606     }
607 }
608 
609 /*[clinic input]
610 _io.IncrementalNewlineDecoder.reset
611 [clinic start generated code]*/
612 
613 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)614 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
615 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
616 {
617     CHECK_INITIALIZED_DECODER(self);
618 
619     self->seennl = 0;
620     self->pendingcr = 0;
621     if (self->decoder != Py_None)
622         return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
623     else
624         Py_RETURN_NONE;
625 }
626 
627 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)628 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
629 {
630     CHECK_INITIALIZED_DECODER(self);
631 
632     switch (self->seennl) {
633     case SEEN_CR:
634         return PyUnicode_FromString("\r");
635     case SEEN_LF:
636         return PyUnicode_FromString("\n");
637     case SEEN_CRLF:
638         return PyUnicode_FromString("\r\n");
639     case SEEN_CR | SEEN_LF:
640         return Py_BuildValue("ss", "\r", "\n");
641     case SEEN_CR | SEEN_CRLF:
642         return Py_BuildValue("ss", "\r", "\r\n");
643     case SEEN_LF | SEEN_CRLF:
644         return Py_BuildValue("ss", "\n", "\r\n");
645     case SEEN_CR | SEEN_LF | SEEN_CRLF:
646         return Py_BuildValue("sss", "\r", "\n", "\r\n");
647     default:
648         Py_RETURN_NONE;
649    }
650 
651 }
652 
653 /* TextIOWrapper */
654 
/* Signature of a specialized encoder: takes two objects and returns an
   object.  The encoders in this file are declared with a `textio *` first
   parameter and cast to this type when stored. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
657 
/* Instance layout of _io.TextIOWrapper. */
struct textio
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    /* The next three flags are derived from the newline argument by
       set_newline(). */
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;

    /* Module state; read to look up the IncrementalNewlineDecoder type. */
    _PyIO_State *state;
};
718 
/* Forward declaration; the definition is not part of this section of the
   file. */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
721 
722 /* A couple of specialized cases in order to bypass the slow incremental
723    encoding methods for the most popular encodings. */
724 
725 static PyObject *
ascii_encode(textio * self,PyObject * text)726 ascii_encode(textio *self, PyObject *text)
727 {
728     return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
729 }
730 
731 static PyObject *
utf16be_encode(textio * self,PyObject * text)732 utf16be_encode(textio *self, PyObject *text)
733 {
734     return _PyUnicode_EncodeUTF16(text,
735                                   PyUnicode_AsUTF8(self->errors), 1);
736 }
737 
738 static PyObject *
utf16le_encode(textio * self,PyObject * text)739 utf16le_encode(textio *self, PyObject *text)
740 {
741     return _PyUnicode_EncodeUTF16(text,
742                                   PyUnicode_AsUTF8(self->errors), -1);
743 }
744 
745 static PyObject *
utf16_encode(textio * self,PyObject * text)746 utf16_encode(textio *self, PyObject *text)
747 {
748     if (!self->encoding_start_of_stream) {
749         /* Skip the BOM and use native byte ordering */
750 #if PY_BIG_ENDIAN
751         return utf16be_encode(self, text);
752 #else
753         return utf16le_encode(self, text);
754 #endif
755     }
756     return _PyUnicode_EncodeUTF16(text,
757                                   PyUnicode_AsUTF8(self->errors), 0);
758 }
759 
760 static PyObject *
utf32be_encode(textio * self,PyObject * text)761 utf32be_encode(textio *self, PyObject *text)
762 {
763     return _PyUnicode_EncodeUTF32(text,
764                                   PyUnicode_AsUTF8(self->errors), 1);
765 }
766 
767 static PyObject *
utf32le_encode(textio * self,PyObject * text)768 utf32le_encode(textio *self, PyObject *text)
769 {
770     return _PyUnicode_EncodeUTF32(text,
771                                   PyUnicode_AsUTF8(self->errors), -1);
772 }
773 
774 static PyObject *
utf32_encode(textio * self,PyObject * text)775 utf32_encode(textio *self, PyObject *text)
776 {
777     if (!self->encoding_start_of_stream) {
778         /* Skip the BOM and use native byte ordering */
779 #if PY_BIG_ENDIAN
780         return utf32be_encode(self, text);
781 #else
782         return utf32le_encode(self, text);
783 #endif
784     }
785     return _PyUnicode_EncodeUTF32(text,
786                                   PyUnicode_AsUTF8(self->errors), 0);
787 }
788 
789 static PyObject *
utf8_encode(textio * self,PyObject * text)790 utf8_encode(textio *self, PyObject *text)
791 {
792     return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
793 }
794 
795 static PyObject *
latin1_encode(textio * self,PyObject * text)796 latin1_encode(textio *self, PyObject *text)
797 {
798     return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
799 }
800 
801 // Return true when encoding can be skipped when text is ascii.
802 static inline int
is_asciicompat_encoding(encodefunc_t f)803 is_asciicompat_encoding(encodefunc_t f)
804 {
805     return f == (encodefunc_t) ascii_encode
806         || f == (encodefunc_t) latin1_encode
807         || f == (encodefunc_t) utf8_encode;
808 }
809 
810 /* Map normalized encoding names onto the specialized encoding funcs */
811 
/* One row of the name -> specialized-encoder lookup table. */
typedef struct {
    const char *name;          /* codec name as spelled in the table below */
    encodefunc_t encodefunc;   /* matching specialized encoder */
} encodefuncentry;
816 
/* Lookup table of specialized encoders, terminated by a {NULL, NULL}
   sentinel.  The casts adapt the `textio *` first parameter of each
   encoder to the generic encodefunc_t signature. */
static const encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
829 
830 static int
validate_newline(const char * newline)831 validate_newline(const char *newline)
832 {
833     if (newline && newline[0] != '\0'
834         && !(newline[0] == '\n' && newline[1] == '\0')
835         && !(newline[0] == '\r' && newline[1] == '\0')
836         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
837         PyErr_Format(PyExc_ValueError,
838                      "illegal newline value: %s", newline);
839         return -1;
840     }
841     return 0;
842 }
843 
844 static int
set_newline(textio * self,const char * newline)845 set_newline(textio *self, const char *newline)
846 {
847     PyObject *old = self->readnl;
848     if (newline == NULL) {
849         self->readnl = NULL;
850     }
851     else {
852         self->readnl = PyUnicode_FromString(newline);
853         if (self->readnl == NULL) {
854             self->readnl = old;
855             return -1;
856         }
857     }
858     self->readuniversal = (newline == NULL || newline[0] == '\0');
859     self->readtranslate = (newline == NULL);
860     self->writetranslate = (newline == NULL || newline[0] != '\0');
861     if (!self->readuniversal && self->readnl != NULL) {
862         // validate_newline() accepts only ASCII newlines.
863         assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
864         self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
865         if (strcmp(self->writenl, "\n") == 0) {
866             self->writenl = NULL;
867         }
868     }
869     else {
870 #ifdef MS_WINDOWS
871         self->writenl = "\r\n";
872 #else
873         self->writenl = NULL;
874 #endif
875     }
876     Py_XDECREF(old);
877     return 0;
878 }
879 
880 static int
_textiowrapper_set_decoder(textio * self,PyObject * codec_info,const char * errors)881 _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
882                            const char *errors)
883 {
884     PyObject *res;
885     int r;
886 
887     res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
888     if (res == NULL)
889         return -1;
890 
891     r = PyObject_IsTrue(res);
892     Py_DECREF(res);
893     if (r == -1)
894         return -1;
895 
896     if (r != 1)
897         return 0;
898 
899     Py_CLEAR(self->decoder);
900     self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
901     if (self->decoder == NULL)
902         return -1;
903 
904     if (self->readuniversal) {
905         _PyIO_State *state = self->state;
906         PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
907             (PyObject *)state->PyIncrementalNewlineDecoder_Type,
908             self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
909         if (incrementalDecoder == NULL)
910             return -1;
911         Py_XSETREF(self->decoder, incrementalDecoder);
912     }
913 
914     return 0;
915 }
916 
917 static PyObject*
_textiowrapper_decode(_PyIO_State * state,PyObject * decoder,PyObject * bytes,int eof)918 _textiowrapper_decode(_PyIO_State *state, PyObject *decoder, PyObject *bytes,
919                       int eof)
920 {
921     PyObject *chars;
922 
923     if (Py_IS_TYPE(decoder, state->PyIncrementalNewlineDecoder_Type))
924         chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
925     else
926         chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes,
927                                            eof ? Py_True : Py_False, NULL);
928 
929     if (check_decoded(chars) < 0)
930         // check_decoded already decreases refcount
931         return NULL;
932 
933     return chars;
934 }
935 
936 static int
_textiowrapper_set_encoder(textio * self,PyObject * codec_info,const char * errors)937 _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
938                            const char *errors)
939 {
940     PyObject *res;
941     int r;
942 
943     res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
944     if (res == NULL)
945         return -1;
946 
947     r = PyObject_IsTrue(res);
948     Py_DECREF(res);
949     if (r == -1)
950         return -1;
951 
952     if (r != 1)
953         return 0;
954 
955     Py_CLEAR(self->encoder);
956     self->encodefunc = NULL;
957     self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
958     if (self->encoder == NULL)
959         return -1;
960 
961     /* Get the normalized named of the codec */
962     if (PyObject_GetOptionalAttr(codec_info, &_Py_ID(name), &res) < 0) {
963         return -1;
964     }
965     if (res != NULL && PyUnicode_Check(res)) {
966         const encodefuncentry *e = encodefuncs;
967         while (e->name != NULL) {
968             if (_PyUnicode_EqualToASCIIString(res, e->name)) {
969                 self->encodefunc = e->encodefunc;
970                 break;
971             }
972             e++;
973         }
974     }
975     Py_XDECREF(res);
976 
977     return 0;
978 }
979 
980 static int
_textiowrapper_fix_encoder_state(textio * self)981 _textiowrapper_fix_encoder_state(textio *self)
982 {
983     if (!self->seekable || !self->encoder) {
984         return 0;
985     }
986 
987     self->encoding_start_of_stream = 1;
988 
989     PyObject *cookieObj = PyObject_CallMethodNoArgs(
990         self->buffer, &_Py_ID(tell));
991     if (cookieObj == NULL) {
992         return -1;
993     }
994 
995     int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
996     Py_DECREF(cookieObj);
997     if (cmp < 0) {
998         return -1;
999     }
1000 
1001     if (cmp == 0) {
1002         self->encoding_start_of_stream = 0;
1003         PyObject *res = PyObject_CallMethodOneArg(
1004             self->encoder, &_Py_ID(setstate), _PyLong_GetZero());
1005         if (res == NULL) {
1006             return -1;
1007         }
1008         Py_DECREF(res);
1009     }
1010 
1011     return 0;
1012 }
1013 
1014 static int
io_check_errors(PyObject * errors)1015 io_check_errors(PyObject *errors)
1016 {
1017     assert(errors != NULL && errors != Py_None);
1018 
1019     PyInterpreterState *interp = _PyInterpreterState_GET();
1020 #ifndef Py_DEBUG
1021     /* In release mode, only check in development mode (-X dev) */
1022     if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
1023         return 0;
1024     }
1025 #else
1026     /* Always check in debug mode */
1027 #endif
1028 
1029     /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1030        before_PyUnicode_InitEncodings() is called. */
1031     if (!interp->unicode.fs_codec.encoding) {
1032         return 0;
1033     }
1034 
1035     const char *name = _PyUnicode_AsUTF8NoNUL(errors);
1036     if (name == NULL) {
1037         return -1;
1038     }
1039     PyObject *handler = PyCodec_LookupError(name);
1040     if (handler != NULL) {
1041         Py_DECREF(handler);
1042         return 0;
1043     }
1044     return -1;
1045 }
1046 
1047 
1048 
1049 /*[clinic input]
1050 _io.TextIOWrapper.__init__
1051     buffer: object
1052     encoding: str(accept={str, NoneType}) = None
1053     errors: object = None
1054     newline: str(accept={str, NoneType}) = None
1055     line_buffering: bool = False
1056     write_through: bool = False
1057 
1058 Character and line based layer over a BufferedIOBase object, buffer.
1059 
1060 encoding gives the name of the encoding that the stream will be
1061 decoded or encoded with. It defaults to locale.getencoding().
1062 
1063 errors determines the strictness of encoding and decoding (see
1064 help(codecs.Codec) or the documentation for codecs.register) and
1065 defaults to "strict".
1066 
1067 newline controls how line endings are handled. It can be None, '',
1068 '\n', '\r', and '\r\n'.  It works as follows:
1069 
1070 * On input, if newline is None, universal newlines mode is
1071   enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1072   these are translated into '\n' before being returned to the
1073   caller. If it is '', universal newline mode is enabled, but line
1074   endings are returned to the caller untranslated. If it has any of
1075   the other legal values, input lines are only terminated by the given
1076   string, and the line ending is returned to the caller untranslated.
1077 
1078 * On output, if newline is None, any '\n' characters written are
1079   translated to the system default line separator, os.linesep. If
1080   newline is '' or '\n', no translation takes place. If newline is any
1081   of the other legal values, any '\n' characters written are translated
1082   to the given string.
1083 
1084 If line_buffering is True, a call to flush is implied when a call to
1085 write contains a newline character.
1086 [clinic start generated code]*/
1087 
1088 static int
_io_TextIOWrapper___init___impl(textio * self,PyObject * buffer,const char * encoding,PyObject * errors,const char * newline,int line_buffering,int write_through)1089 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1090                                 const char *encoding, PyObject *errors,
1091                                 const char *newline, int line_buffering,
1092                                 int write_through)
1093 /*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/
1094 {
1095     PyObject *raw, *codec_info = NULL;
1096     PyObject *res;
1097     int r;
1098 
1099     self->ok = 0;
1100     self->detached = 0;
1101 
1102     if (encoding == NULL) {
1103         PyInterpreterState *interp = _PyInterpreterState_GET();
1104         if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
1105             if (PyErr_WarnEx(PyExc_EncodingWarning,
1106                              "'encoding' argument not specified", 1)) {
1107                 return -1;
1108             }
1109         }
1110     }
1111 
1112     if (errors == Py_None) {
1113         errors = &_Py_ID(strict);
1114     }
1115     else if (!PyUnicode_Check(errors)) {
1116         // Check 'errors' argument here because Argument Clinic doesn't support
1117         // 'str(accept={str, NoneType})' converter.
1118         PyErr_Format(
1119             PyExc_TypeError,
1120             "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1121             Py_TYPE(errors)->tp_name);
1122         return -1;
1123     }
1124     else if (io_check_errors(errors)) {
1125         return -1;
1126     }
1127     const char *errors_str = _PyUnicode_AsUTF8NoNUL(errors);
1128     if (errors_str == NULL) {
1129         return -1;
1130     }
1131 
1132     if (validate_newline(newline) < 0) {
1133         return -1;
1134     }
1135 
1136     Py_CLEAR(self->buffer);
1137     Py_CLEAR(self->encoding);
1138     Py_CLEAR(self->encoder);
1139     Py_CLEAR(self->decoder);
1140     Py_CLEAR(self->readnl);
1141     Py_CLEAR(self->decoded_chars);
1142     Py_CLEAR(self->pending_bytes);
1143     Py_CLEAR(self->snapshot);
1144     Py_CLEAR(self->errors);
1145     Py_CLEAR(self->raw);
1146     self->decoded_chars_used = 0;
1147     self->pending_bytes_count = 0;
1148     self->encodefunc = NULL;
1149     self->b2cratio = 0.0;
1150 
1151     if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
1152         _Py_DECLARE_STR(utf_8, "utf-8");
1153         self->encoding = &_Py_STR(utf_8);
1154     }
1155     else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
1156         self->encoding = _Py_GetLocaleEncodingObject();
1157         if (self->encoding == NULL) {
1158             goto error;
1159         }
1160         assert(PyUnicode_Check(self->encoding));
1161     }
1162 
1163     if (self->encoding != NULL) {
1164         encoding = PyUnicode_AsUTF8(self->encoding);
1165         if (encoding == NULL)
1166             goto error;
1167     }
1168     else if (encoding != NULL) {
1169         self->encoding = PyUnicode_FromString(encoding);
1170         if (self->encoding == NULL)
1171             goto error;
1172     }
1173     else {
1174         PyErr_SetString(PyExc_OSError,
1175                         "could not determine default encoding");
1176         goto error;
1177     }
1178 
1179     /* Check we have been asked for a real text encoding */
1180     codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1181     if (codec_info == NULL) {
1182         Py_CLEAR(self->encoding);
1183         goto error;
1184     }
1185 
1186     /* XXX: Failures beyond this point have the potential to leak elements
1187      * of the partially constructed object (like self->encoding)
1188      */
1189 
1190     self->errors = Py_NewRef(errors);
1191     self->chunk_size = 8192;
1192     self->line_buffering = line_buffering;
1193     self->write_through = write_through;
1194     if (set_newline(self, newline) < 0) {
1195         goto error;
1196     }
1197 
1198     self->buffer = Py_NewRef(buffer);
1199 
1200     /* Build the decoder object */
1201     _PyIO_State *state = find_io_state_by_def(Py_TYPE(self));
1202     self->state = state;
1203     if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0)
1204         goto error;
1205 
1206     /* Build the encoder object */
1207     if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0)
1208         goto error;
1209 
1210     /* Finished sorting out the codec details */
1211     Py_CLEAR(codec_info);
1212 
1213     if (Py_IS_TYPE(buffer, state->PyBufferedReader_Type) ||
1214         Py_IS_TYPE(buffer, state->PyBufferedWriter_Type) ||
1215         Py_IS_TYPE(buffer, state->PyBufferedRandom_Type))
1216     {
1217         if (PyObject_GetOptionalAttr(buffer, &_Py_ID(raw), &raw) < 0)
1218             goto error;
1219         /* Cache the raw FileIO object to speed up 'closed' checks */
1220         if (raw != NULL) {
1221             if (Py_IS_TYPE(raw, state->PyFileIO_Type))
1222                 self->raw = raw;
1223             else
1224                 Py_DECREF(raw);
1225         }
1226     }
1227 
1228     res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
1229     if (res == NULL)
1230         goto error;
1231     r = PyObject_IsTrue(res);
1232     Py_DECREF(res);
1233     if (r < 0)
1234         goto error;
1235     self->seekable = self->telling = r;
1236 
1237     r = PyObject_HasAttrWithError(buffer, &_Py_ID(read1));
1238     if (r < 0) {
1239         goto error;
1240     }
1241     self->has_read1 = r;
1242 
1243     self->encoding_start_of_stream = 0;
1244     if (_textiowrapper_fix_encoder_state(self) < 0) {
1245         goto error;
1246     }
1247 
1248     self->ok = 1;
1249     return 0;
1250 
1251   error:
1252     Py_XDECREF(codec_info);
1253     return -1;
1254 }
1255 
1256 /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1257  * -1 on error.
1258  */
1259 static int
convert_optional_bool(PyObject * obj,int default_value)1260 convert_optional_bool(PyObject *obj, int default_value)
1261 {
1262     long v;
1263     if (obj == Py_None) {
1264         v = default_value;
1265     }
1266     else {
1267         v = PyLong_AsLong(obj);
1268         if (v == -1 && PyErr_Occurred())
1269             return -1;
1270     }
1271     return v != 0;
1272 }
1273 
1274 static int
textiowrapper_change_encoding(textio * self,PyObject * encoding,PyObject * errors,int newline_changed)1275 textiowrapper_change_encoding(textio *self, PyObject *encoding,
1276                               PyObject *errors, int newline_changed)
1277 {
1278     /* Use existing settings where new settings are not specified */
1279     if (encoding == Py_None && errors == Py_None && !newline_changed) {
1280         return 0;  // no change
1281     }
1282 
1283     if (encoding == Py_None) {
1284         encoding = self->encoding;
1285         if (errors == Py_None) {
1286             errors = self->errors;
1287         }
1288         Py_INCREF(encoding);
1289     }
1290     else {
1291         if (_PyUnicode_EqualToASCIIString(encoding, "locale")) {
1292             encoding = _Py_GetLocaleEncodingObject();
1293             if (encoding == NULL) {
1294                 return -1;
1295             }
1296         } else {
1297             Py_INCREF(encoding);
1298         }
1299         if (errors == Py_None) {
1300             errors = &_Py_ID(strict);
1301         }
1302     }
1303     Py_INCREF(errors);
1304 
1305     const char *c_encoding = PyUnicode_AsUTF8(encoding);
1306     if (c_encoding == NULL) {
1307         Py_DECREF(encoding);
1308         Py_DECREF(errors);
1309         return -1;
1310     }
1311     const char *c_errors = PyUnicode_AsUTF8(errors);
1312     if (c_errors == NULL) {
1313         Py_DECREF(encoding);
1314         Py_DECREF(errors);
1315         return -1;
1316     }
1317 
1318     // Create new encoder & decoder
1319     PyObject *codec_info = _PyCodec_LookupTextEncoding(
1320         c_encoding, "codecs.open()");
1321     if (codec_info == NULL) {
1322         Py_DECREF(encoding);
1323         Py_DECREF(errors);
1324         return -1;
1325     }
1326     if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1327             _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1328         Py_DECREF(codec_info);
1329         Py_DECREF(encoding);
1330         Py_DECREF(errors);
1331         return -1;
1332     }
1333     Py_DECREF(codec_info);
1334 
1335     Py_SETREF(self->encoding, encoding);
1336     Py_SETREF(self->errors, errors);
1337 
1338     return _textiowrapper_fix_encoder_state(self);
1339 }
1340 
1341 /*[clinic input]
1342 @critical_section
1343 _io.TextIOWrapper.reconfigure
1344     *
1345     encoding: object = None
1346     errors: object = None
1347     newline as newline_obj: object(c_default="NULL") = None
1348     line_buffering as line_buffering_obj: object = None
1349     write_through as write_through_obj: object = None
1350 
1351 Reconfigure the text stream with new parameters.
1352 
1353 This also does an implicit stream flush.
1354 
1355 [clinic start generated code]*/
1356 
1357 static PyObject *
_io_TextIOWrapper_reconfigure_impl(textio * self,PyObject * encoding,PyObject * errors,PyObject * newline_obj,PyObject * line_buffering_obj,PyObject * write_through_obj)1358 _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1359                                    PyObject *errors, PyObject *newline_obj,
1360                                    PyObject *line_buffering_obj,
1361                                    PyObject *write_through_obj)
1362 /*[clinic end generated code: output=52b812ff4b3d4b0f input=dc3bd35ebda702a7]*/
1363 {
1364     int line_buffering;
1365     int write_through;
1366     const char *newline = NULL;
1367 
1368     if (encoding != Py_None && !PyUnicode_Check(encoding)) {
1369         PyErr_Format(PyExc_TypeError,
1370                 "reconfigure() argument 'encoding' must be str or None, not %s",
1371                 Py_TYPE(encoding)->tp_name);
1372         return NULL;
1373     }
1374     if (errors != Py_None && !PyUnicode_Check(errors)) {
1375         PyErr_Format(PyExc_TypeError,
1376                 "reconfigure() argument 'errors' must be str or None, not %s",
1377                 Py_TYPE(errors)->tp_name);
1378         return NULL;
1379     }
1380     if (newline_obj != NULL && newline_obj != Py_None &&
1381         !PyUnicode_Check(newline_obj))
1382     {
1383         PyErr_Format(PyExc_TypeError,
1384                 "reconfigure() argument 'newline' must be str or None, not %s",
1385                 Py_TYPE(newline_obj)->tp_name);
1386         return NULL;
1387     }
1388     /* Check if something is in the read buffer */
1389     if (self->decoded_chars != NULL) {
1390         if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1391             _unsupported(self->state,
1392                          "It is not possible to set the encoding or newline "
1393                          "of stream after the first read");
1394             return NULL;
1395         }
1396     }
1397 
1398     if (newline_obj != NULL && newline_obj != Py_None) {
1399         newline = PyUnicode_AsUTF8(newline_obj);
1400         if (newline == NULL || validate_newline(newline) < 0) {
1401             return NULL;
1402         }
1403     }
1404 
1405     line_buffering = convert_optional_bool(line_buffering_obj,
1406                                            self->line_buffering);
1407     if (line_buffering < 0) {
1408         return NULL;
1409     }
1410     write_through = convert_optional_bool(write_through_obj,
1411                                           self->write_through);
1412     if (write_through < 0) {
1413         return NULL;
1414     }
1415 
1416     if (_PyFile_Flush((PyObject *)self) < 0) {
1417         return NULL;
1418     }
1419     self->b2cratio = 0;
1420 
1421     if (newline_obj != NULL && set_newline(self, newline) < 0) {
1422         return NULL;
1423     }
1424 
1425     if (textiowrapper_change_encoding(
1426             self, encoding, errors, newline_obj != NULL) < 0) {
1427         return NULL;
1428     }
1429 
1430     self->line_buffering = line_buffering;
1431     self->write_through = write_through;
1432     Py_RETURN_NONE;
1433 }
1434 
1435 static int
textiowrapper_clear(textio * self)1436 textiowrapper_clear(textio *self)
1437 {
1438     self->ok = 0;
1439     Py_CLEAR(self->buffer);
1440     Py_CLEAR(self->encoding);
1441     Py_CLEAR(self->encoder);
1442     Py_CLEAR(self->decoder);
1443     Py_CLEAR(self->readnl);
1444     Py_CLEAR(self->decoded_chars);
1445     Py_CLEAR(self->pending_bytes);
1446     Py_CLEAR(self->snapshot);
1447     Py_CLEAR(self->errors);
1448     Py_CLEAR(self->raw);
1449 
1450     Py_CLEAR(self->dict);
1451     return 0;
1452 }
1453 
1454 static void
textiowrapper_dealloc(textio * self)1455 textiowrapper_dealloc(textio *self)
1456 {
1457     PyTypeObject *tp = Py_TYPE(self);
1458     self->finalizing = 1;
1459     if (_PyIOBase_finalize((PyObject *) self) < 0)
1460         return;
1461     self->ok = 0;
1462     _PyObject_GC_UNTRACK(self);
1463     if (self->weakreflist != NULL)
1464         PyObject_ClearWeakRefs((PyObject *)self);
1465     (void)textiowrapper_clear(self);
1466     tp->tp_free((PyObject *)self);
1467     Py_DECREF(tp);
1468 }
1469 
1470 static int
textiowrapper_traverse(textio * self,visitproc visit,void * arg)1471 textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1472 {
1473     Py_VISIT(Py_TYPE(self));
1474     Py_VISIT(self->buffer);
1475     Py_VISIT(self->encoding);
1476     Py_VISIT(self->encoder);
1477     Py_VISIT(self->decoder);
1478     Py_VISIT(self->readnl);
1479     Py_VISIT(self->decoded_chars);
1480     Py_VISIT(self->pending_bytes);
1481     Py_VISIT(self->snapshot);
1482     Py_VISIT(self->errors);
1483     Py_VISIT(self->raw);
1484 
1485     Py_VISIT(self->dict);
1486     return 0;
1487 }
1488 
1489 static PyObject *
1490 _io_TextIOWrapper_closed_get_impl(textio *self);
1491 
/* Make the calling function return NULL when the stream is closed (with
 * ValueError set) or when the closed state cannot be determined.
 *
 * This macro takes some shortcuts to make the common case faster: for an
 * exact TextIOWrapper the cached raw FileIO object is asked directly when
 * there is one, else the 'closed' getter implementation is called.  Any
 * other type goes through the generic _PyIOBase_check_closed().
 */
#define CHECK_CLOSED(self) \
    do { \
        if (!Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) { \
            if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) { \
                return NULL; \
            } \
        } \
        else { \
            int _closed; \
            if (self->raw == NULL) { \
                PyObject *_res = _io_TextIOWrapper_closed_get_impl(self); \
                if (_res == NULL) { \
                    return NULL; \
                } \
                _closed = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (_closed < 0) { \
                    return NULL; \
                } \
            } \
            else { \
                _closed = _PyFileIO_closed(self->raw); \
            } \
            if (_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1518 
/* Make the calling function return NULL, with ValueError set, unless
 * __init__ has completed successfully (self->ok > 0).
 */
#define CHECK_INITIALIZED(self) \
    if (!(self->ok > 0)) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }

/* Same check as CHECK_INITIALIZED(); additionally rejects a wrapper whose
 * underlying buffer has been detached.
 */
#define CHECK_ATTACHED(self) \
    if (self->ok <= 0 || self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
            self->ok <= 0 ? "I/O operation on uninitialized object" \
                          : "underlying buffer has been detached"); \
        return NULL; \
    }

/* Variant of CHECK_ATTACHED() for functions that report errors with -1. */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0 || self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
            self->ok <= 0 ? "I/O operation on uninitialized object" \
                          : "underlying buffer has been detached"); \
        return -1; \
    }
1544 
1545 
1546 /*[clinic input]
1547 @critical_section
1548 _io.TextIOWrapper.detach
1549 [clinic start generated code]*/
1550 
1551 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1552 _io_TextIOWrapper_detach_impl(textio *self)
1553 /*[clinic end generated code: output=7ba3715cd032d5f2 input=c908a3b4ef203b0f]*/
1554 {
1555     PyObject *buffer;
1556     CHECK_ATTACHED(self);
1557     if (_PyFile_Flush((PyObject *)self) < 0) {
1558         return NULL;
1559     }
1560     buffer = self->buffer;
1561     self->buffer = NULL;
1562     self->detached = 1;
1563     return buffer;
1564 }
1565 
1566 /* Flush the internal write buffer. This doesn't explicitly flush the
1567    underlying buffered object, though. */
1568 static int
_textiowrapper_writeflush(textio * self)1569 _textiowrapper_writeflush(textio *self)
1570 {
1571     if (self->pending_bytes == NULL)
1572         return 0;
1573 
1574     PyObject *pending = self->pending_bytes;
1575     PyObject *b;
1576 
1577     if (PyBytes_Check(pending)) {
1578         b = Py_NewRef(pending);
1579     }
1580     else if (PyUnicode_Check(pending)) {
1581         assert(PyUnicode_IS_ASCII(pending));
1582         assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1583         b = PyBytes_FromStringAndSize(
1584                 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1585         if (b == NULL) {
1586             return -1;
1587         }
1588     }
1589     else {
1590         assert(PyList_Check(pending));
1591         b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1592         if (b == NULL) {
1593             return -1;
1594         }
1595 
1596         char *buf = PyBytes_AsString(b);
1597         Py_ssize_t pos = 0;
1598 
1599         for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1600             PyObject *obj = PyList_GET_ITEM(pending, i);
1601             char *src;
1602             Py_ssize_t len;
1603             if (PyUnicode_Check(obj)) {
1604                 assert(PyUnicode_IS_ASCII(obj));
1605                 src = PyUnicode_DATA(obj);
1606                 len = PyUnicode_GET_LENGTH(obj);
1607             }
1608             else {
1609                 assert(PyBytes_Check(obj));
1610                 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1611                     Py_DECREF(b);
1612                     return -1;
1613                 }
1614             }
1615             memcpy(buf + pos, src, len);
1616             pos += len;
1617         }
1618         assert(pos == self->pending_bytes_count);
1619     }
1620 
1621     self->pending_bytes_count = 0;
1622     self->pending_bytes = NULL;
1623     Py_DECREF(pending);
1624 
1625     PyObject *ret;
1626     do {
1627         ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
1628     } while (ret == NULL && _PyIO_trap_eintr());
1629     Py_DECREF(b);
1630     // NOTE: We cleared buffer but we don't know how many bytes are actually written
1631     // when an error occurred.
1632     if (ret == NULL)
1633         return -1;
1634     Py_DECREF(ret);
1635     return 0;
1636 }
1637 
1638 /*[clinic input]
1639 @critical_section
1640 _io.TextIOWrapper.write
1641     text: unicode
1642     /
1643 [clinic start generated code]*/
1644 
1645 static PyObject *
_io_TextIOWrapper_write_impl(textio * self,PyObject * text)1646 _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1647 /*[clinic end generated code: output=d2deb0d50771fcec input=73ec95c5c4a3489c]*/
1648 {
1649     PyObject *ret;
1650     PyObject *b;
1651     Py_ssize_t textlen;
1652     int haslf = 0;
1653     int needflush = 0, text_needflush = 0;
1654 
1655     CHECK_ATTACHED(self);
1656     CHECK_CLOSED(self);
1657 
1658     if (self->encoder == NULL) {
1659         return _unsupported(self->state, "not writable");
1660     }
1661 
1662     Py_INCREF(text);
1663 
1664     textlen = PyUnicode_GET_LENGTH(text);
1665 
1666     if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1667         if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1668             haslf = 1;
1669 
1670     if (haslf && self->writetranslate && self->writenl != NULL) {
1671         PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
1672                                                  "ss", "\n", self->writenl);
1673         Py_DECREF(text);
1674         if (newtext == NULL)
1675             return NULL;
1676         text = newtext;
1677     }
1678 
1679     if (self->write_through)
1680         text_needflush = 1;
1681     if (self->line_buffering &&
1682         (haslf ||
1683          PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1684         needflush = 1;
1685 
1686     /* XXX What if we were just reading? */
1687     if (self->encodefunc != NULL) {
1688         if (PyUnicode_IS_ASCII(text) &&
1689                 // See bpo-43260
1690                 PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1691                 is_asciicompat_encoding(self->encodefunc)) {
1692             b = Py_NewRef(text);
1693         }
1694         else {
1695             b = (*self->encodefunc)((PyObject *) self, text);
1696         }
1697         self->encoding_start_of_stream = 0;
1698     }
1699     else {
1700         b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
1701     }
1702 
1703     Py_DECREF(text);
1704     if (b == NULL)
1705         return NULL;
1706     if (b != text && !PyBytes_Check(b)) {
1707         PyErr_Format(PyExc_TypeError,
1708                      "encoder should return a bytes object, not '%.200s'",
1709                      Py_TYPE(b)->tp_name);
1710         Py_DECREF(b);
1711         return NULL;
1712     }
1713 
1714     Py_ssize_t bytes_len;
1715     if (b == text) {
1716         bytes_len = PyUnicode_GET_LENGTH(b);
1717     }
1718     else {
1719         bytes_len = PyBytes_GET_SIZE(b);
1720     }
1721 
1722     // We should avoid concatinating huge data.
1723     // Flush the buffer before adding b to the buffer if b is not small.
1724     // https://github.com/python/cpython/issues/87426
1725     if (bytes_len >= self->chunk_size) {
1726         // _textiowrapper_writeflush() calls buffer.write().
1727         // self->pending_bytes can be appended during buffer->write()
1728         // or other thread.
1729         // We need to loop until buffer becomes empty.
1730         // https://github.com/python/cpython/issues/118138
1731         // https://github.com/python/cpython/issues/119506
1732         while (self->pending_bytes != NULL) {
1733             if (_textiowrapper_writeflush(self) < 0) {
1734                 Py_DECREF(b);
1735                 return NULL;
1736             }
1737         }
1738     }
1739 
1740     if (self->pending_bytes == NULL) {
1741         assert(self->pending_bytes_count == 0);
1742         self->pending_bytes = b;
1743     }
1744     else if (!PyList_CheckExact(self->pending_bytes)) {
1745         PyObject *list = PyList_New(2);
1746         if (list == NULL) {
1747             Py_DECREF(b);
1748             return NULL;
1749         }
1750         // Since Python 3.12, allocating GC object won't trigger GC and release
1751         // GIL. See https://github.com/python/cpython/issues/97922
1752         assert(!PyList_CheckExact(self->pending_bytes));
1753         PyList_SET_ITEM(list, 0, self->pending_bytes);
1754         PyList_SET_ITEM(list, 1, b);
1755         self->pending_bytes = list;
1756     }
1757     else {
1758         if (PyList_Append(self->pending_bytes, b) < 0) {
1759             Py_DECREF(b);
1760             return NULL;
1761         }
1762         Py_DECREF(b);
1763     }
1764 
1765     self->pending_bytes_count += bytes_len;
1766     if (self->pending_bytes_count >= self->chunk_size || needflush ||
1767         text_needflush) {
1768         if (_textiowrapper_writeflush(self) < 0)
1769             return NULL;
1770     }
1771 
1772     if (needflush) {
1773         if (_PyFile_Flush(self->buffer) < 0) {
1774             return NULL;
1775         }
1776     }
1777 
1778     if (self->snapshot != NULL) {
1779         textiowrapper_set_decoded_chars(self, NULL);
1780         Py_CLEAR(self->snapshot);
1781     }
1782 
1783     if (self->decoder) {
1784         ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
1785         if (ret == NULL)
1786             return NULL;
1787         Py_DECREF(ret);
1788     }
1789 
1790     return PyLong_FromSsize_t(textlen);
1791 }
1792 
1793 /* Steal a reference to chars and store it in the decoded_char buffer;
1794  */
1795 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1796 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1797 {
1798     Py_XSETREF(self->decoded_chars, chars);
1799     self->decoded_chars_used = 0;
1800 }
1801 
1802 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1803 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1804 {
1805     PyObject *chars;
1806     Py_ssize_t avail;
1807 
1808     if (self->decoded_chars == NULL)
1809         return PyUnicode_FromStringAndSize(NULL, 0);
1810 
1811     /* decoded_chars is guaranteed to be "ready". */
1812     avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1813              - self->decoded_chars_used);
1814 
1815     assert(avail >= 0);
1816 
1817     if (n < 0 || n > avail)
1818         n = avail;
1819 
1820     if (self->decoded_chars_used > 0 || n < avail) {
1821         chars = PyUnicode_Substring(self->decoded_chars,
1822                                     self->decoded_chars_used,
1823                                     self->decoded_chars_used + n);
1824         if (chars == NULL)
1825             return NULL;
1826     }
1827     else {
1828         chars = Py_NewRef(self->decoded_chars);
1829     }
1830 
1831     self->decoded_chars_used += n;
1832     return chars;
1833 }
1834 
1835 /* Read and decode the next chunk of data from the BufferedReader.
1836  */
1837 static int
textiowrapper_read_chunk(textio * self,Py_ssize_t size_hint)1838 textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1839 {
1840     PyObject *dec_buffer = NULL;
1841     PyObject *dec_flags = NULL;
1842     PyObject *input_chunk = NULL;
1843     Py_buffer input_chunk_buf;
1844     PyObject *decoded_chars, *chunk_size;
1845     Py_ssize_t nbytes, nchars;
1846     int eof;
1847 
1848     /* The return value is True unless EOF was reached.  The decoded string is
1849      * placed in self._decoded_chars (replacing its previous value).  The
1850      * entire input chunk is sent to the decoder, though some of it may remain
1851      * buffered in the decoder, yet to be converted.
1852      */
1853 
1854     if (self->decoder == NULL) {
1855         _unsupported(self->state, "not readable");
1856         return -1;
1857     }
1858 
1859     if (self->telling) {
1860         /* To prepare for tell(), we need to snapshot a point in the file
1861          * where the decoder's input buffer is empty.
1862          */
1863         PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
1864                                                      &_Py_ID(getstate));
1865         if (state == NULL)
1866             return -1;
1867         /* Given this, we know there was a valid snapshot point
1868          * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1869          */
1870         if (!PyTuple_Check(state)) {
1871             PyErr_SetString(PyExc_TypeError,
1872                             "illegal decoder state");
1873             Py_DECREF(state);
1874             return -1;
1875         }
1876         if (!PyArg_ParseTuple(state,
1877                               "OO;illegal decoder state", &dec_buffer, &dec_flags))
1878         {
1879             Py_DECREF(state);
1880             return -1;
1881         }
1882 
1883         if (!PyBytes_Check(dec_buffer)) {
1884             PyErr_Format(PyExc_TypeError,
1885                          "illegal decoder state: the first item should be a "
1886                          "bytes object, not '%.200s'",
1887                          Py_TYPE(dec_buffer)->tp_name);
1888             Py_DECREF(state);
1889             return -1;
1890         }
1891         Py_INCREF(dec_buffer);
1892         Py_INCREF(dec_flags);
1893         Py_DECREF(state);
1894     }
1895 
1896     /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1897     if (size_hint > 0) {
1898         size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1899     }
1900     chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1901     if (chunk_size == NULL)
1902         goto fail;
1903 
1904     input_chunk = PyObject_CallMethodOneArg(self->buffer,
1905         (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
1906         chunk_size);
1907     Py_DECREF(chunk_size);
1908     if (input_chunk == NULL)
1909         goto fail;
1910 
1911     if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1912         PyErr_Format(PyExc_TypeError,
1913                      "underlying %s() should have returned a bytes-like object, "
1914                      "not '%.200s'", (self->has_read1 ? "read1": "read"),
1915                      Py_TYPE(input_chunk)->tp_name);
1916         goto fail;
1917     }
1918 
1919     nbytes = input_chunk_buf.len;
1920     eof = (nbytes == 0);
1921 
1922     decoded_chars = _textiowrapper_decode(self->state, self->decoder,
1923                                           input_chunk, eof);
1924     PyBuffer_Release(&input_chunk_buf);
1925     if (decoded_chars == NULL)
1926         goto fail;
1927 
1928     textiowrapper_set_decoded_chars(self, decoded_chars);
1929     nchars = PyUnicode_GET_LENGTH(decoded_chars);
1930     if (nchars > 0)
1931         self->b2cratio = (double) nbytes / nchars;
1932     else
1933         self->b2cratio = 0.0;
1934     if (nchars > 0)
1935         eof = 0;
1936 
1937     if (self->telling) {
1938         /* At the snapshot point, len(dec_buffer) bytes before the read, the
1939          * next input to be decoded is dec_buffer + input_chunk.
1940          */
1941         PyObject *next_input = dec_buffer;
1942         PyBytes_Concat(&next_input, input_chunk);
1943         dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1944         if (next_input == NULL) {
1945             goto fail;
1946         }
1947         PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1948         if (snapshot == NULL) {
1949             dec_flags = NULL;
1950             goto fail;
1951         }
1952         Py_XSETREF(self->snapshot, snapshot);
1953     }
1954     Py_DECREF(input_chunk);
1955 
1956     return (eof == 0);
1957 
1958   fail:
1959     Py_XDECREF(dec_buffer);
1960     Py_XDECREF(dec_flags);
1961     Py_XDECREF(input_chunk);
1962     return -1;
1963 }
1964 
1965 /*[clinic input]
1966 @critical_section
1967 _io.TextIOWrapper.read
1968     size as n: Py_ssize_t(accept={int, NoneType}) = -1
1969     /
1970 [clinic start generated code]*/
1971 
1972 static PyObject *
_io_TextIOWrapper_read_impl(textio * self,Py_ssize_t n)1973 _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1974 /*[clinic end generated code: output=7e651ce6cc6a25a6 input=67d14c5661121377]*/
1975 {
1976     PyObject *result = NULL, *chunks = NULL;
1977 
1978     CHECK_ATTACHED(self);
1979     CHECK_CLOSED(self);
1980 
1981     if (self->decoder == NULL) {
1982         return _unsupported(self->state, "not readable");
1983     }
1984 
1985     if (_textiowrapper_writeflush(self) < 0)
1986         return NULL;
1987 
1988     if (n < 0) {
1989         /* Read everything */
1990         PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
1991         PyObject *decoded;
1992         if (bytes == NULL)
1993             goto fail;
1994 
1995         _PyIO_State *state = self->state;
1996         if (Py_IS_TYPE(self->decoder, state->PyIncrementalNewlineDecoder_Type))
1997             decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1998                                                           bytes, 1);
1999         else
2000             decoded = PyObject_CallMethodObjArgs(
2001                 self->decoder, &_Py_ID(decode), bytes, Py_True, NULL);
2002         Py_DECREF(bytes);
2003         if (check_decoded(decoded) < 0)
2004             goto fail;
2005 
2006         result = textiowrapper_get_decoded_chars(self, -1);
2007 
2008         if (result == NULL) {
2009             Py_DECREF(decoded);
2010             return NULL;
2011         }
2012 
2013         PyUnicode_AppendAndDel(&result, decoded);
2014         if (result == NULL)
2015             goto fail;
2016 
2017         if (self->snapshot != NULL) {
2018             textiowrapper_set_decoded_chars(self, NULL);
2019             Py_CLEAR(self->snapshot);
2020         }
2021         return result;
2022     }
2023     else {
2024         int res = 1;
2025         Py_ssize_t remaining = n;
2026 
2027         result = textiowrapper_get_decoded_chars(self, n);
2028         if (result == NULL)
2029             goto fail;
2030         remaining -= PyUnicode_GET_LENGTH(result);
2031 
2032         /* Keep reading chunks until we have n characters to return */
2033         while (remaining > 0) {
2034             res = textiowrapper_read_chunk(self, remaining);
2035             if (res < 0) {
2036                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2037                    when EINTR occurs so we needn't do it ourselves. */
2038                 if (_PyIO_trap_eintr()) {
2039                     continue;
2040                 }
2041                 goto fail;
2042             }
2043             if (res == 0)  /* EOF */
2044                 break;
2045             if (chunks == NULL) {
2046                 chunks = PyList_New(0);
2047                 if (chunks == NULL)
2048                     goto fail;
2049             }
2050             if (PyUnicode_GET_LENGTH(result) > 0 &&
2051                 PyList_Append(chunks, result) < 0)
2052                 goto fail;
2053             Py_DECREF(result);
2054             result = textiowrapper_get_decoded_chars(self, remaining);
2055             if (result == NULL)
2056                 goto fail;
2057             remaining -= PyUnicode_GET_LENGTH(result);
2058         }
2059         if (chunks != NULL) {
2060             if (result != NULL && PyList_Append(chunks, result) < 0)
2061                 goto fail;
2062             _Py_DECLARE_STR(empty, "");
2063             Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
2064             if (result == NULL)
2065                 goto fail;
2066             Py_CLEAR(chunks);
2067         }
2068         return result;
2069     }
2070   fail:
2071     Py_XDECREF(result);
2072     Py_XDECREF(chunks);
2073     return NULL;
2074 }
2075 
2076 
2077 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2078    that is to the NUL character. Otherwise the function will produce
2079    incorrect results. */
2080 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)2081 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2082 {
2083     if (kind == PyUnicode_1BYTE_KIND) {
2084         assert(ch < 256);
2085         return (char *) memchr((const void *) s, (char) ch, end - s);
2086     }
2087     for (;;) {
2088         while (PyUnicode_READ(kind, s, 0) > ch)
2089             s += kind;
2090         if (PyUnicode_READ(kind, s, 0) == ch)
2091             return s;
2092         if (s == end)
2093             return NULL;
2094         s += kind;
2095     }
2096 }
2097 
2098 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,int kind,const char * start,const char * end,Py_ssize_t * consumed)2099 _PyIO_find_line_ending(
2100     int translated, int universal, PyObject *readnl,
2101     int kind, const char *start, const char *end, Py_ssize_t *consumed)
2102 {
2103     Py_ssize_t len = (end - start)/kind;
2104 
2105     if (translated) {
2106         /* Newlines are already translated, only search for \n */
2107         const char *pos = find_control_char(kind, start, end, '\n');
2108         if (pos != NULL)
2109             return (pos - start)/kind + 1;
2110         else {
2111             *consumed = len;
2112             return -1;
2113         }
2114     }
2115     else if (universal) {
2116         /* Universal newline search. Find any of \r, \r\n, \n
2117          * The decoder ensures that \r\n are not split in two pieces
2118          */
2119         const char *s = start;
2120         for (;;) {
2121             Py_UCS4 ch;
2122             /* Fast path for non-control chars. The loop always ends
2123                since the Unicode string is NUL-terminated. */
2124             while (PyUnicode_READ(kind, s, 0) > '\r')
2125                 s += kind;
2126             if (s >= end) {
2127                 *consumed = len;
2128                 return -1;
2129             }
2130             ch = PyUnicode_READ(kind, s, 0);
2131             s += kind;
2132             if (ch == '\n')
2133                 return (s - start)/kind;
2134             if (ch == '\r') {
2135                 if (PyUnicode_READ(kind, s, 0) == '\n')
2136                     return (s - start)/kind + 1;
2137                 else
2138                     return (s - start)/kind;
2139             }
2140         }
2141     }
2142     else {
2143         /* Non-universal mode. */
2144         Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2145         const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2146         /* Assume that readnl is an ASCII character. */
2147         assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2148         if (readnl_len == 1) {
2149             const char *pos = find_control_char(kind, start, end, nl[0]);
2150             if (pos != NULL)
2151                 return (pos - start)/kind + 1;
2152             *consumed = len;
2153             return -1;
2154         }
2155         else {
2156             const char *s = start;
2157             const char *e = end - (readnl_len - 1)*kind;
2158             const char *pos;
2159             if (e < s)
2160                 e = s;
2161             while (s < e) {
2162                 Py_ssize_t i;
2163                 const char *pos = find_control_char(kind, s, end, nl[0]);
2164                 if (pos == NULL || pos >= e)
2165                     break;
2166                 for (i = 1; i < readnl_len; i++) {
2167                     if (PyUnicode_READ(kind, pos, i) != nl[i])
2168                         break;
2169                 }
2170                 if (i == readnl_len)
2171                     return (pos - start)/kind + readnl_len;
2172                 s = pos + kind;
2173             }
2174             pos = find_control_char(kind, e, end, nl[0]);
2175             if (pos == NULL)
2176                 *consumed = len;
2177             else
2178                 *consumed = (pos - start)/kind;
2179             return -1;
2180         }
2181     }
2182 }
2183 
2184 static PyObject *
_textiowrapper_readline(textio * self,Py_ssize_t limit)2185 _textiowrapper_readline(textio *self, Py_ssize_t limit)
2186 {
2187     PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2188     Py_ssize_t start, endpos, chunked, offset_to_buffer;
2189     int res;
2190 
2191     CHECK_CLOSED(self);
2192 
2193     if (_textiowrapper_writeflush(self) < 0)
2194         return NULL;
2195 
2196     chunked = 0;
2197 
2198     while (1) {
2199         const char *ptr;
2200         Py_ssize_t line_len;
2201         int kind;
2202         Py_ssize_t consumed = 0;
2203 
2204         /* First, get some data if necessary */
2205         res = 1;
2206         while (!self->decoded_chars ||
2207                !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2208             res = textiowrapper_read_chunk(self, 0);
2209             if (res < 0) {
2210                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2211                    when EINTR occurs so we needn't do it ourselves. */
2212                 if (_PyIO_trap_eintr()) {
2213                     continue;
2214                 }
2215                 goto error;
2216             }
2217             if (res == 0)
2218                 break;
2219         }
2220         if (res == 0) {
2221             /* end of file */
2222             textiowrapper_set_decoded_chars(self, NULL);
2223             Py_CLEAR(self->snapshot);
2224             start = endpos = offset_to_buffer = 0;
2225             break;
2226         }
2227 
2228         if (remaining == NULL) {
2229             line = Py_NewRef(self->decoded_chars);
2230             start = self->decoded_chars_used;
2231             offset_to_buffer = 0;
2232         }
2233         else {
2234             assert(self->decoded_chars_used == 0);
2235             line = PyUnicode_Concat(remaining, self->decoded_chars);
2236             start = 0;
2237             offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2238             Py_CLEAR(remaining);
2239             if (line == NULL)
2240                 goto error;
2241         }
2242 
2243         ptr = PyUnicode_DATA(line);
2244         line_len = PyUnicode_GET_LENGTH(line);
2245         kind = PyUnicode_KIND(line);
2246 
2247         endpos = _PyIO_find_line_ending(
2248             self->readtranslate, self->readuniversal, self->readnl,
2249             kind,
2250             ptr + kind * start,
2251             ptr + kind * line_len,
2252             &consumed);
2253         if (endpos >= 0) {
2254             endpos += start;
2255             if (limit >= 0 && (endpos - start) + chunked >= limit)
2256                 endpos = start + limit - chunked;
2257             break;
2258         }
2259 
2260         /* We can put aside up to `endpos` */
2261         endpos = consumed + start;
2262         if (limit >= 0 && (endpos - start) + chunked >= limit) {
2263             /* Didn't find line ending, but reached length limit */
2264             endpos = start + limit - chunked;
2265             break;
2266         }
2267 
2268         if (endpos > start) {
2269             /* No line ending seen yet - put aside current data */
2270             PyObject *s;
2271             if (chunks == NULL) {
2272                 chunks = PyList_New(0);
2273                 if (chunks == NULL)
2274                     goto error;
2275             }
2276             s = PyUnicode_Substring(line, start, endpos);
2277             if (s == NULL)
2278                 goto error;
2279             if (PyList_Append(chunks, s) < 0) {
2280                 Py_DECREF(s);
2281                 goto error;
2282             }
2283             chunked += PyUnicode_GET_LENGTH(s);
2284             Py_DECREF(s);
2285         }
2286         /* There may be some remaining bytes we'll have to prepend to the
2287            next chunk of data */
2288         if (endpos < line_len) {
2289             remaining = PyUnicode_Substring(line, endpos, line_len);
2290             if (remaining == NULL)
2291                 goto error;
2292         }
2293         Py_CLEAR(line);
2294         /* We have consumed the buffer */
2295         textiowrapper_set_decoded_chars(self, NULL);
2296     }
2297 
2298     if (line != NULL) {
2299         /* Our line ends in the current buffer */
2300         self->decoded_chars_used = endpos - offset_to_buffer;
2301         if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2302             PyObject *s = PyUnicode_Substring(line, start, endpos);
2303             Py_CLEAR(line);
2304             if (s == NULL)
2305                 goto error;
2306             line = s;
2307         }
2308     }
2309     if (remaining != NULL) {
2310         if (chunks == NULL) {
2311             chunks = PyList_New(0);
2312             if (chunks == NULL)
2313                 goto error;
2314         }
2315         if (PyList_Append(chunks, remaining) < 0)
2316             goto error;
2317         Py_CLEAR(remaining);
2318     }
2319     if (chunks != NULL) {
2320         if (line != NULL) {
2321             if (PyList_Append(chunks, line) < 0)
2322                 goto error;
2323             Py_DECREF(line);
2324         }
2325         line = PyUnicode_Join(&_Py_STR(empty), chunks);
2326         if (line == NULL)
2327             goto error;
2328         Py_CLEAR(chunks);
2329     }
2330     if (line == NULL) {
2331         line = &_Py_STR(empty);
2332     }
2333 
2334     return line;
2335 
2336   error:
2337     Py_XDECREF(chunks);
2338     Py_XDECREF(remaining);
2339     Py_XDECREF(line);
2340     return NULL;
2341 }
2342 
2343 /*[clinic input]
2344 @critical_section
2345 _io.TextIOWrapper.readline
2346     size: Py_ssize_t = -1
2347     /
2348 [clinic start generated code]*/
2349 
2350 static PyObject *
_io_TextIOWrapper_readline_impl(textio * self,Py_ssize_t size)2351 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2352 /*[clinic end generated code: output=344afa98804e8b25 input=b65bab871dc3ddba]*/
2353 {
2354     CHECK_ATTACHED(self);
2355     return _textiowrapper_readline(self, size);
2356 }
2357 
2358 /* Seek and Tell */
2359 
2360 typedef struct {        /* Unpacked form of the number exchanged by tell() and seek(). */
2361     Py_off_t start_pos;      /* buffer position seek() moves to before replaying */
2362     int dec_flags;           /* flags given to decoder.setstate((b"", dec_flags)) */
2363     int bytes_to_feed;       /* size passed to buffer.read() when replaying in seek() */
2364     int chars_to_skip;       /* decoded characters seek() marks as already used */
2365     char need_eof;           /* nonzero: seek() passes True as 2nd argument of decode() */
2366 } cookie_type;
2367 
2368 /*
2369    To speed up cookie packing/unpacking, we store the fields in a temporary
2370    byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
2371    (respectively).  The following macros define at which offsets in that
2372    intermediary buffer the various cookie_type fields will be stored.
2373  */
2374 
2375 #define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))  /* one Py_off_t, three ints, one char */
2376 
2377 #if PY_BIG_ENDIAN
2378 /* We want the least significant byte of start_pos to also be the least
2379    significant byte of the cookie, which means that in big-endian mode we
2380    must copy the fields in reverse order. */
2381 
2382 # define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
2383 # define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
2384 # define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
2385 # define OFF_CHARS_TO_SKIP  (sizeof(char))
2386 # define OFF_NEED_EOF       0
2387 
2388 #else
2389 /* Little-endian mode: the least significant byte of start_pos will
2390    naturally end up the least significant byte of the cookie. */
2391 
2392 # define OFF_START_POS      0
2393 # define OFF_DEC_FLAGS      (sizeof(Py_off_t))
2394 # define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
2395 # define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
2396 # define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))
2397 
2398 #endif  /* PY_BIG_ENDIAN */
2399 
2400 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)2401 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2402 {
2403     unsigned char buffer[COOKIE_BUF_LEN];
2404     PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2405     if (cookieLong == NULL)
2406         return -1;
2407 
2408     if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2409                             PY_LITTLE_ENDIAN, 0, 1) < 0) {
2410         Py_DECREF(cookieLong);
2411         return -1;
2412     }
2413     Py_DECREF(cookieLong);
2414 
2415     memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2416     memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2417     memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2418     memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2419     memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2420 
2421     return 0;
2422 }
2423 
2424 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)2425 textiowrapper_build_cookie(cookie_type *cookie)
2426 {
2427     unsigned char buffer[COOKIE_BUF_LEN];
2428 
2429     memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2430     memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2431     memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2432     memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2433     memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2434 
2435     return _PyLong_FromByteArray(buffer, sizeof(buffer),
2436                                  PY_LITTLE_ENDIAN, 0);
2437 }
2438 
2439 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2440 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2441 {
2442     PyObject *res;
2443     /* When seeking to the start of the stream, we call decoder.reset()
2444        rather than decoder.getstate().
2445        This is for a few decoders such as utf-16 for which the state value
2446        at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2447        utf-16, that we are expecting a BOM).
2448     */
2449     if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2450         res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2451     }
2452     else {
2453         res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
2454                                    "((yi))", "", cookie->dec_flags);
2455     }
2456     if (res == NULL) {
2457         return -1;
2458     }
2459     Py_DECREF(res);
2460     return 0;
2461 }
2462 
2463 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2464 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2465 {
2466     PyObject *res;
2467     if (start_of_stream) {
2468         res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset));
2469         self->encoding_start_of_stream = 1;
2470     }
2471     else {
2472         res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate),
2473                                         _PyLong_GetZero());
2474         self->encoding_start_of_stream = 0;
2475     }
2476     if (res == NULL)
2477         return -1;
2478     Py_DECREF(res);
2479     return 0;
2480 }
2481 
2482 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2483 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2484 {
2485     /* Same as _textiowrapper_decoder_setstate() above. */
2486     return _textiowrapper_encoder_reset(
2487         self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2488 }
2489 
2490 /*[clinic input]
2491 @critical_section
2492 _io.TextIOWrapper.seek
2493     cookie as cookieObj: object
2494       Zero or an opaque number returned by tell().
2495     whence: int(c_default='0') = os.SEEK_SET
2496       The relative position to seek from.
2497     /
2498 
2499 Set the stream position, and return the new stream position.
2500 
2501 Four operations are supported, given by the following argument
2502 combinations:
2503 
2504 - seek(0, SEEK_SET): Rewind to the start of the stream.
2505 - seek(cookie, SEEK_SET): Restore a previous position;
2506   'cookie' must be a number returned by tell().
2507 - seek(0, SEEK_END): Fast-forward to the end of the stream.
2508 - seek(0, SEEK_CUR): Leave the current stream position unchanged.
2509 
2510 Any other argument combinations are invalid,
2511 and may raise exceptions.
2512 [clinic start generated code]*/
2513 
2514 static PyObject *
_io_TextIOWrapper_seek_impl(textio * self,PyObject * cookieObj,int whence)2515 _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2516 /*[clinic end generated code: output=0a15679764e2d04d input=4bea78698be23d7e]*/
2517 {
2518     PyObject *posobj;
2519     cookie_type cookie;
2520     PyObject *res;
2521     int cmp;
2522     PyObject *snapshot;
2523 
2524     CHECK_ATTACHED(self);
2525     CHECK_CLOSED(self);
2526 
2527     Py_INCREF(cookieObj);
2528 
2529     if (!self->seekable) {
2530         _unsupported(self->state, "underlying stream is not seekable");
2531         goto fail;
2532     }
2533 
2534     PyObject *zero = _PyLong_GetZero();  // borrowed reference
2535 
2536     switch (whence) {
2537     case SEEK_CUR:
2538         /* seek relative to current position */
2539         cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2540         if (cmp < 0)
2541             goto fail;
2542 
2543         if (cmp == 0) {
2544             _unsupported(self->state, "can't do nonzero cur-relative seeks");
2545             goto fail;
2546         }
2547 
2548         /* Seeking to the current position should attempt to
2549          * sync the underlying buffer with the current position.
2550          */
2551         Py_DECREF(cookieObj);
2552         cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
2553         if (cookieObj == NULL)
2554             goto fail;
2555         break;
2556 
2557     case SEEK_END:
2558         /* seek relative to end of file */
2559         cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2560         if (cmp < 0)
2561             goto fail;
2562 
2563         if (cmp == 0) {
2564             _unsupported(self->state, "can't do nonzero end-relative seeks");
2565             goto fail;
2566         }
2567 
2568         if (_PyFile_Flush((PyObject *)self) < 0) {
2569             goto fail;
2570         }
2571 
2572         textiowrapper_set_decoded_chars(self, NULL);
2573         Py_CLEAR(self->snapshot);
2574         if (self->decoder) {
2575             res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2576             if (res == NULL)
2577                 goto fail;
2578             Py_DECREF(res);
2579         }
2580 
2581         res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
2582         Py_CLEAR(cookieObj);
2583         if (res == NULL)
2584             goto fail;
2585         if (self->encoder) {
2586             /* If seek() == 0, we are at the start of stream, otherwise not */
2587             cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
2588             if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2589                 Py_DECREF(res);
2590                 goto fail;
2591             }
2592         }
2593         return res;
2594 
2595     case SEEK_SET:
2596         break;
2597 
2598     default:
2599         PyErr_Format(PyExc_ValueError,
2600                      "invalid whence (%d, should be %d, %d or %d)", whence,
2601                      SEEK_SET, SEEK_CUR, SEEK_END);
2602         goto fail;
2603     }
2604 
2605     cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
2606     if (cmp < 0)
2607         goto fail;
2608 
2609     if (cmp == 1) {
2610         PyErr_Format(PyExc_ValueError,
2611                      "negative seek position %R", cookieObj);
2612         goto fail;
2613     }
2614 
2615     if (_PyFile_Flush((PyObject *)self) < 0) {
2616         goto fail;
2617     }
2618 
2619     /* The strategy of seek() is to go back to the safe start point
2620      * and replay the effect of read(chars_to_skip) from there.
2621      */
2622     if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2623         goto fail;
2624 
2625     /* Seek back to the safe start point. */
2626     posobj = PyLong_FromOff_t(cookie.start_pos);
2627     if (posobj == NULL)
2628         goto fail;
2629     res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
2630     Py_DECREF(posobj);
2631     if (res == NULL)
2632         goto fail;
2633     Py_DECREF(res);
2634 
2635     textiowrapper_set_decoded_chars(self, NULL);
2636     Py_CLEAR(self->snapshot);
2637 
2638     /* Restore the decoder to its state from the safe start point. */
2639     if (self->decoder) {
2640         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2641             goto fail;
2642     }
2643 
2644     if (cookie.chars_to_skip) {
2645         /* Just like _read_chunk, feed the decoder and save a snapshot. */
2646         PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
2647                                                      "i", cookie.bytes_to_feed);
2648         PyObject *decoded;
2649 
2650         if (input_chunk == NULL)
2651             goto fail;
2652 
2653         if (!PyBytes_Check(input_chunk)) {
2654             PyErr_Format(PyExc_TypeError,
2655                          "underlying read() should have returned a bytes "
2656                          "object, not '%.200s'",
2657                          Py_TYPE(input_chunk)->tp_name);
2658             Py_DECREF(input_chunk);
2659             goto fail;
2660         }
2661 
2662         snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2663         if (snapshot == NULL) {
2664             goto fail;
2665         }
2666         Py_XSETREF(self->snapshot, snapshot);
2667 
2668         decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
2669             input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
2670 
2671         if (check_decoded(decoded) < 0)
2672             goto fail;
2673 
2674         textiowrapper_set_decoded_chars(self, decoded);
2675 
2676         /* Skip chars_to_skip of the decoded characters. */
2677         if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2678             PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2679             goto fail;
2680         }
2681         self->decoded_chars_used = cookie.chars_to_skip;
2682     }
2683     else {
2684         snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2685         if (snapshot == NULL)
2686             goto fail;
2687         Py_XSETREF(self->snapshot, snapshot);
2688     }
2689 
2690     /* Finally, reset the encoder (merely useful for proper BOM handling) */
2691     if (self->encoder) {
2692         if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2693             goto fail;
2694     }
2695     return cookieObj;
2696   fail:
2697     Py_XDECREF(cookieObj);
2698     return NULL;
2699 
2700 }
2701 
2702 /*[clinic input]
2703 @critical_section
2704 _io.TextIOWrapper.tell
2705 
2706 Return the stream position as an opaque number.
2707 
2708 The return value of tell() can be given as input to seek(), to restore a
2709 previous stream position.
2710 [clinic start generated code]*/
2711 
2712 static PyObject *
_io_TextIOWrapper_tell_impl(textio * self)2713 _io_TextIOWrapper_tell_impl(textio *self)
2714 /*[clinic end generated code: output=4f168c08bf34ad5f input=415d6b4e4f8e6e8c]*/
2715 {
2716     PyObject *res;
2717     PyObject *posobj = NULL;
2718     cookie_type cookie = {0,0,0,0,0};
2719     PyObject *next_input;
2720     Py_ssize_t chars_to_skip, chars_decoded;
2721     Py_ssize_t skip_bytes, skip_back;
2722     PyObject *saved_state = NULL;
2723     const char *input, *input_end;
2724     Py_ssize_t dec_buffer_len;
2725     int dec_flags;
2726 
2727     CHECK_ATTACHED(self);
2728     CHECK_CLOSED(self);
2729 
2730     if (!self->seekable) {
2731         _unsupported(self->state, "underlying stream is not seekable");
2732         goto fail;
2733     }
2734     if (!self->telling) {
2735         PyErr_SetString(PyExc_OSError,
2736                         "telling position disabled by next() call");
2737         goto fail;
2738     }
2739 
2740     if (_textiowrapper_writeflush(self) < 0)
2741         return NULL;
2742     if (_PyFile_Flush((PyObject *)self) < 0) {
2743         goto fail;
2744     }
2745 
2746     posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
2747     if (posobj == NULL)
2748         goto fail;
2749 
2750     if (self->decoder == NULL || self->snapshot == NULL) {
2751         assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2752         return posobj;
2753     }
2754 
2755 #if defined(HAVE_LARGEFILE_SUPPORT)
2756     cookie.start_pos = PyLong_AsLongLong(posobj);
2757 #else
2758     cookie.start_pos = PyLong_AsLong(posobj);
2759 #endif
2760     Py_DECREF(posobj);
2761     if (PyErr_Occurred())
2762         goto fail;
2763 
2764     /* Skip backward to the snapshot point (see _read_chunk). */
2765     assert(PyTuple_Check(self->snapshot));
2766     if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2767         goto fail;
2768 
2769     assert (PyBytes_Check(next_input));
2770 
2771     cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2772 
2773     /* How many decoded characters have been used up since the snapshot? */
2774     if (self->decoded_chars_used == 0)  {
2775         /* We haven't moved from the snapshot point. */
2776         return textiowrapper_build_cookie(&cookie);
2777     }
2778 
2779     chars_to_skip = self->decoded_chars_used;
2780 
2781     /* Decoder state will be restored at the end */
2782     saved_state = PyObject_CallMethodNoArgs(self->decoder,
2783                                              &_Py_ID(getstate));
2784     if (saved_state == NULL)
2785         goto fail;
2786 
2787 #define DECODER_GETSTATE() do { \
2788         PyObject *dec_buffer; \
2789         PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
2790             &_Py_ID(getstate)); \
2791         if (_state == NULL) \
2792             goto fail; \
2793         if (!PyTuple_Check(_state)) { \
2794             PyErr_SetString(PyExc_TypeError, \
2795                             "illegal decoder state"); \
2796             Py_DECREF(_state); \
2797             goto fail; \
2798         } \
2799         if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2800                               &dec_buffer, &dec_flags)) \
2801         { \
2802             Py_DECREF(_state); \
2803             goto fail; \
2804         } \
2805         if (!PyBytes_Check(dec_buffer)) { \
2806             PyErr_Format(PyExc_TypeError, \
2807                          "illegal decoder state: the first item should be a " \
2808                          "bytes object, not '%.200s'", \
2809                          Py_TYPE(dec_buffer)->tp_name); \
2810             Py_DECREF(_state); \
2811             goto fail; \
2812         } \
2813         dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2814         Py_DECREF(_state); \
2815     } while (0)
2816 
2817 #define DECODER_DECODE(start, len, res) do { \
2818         PyObject *_decoded = _PyObject_CallMethod( \
2819             self->decoder, &_Py_ID(decode), "y#", start, len); \
2820         if (check_decoded(_decoded) < 0) \
2821             goto fail; \
2822         res = PyUnicode_GET_LENGTH(_decoded); \
2823         Py_DECREF(_decoded); \
2824     } while (0)
2825 
2826     /* Fast search for an acceptable start point, close to our
2827        current pos */
2828     skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2829     skip_back = 1;
2830     assert(skip_back <= PyBytes_GET_SIZE(next_input));
2831     input = PyBytes_AS_STRING(next_input);
2832     while (skip_bytes > 0) {
2833         /* Decode up to temptative start point */
2834         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2835             goto fail;
2836         DECODER_DECODE(input, skip_bytes, chars_decoded);
2837         if (chars_decoded <= chars_to_skip) {
2838             DECODER_GETSTATE();
2839             if (dec_buffer_len == 0) {
2840                 /* Before pos and no bytes buffered in decoder => OK */
2841                 cookie.dec_flags = dec_flags;
2842                 chars_to_skip -= chars_decoded;
2843                 break;
2844             }
2845             /* Skip back by buffered amount and reset heuristic */
2846             skip_bytes -= dec_buffer_len;
2847             skip_back = 1;
2848         }
2849         else {
2850             /* We're too far ahead, skip back a bit */
2851             skip_bytes -= skip_back;
2852             skip_back *= 2;
2853         }
2854     }
2855     if (skip_bytes <= 0) {
2856         skip_bytes = 0;
2857         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2858             goto fail;
2859     }
2860 
2861     /* Note our initial start point. */
2862     cookie.start_pos += skip_bytes;
2863     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2864     if (chars_to_skip == 0)
2865         goto finally;
2866 
2867     /* We should be close to the desired position.  Now feed the decoder one
2868      * byte at a time until we reach the `chars_to_skip` target.
2869      * As we go, note the nearest "safe start point" before the current
2870      * location (a point where the decoder has nothing buffered, so seek()
2871      * can safely start from there and advance to this location).
2872      */
2873     chars_decoded = 0;
2874     input = PyBytes_AS_STRING(next_input);
2875     input_end = input + PyBytes_GET_SIZE(next_input);
2876     input += skip_bytes;
2877     while (input < input_end) {
2878         Py_ssize_t n;
2879 
2880         DECODER_DECODE(input, (Py_ssize_t)1, n);
2881         /* We got n chars for 1 byte */
2882         chars_decoded += n;
2883         cookie.bytes_to_feed += 1;
2884         DECODER_GETSTATE();
2885 
2886         if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2887             /* Decoder buffer is empty, so this is a safe start point. */
2888             cookie.start_pos += cookie.bytes_to_feed;
2889             chars_to_skip -= chars_decoded;
2890             cookie.dec_flags = dec_flags;
2891             cookie.bytes_to_feed = 0;
2892             chars_decoded = 0;
2893         }
2894         if (chars_decoded >= chars_to_skip)
2895             break;
2896         input++;
2897     }
2898     if (input == input_end) {
2899         /* We didn't get enough decoded data; signal EOF to get more. */
2900         PyObject *decoded = _PyObject_CallMethod(
2901             self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
2902         if (check_decoded(decoded) < 0)
2903             goto fail;
2904         chars_decoded += PyUnicode_GET_LENGTH(decoded);
2905         Py_DECREF(decoded);
2906         cookie.need_eof = 1;
2907 
2908         if (chars_decoded < chars_to_skip) {
2909             PyErr_SetString(PyExc_OSError,
2910                             "can't reconstruct logical file position");
2911             goto fail;
2912         }
2913     }
2914 
2915 finally:
2916     res = PyObject_CallMethodOneArg(
2917             self->decoder, &_Py_ID(setstate), saved_state);
2918     Py_DECREF(saved_state);
2919     if (res == NULL)
2920         return NULL;
2921     Py_DECREF(res);
2922 
2923     /* The returned cookie corresponds to the last safe start point. */
2924     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2925     return textiowrapper_build_cookie(&cookie);
2926 
2927 fail:
2928     if (saved_state) {
2929         PyObject *exc = PyErr_GetRaisedException();
2930         res = PyObject_CallMethodOneArg(
2931                 self->decoder, &_Py_ID(setstate), saved_state);
2932         _PyErr_ChainExceptions1(exc);
2933         Py_DECREF(saved_state);
2934         Py_XDECREF(res);
2935     }
2936     return NULL;
2937 }
2938 
2939 /*[clinic input]
2940 @critical_section
2941 _io.TextIOWrapper.truncate
2942     pos: object = None
2943     /
2944 [clinic start generated code]*/
2945 
2946 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2947 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2948 /*[clinic end generated code: output=90ec2afb9bb7745f input=8bddb320834c93ee]*/
2949 {
2950     CHECK_ATTACHED(self)
2951 
2952     if (_PyFile_Flush((PyObject *)self) < 0) {
2953         return NULL;
2954     }
2955 
2956     return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
2957 }
2958 
2959 static PyObject *
textiowrapper_repr(textio * self)2960 textiowrapper_repr(textio *self)
2961 {
2962     PyObject *nameobj, *modeobj, *res, *s;
2963     int status;
2964     const char *type_name = Py_TYPE(self)->tp_name;
2965 
2966     CHECK_INITIALIZED(self);
2967 
2968     res = PyUnicode_FromFormat("<%.100s", type_name);
2969     if (res == NULL)
2970         return NULL;
2971 
2972     status = Py_ReprEnter((PyObject *)self);
2973     if (status != 0) {
2974         if (status > 0) {
2975             PyErr_Format(PyExc_RuntimeError,
2976                          "reentrant call inside %.100s.__repr__",
2977                          type_name);
2978         }
2979         goto error;
2980     }
2981     if (PyObject_GetOptionalAttr((PyObject *) self, &_Py_ID(name), &nameobj) < 0) {
2982         if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
2983             goto error;
2984         }
2985         /* Ignore ValueError raised if the underlying stream was detached */
2986         PyErr_Clear();
2987     }
2988     if (nameobj != NULL) {
2989         s = PyUnicode_FromFormat(" name=%R", nameobj);
2990         Py_DECREF(nameobj);
2991         if (s == NULL)
2992             goto error;
2993         PyUnicode_AppendAndDel(&res, s);
2994         if (res == NULL)
2995             goto error;
2996     }
2997     if (PyObject_GetOptionalAttr((PyObject *) self, &_Py_ID(mode), &modeobj) < 0) {
2998         goto error;
2999     }
3000     if (modeobj != NULL) {
3001         s = PyUnicode_FromFormat(" mode=%R", modeobj);
3002         Py_DECREF(modeobj);
3003         if (s == NULL)
3004             goto error;
3005         PyUnicode_AppendAndDel(&res, s);
3006         if (res == NULL)
3007             goto error;
3008     }
3009     s = PyUnicode_FromFormat("%U encoding=%R>",
3010                              res, self->encoding);
3011     Py_DECREF(res);
3012     if (status == 0) {
3013         Py_ReprLeave((PyObject *)self);
3014     }
3015     return s;
3016 
3017   error:
3018     Py_XDECREF(res);
3019     if (status == 0) {
3020         Py_ReprLeave((PyObject *)self);
3021     }
3022     return NULL;
3023 }
3024 
3025 
3026 /* Inquiries */
3027 
3028 /*[clinic input]
3029 @critical_section
3030 _io.TextIOWrapper.fileno
3031 [clinic start generated code]*/
3032 
3033 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)3034 _io_TextIOWrapper_fileno_impl(textio *self)
3035 /*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/
3036 {
3037     CHECK_ATTACHED(self);
3038     return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
3039 }
3040 
3041 /*[clinic input]
3042 @critical_section
3043 _io.TextIOWrapper.seekable
3044 [clinic start generated code]*/
3045 
3046 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)3047 _io_TextIOWrapper_seekable_impl(textio *self)
3048 /*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/
3049 {
3050     CHECK_ATTACHED(self);
3051     return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
3052 }
3053 
3054 /*[clinic input]
3055 @critical_section
3056 _io.TextIOWrapper.readable
3057 [clinic start generated code]*/
3058 
3059 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)3060 _io_TextIOWrapper_readable_impl(textio *self)
3061 /*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/
3062 {
3063     CHECK_ATTACHED(self);
3064     return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
3065 }
3066 
3067 /*[clinic input]
3068 @critical_section
3069 _io.TextIOWrapper.writable
3070 [clinic start generated code]*/
3071 
3072 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)3073 _io_TextIOWrapper_writable_impl(textio *self)
3074 /*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/
3075 {
3076     CHECK_ATTACHED(self);
3077     return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
3078 }
3079 
3080 /*[clinic input]
3081 @critical_section
3082 _io.TextIOWrapper.isatty
3083 [clinic start generated code]*/
3084 
3085 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)3086 _io_TextIOWrapper_isatty_impl(textio *self)
3087 /*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/
3088 {
3089     CHECK_ATTACHED(self);
3090     return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
3091 }
3092 
3093 /*[clinic input]
3094 @critical_section
3095 _io.TextIOWrapper.flush
3096 [clinic start generated code]*/
3097 
3098 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)3099 _io_TextIOWrapper_flush_impl(textio *self)
3100 /*[clinic end generated code: output=59de9165f9c2e4d2 input=3ac3bf521bfed59d]*/
3101 {
3102     CHECK_ATTACHED(self);
3103     CHECK_CLOSED(self);
3104     self->telling = self->seekable;
3105     if (_textiowrapper_writeflush(self) < 0)
3106         return NULL;
3107     return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
3108 }
3109 
3110 /*[clinic input]
3111 @critical_section
3112 _io.TextIOWrapper.close
3113 [clinic start generated code]*/
3114 
3115 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)3116 _io_TextIOWrapper_close_impl(textio *self)
3117 /*[clinic end generated code: output=056ccf8b4876e4f4 input=8e12d7079d5ac5c1]*/
3118 {
3119     PyObject *res;
3120     int r;
3121     CHECK_ATTACHED(self);
3122 
3123     res = _io_TextIOWrapper_closed_get_impl(self);
3124     if (res == NULL)
3125         return NULL;
3126     r = PyObject_IsTrue(res);
3127     Py_DECREF(res);
3128     if (r < 0)
3129         return NULL;
3130 
3131     if (r > 0) {
3132         Py_RETURN_NONE; /* stream already closed */
3133     }
3134     else {
3135         PyObject *exc = NULL;
3136         if (self->finalizing) {
3137             res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
3138                                             (PyObject *)self);
3139             if (res) {
3140                 Py_DECREF(res);
3141             }
3142             else {
3143                 PyErr_Clear();
3144             }
3145         }
3146         if (_PyFile_Flush((PyObject *)self) < 0) {
3147             exc = PyErr_GetRaisedException();
3148         }
3149 
3150         res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
3151         if (exc != NULL) {
3152             _PyErr_ChainExceptions1(exc);
3153             Py_CLEAR(res);
3154         }
3155         return res;
3156     }
3157 }
3158 
3159 static PyObject *
textiowrapper_iternext(textio * self)3160 textiowrapper_iternext(textio *self)
3161 {
3162     PyObject *line;
3163 
3164     CHECK_ATTACHED(self);
3165 
3166     self->telling = 0;
3167     if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) {
3168         /* Skip method call overhead for speed */
3169         line = _textiowrapper_readline(self, -1);
3170     }
3171     else {
3172         line = PyObject_CallMethodNoArgs((PyObject *)self,
3173                                           &_Py_ID(readline));
3174         if (line && !PyUnicode_Check(line)) {
3175             PyErr_Format(PyExc_OSError,
3176                          "readline() should have returned a str object, "
3177                          "not '%.200s'", Py_TYPE(line)->tp_name);
3178             Py_DECREF(line);
3179             return NULL;
3180         }
3181     }
3182 
3183     if (line == NULL)
3184         return NULL;
3185 
3186     if (PyUnicode_GET_LENGTH(line) == 0) {
3187         /* Reached EOF or would have blocked */
3188         Py_DECREF(line);
3189         Py_CLEAR(self->snapshot);
3190         self->telling = self->seekable;
3191         return NULL;
3192     }
3193 
3194     return line;
3195 }
3196 
3197 /*[clinic input]
3198 @critical_section
3199 @getter
3200 _io.TextIOWrapper.name
3201 [clinic start generated code]*/
3202 
3203 static PyObject *
_io_TextIOWrapper_name_get_impl(textio * self)3204 _io_TextIOWrapper_name_get_impl(textio *self)
3205 /*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/
3206 {
3207     CHECK_ATTACHED(self);
3208     return PyObject_GetAttr(self->buffer, &_Py_ID(name));
3209 }
3210 
3211 /*[clinic input]
3212 @critical_section
3213 @getter
3214 _io.TextIOWrapper.closed
3215 [clinic start generated code]*/
3216 
3217 static PyObject *
_io_TextIOWrapper_closed_get_impl(textio * self)3218 _io_TextIOWrapper_closed_get_impl(textio *self)
3219 /*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/
3220 {
3221     CHECK_ATTACHED(self);
3222     return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
3223 }
3224 
3225 /*[clinic input]
3226 @critical_section
3227 @getter
3228 _io.TextIOWrapper.newlines
3229 [clinic start generated code]*/
3230 
3231 static PyObject *
_io_TextIOWrapper_newlines_get_impl(textio * self)3232 _io_TextIOWrapper_newlines_get_impl(textio *self)
3233 /*[clinic end generated code: output=53aa03ac35573180 input=610df647e514b3e8]*/
3234 {
3235     PyObject *res;
3236     CHECK_ATTACHED(self);
3237     if (self->decoder == NULL ||
3238         PyObject_GetOptionalAttr(self->decoder, &_Py_ID(newlines), &res) == 0)
3239     {
3240         Py_RETURN_NONE;
3241     }
3242     return res;
3243 }
3244 
3245 /*[clinic input]
3246 @critical_section
3247 @getter
3248 _io.TextIOWrapper.errors
3249 [clinic start generated code]*/
3250 
3251 static PyObject *
_io_TextIOWrapper_errors_get_impl(textio * self)3252 _io_TextIOWrapper_errors_get_impl(textio *self)
3253 /*[clinic end generated code: output=dca3a3ef21b09484 input=b45f983e6d43c4d8]*/
3254 {
3255     CHECK_INITIALIZED(self);
3256     return Py_NewRef(self->errors);
3257 }
3258 
3259 /*[clinic input]
3260 @critical_section
3261 @getter
3262 _io.TextIOWrapper._CHUNK_SIZE
3263 [clinic start generated code]*/
3264 
3265 static PyObject *
_io_TextIOWrapper__CHUNK_SIZE_get_impl(textio * self)3266 _io_TextIOWrapper__CHUNK_SIZE_get_impl(textio *self)
3267 /*[clinic end generated code: output=039925cd2df375bc input=e9715b0e06ff0fa6]*/
3268 {
3269     CHECK_ATTACHED(self);
3270     return PyLong_FromSsize_t(self->chunk_size);
3271 }
3272 
3273 /*[clinic input]
3274 @critical_section
3275 @setter
3276 _io.TextIOWrapper._CHUNK_SIZE
3277 [clinic start generated code]*/
3278 
3279 static int
_io_TextIOWrapper__CHUNK_SIZE_set_impl(textio * self,PyObject * value)3280 _io_TextIOWrapper__CHUNK_SIZE_set_impl(textio *self, PyObject *value)
3281 /*[clinic end generated code: output=edb86d2db660a5ab input=32fc99861db02a0a]*/
3282 {
3283     Py_ssize_t n;
3284     CHECK_ATTACHED_INT(self);
3285     if (value == NULL) {
3286         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3287         return -1;
3288     }
3289     n = PyNumber_AsSsize_t(value, PyExc_ValueError);
3290     if (n == -1 && PyErr_Occurred())
3291         return -1;
3292     if (n <= 0) {
3293         PyErr_SetString(PyExc_ValueError,
3294                         "a strictly positive integer is required");
3295         return -1;
3296     }
3297     self->chunk_size = n;
3298     return 0;
3299 }
3300 
3301 static PyMethodDef incrementalnewlinedecoder_methods[] = {
3302     _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3303     _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3304     _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3305     _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3306     {NULL}
3307 };
3308 
3309 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3310     {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3311     {NULL}
3312 };
3313 
3314 static PyType_Slot nldecoder_slots[] = {
3315     {Py_tp_dealloc, incrementalnewlinedecoder_dealloc},
3316     {Py_tp_doc, (void *)_io_IncrementalNewlineDecoder___init____doc__},
3317     {Py_tp_methods, incrementalnewlinedecoder_methods},
3318     {Py_tp_getset, incrementalnewlinedecoder_getset},
3319     {Py_tp_traverse, incrementalnewlinedecoder_traverse},
3320     {Py_tp_clear, incrementalnewlinedecoder_clear},
3321     {Py_tp_init, _io_IncrementalNewlineDecoder___init__},
3322     {0, NULL},
3323 };
3324 
3325 PyType_Spec nldecoder_spec = {
3326     .name = "_io.IncrementalNewlineDecoder",
3327     .basicsize = sizeof(nldecoder_object),
3328     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3329               Py_TPFLAGS_IMMUTABLETYPE),
3330     .slots = nldecoder_slots,
3331 };
3332 
3333 
3334 static PyMethodDef textiowrapper_methods[] = {
3335     _IO_TEXTIOWRAPPER_DETACH_METHODDEF
3336     _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
3337     _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3338     _IO_TEXTIOWRAPPER_READ_METHODDEF
3339     _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3340     _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3341     _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3342 
3343     _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3344     _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3345     _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3346     _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3347     _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
3348 
3349     _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3350     _IO_TEXTIOWRAPPER_TELL_METHODDEF
3351     _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
3352 
3353     {"__reduce__", _PyIOBase_cannot_pickle, METH_NOARGS},
3354     {"__reduce_ex__", _PyIOBase_cannot_pickle, METH_O},
3355     {NULL, NULL}
3356 };
3357 
3358 static PyMemberDef textiowrapper_members[] = {
3359     {"encoding", _Py_T_OBJECT, offsetof(textio, encoding), Py_READONLY},
3360     {"buffer", _Py_T_OBJECT, offsetof(textio, buffer), Py_READONLY},
3361     {"line_buffering", Py_T_BOOL, offsetof(textio, line_buffering), Py_READONLY},
3362     {"write_through", Py_T_BOOL, offsetof(textio, write_through), Py_READONLY},
3363     {"_finalizing", Py_T_BOOL, offsetof(textio, finalizing), 0},
3364     {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(textio, weakreflist), Py_READONLY},
3365     {"__dictoffset__", Py_T_PYSSIZET, offsetof(textio, dict), Py_READONLY},
3366     {NULL}
3367 };
3368 
3369 static PyGetSetDef textiowrapper_getset[] = {
3370     _IO_TEXTIOWRAPPER_NAME_GETSETDEF
3371     _IO_TEXTIOWRAPPER_CLOSED_GETSETDEF
3372 /*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3373 */
3374     _IO_TEXTIOWRAPPER_NEWLINES_GETSETDEF
3375     _IO_TEXTIOWRAPPER_ERRORS_GETSETDEF
3376     _IO_TEXTIOWRAPPER__CHUNK_SIZE_GETSETDEF
3377     {NULL}
3378 };
3379 
3380 PyType_Slot textiowrapper_slots[] = {
3381     {Py_tp_dealloc, textiowrapper_dealloc},
3382     {Py_tp_repr, textiowrapper_repr},
3383     {Py_tp_doc, (void *)_io_TextIOWrapper___init____doc__},
3384     {Py_tp_traverse, textiowrapper_traverse},
3385     {Py_tp_clear, textiowrapper_clear},
3386     {Py_tp_iternext, textiowrapper_iternext},
3387     {Py_tp_methods, textiowrapper_methods},
3388     {Py_tp_members, textiowrapper_members},
3389     {Py_tp_getset, textiowrapper_getset},
3390     {Py_tp_init, _io_TextIOWrapper___init__},
3391     {0, NULL},
3392 };
3393 
3394 PyType_Spec textiowrapper_spec = {
3395     .name = "_io.TextIOWrapper",
3396     .basicsize = sizeof(textio),
3397     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3398               Py_TPFLAGS_IMMUTABLETYPE),
3399     .slots = textiowrapper_slots,
3400 };
3401