• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2     An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3 
4     Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5 
6     Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8 
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "pycore_interp.h"        // PyInterpreterState.fs_codec
12 #include "pycore_long.h"          // _PyLong_GetZero()
13 #include "pycore_fileutils.h"     // _Py_GetLocaleEncoding()
14 #include "pycore_object.h"
15 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
16 #include "structmember.h"         // PyMemberDef
17 #include "_iomodule.h"
18 
19 /*[clinic input]
20 module _io
21 class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
22 class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
23 [clinic start generated code]*/
24 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
25 
/* Static identifiers for the attribute and method names used in this file.
   Each _Py_IDENTIFIER(x) is referenced further down as &PyId_x
   (for example &PyId_strict, &PyId_setstate and &PyId_readable). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
45 
46 /* TextIOBase */
47 
/* Class docstring; installed as the tp_doc slot of PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
55 
56 static PyObject *
_unsupported(const char * message)57 _unsupported(const char *message)
58 {
59     _PyIO_State *state = IO_STATE();
60     if (state != NULL)
61         PyErr_SetString(state->unsupported_operation, message);
62     return NULL;
63 }
64 
65 PyDoc_STRVAR(textiobase_detach_doc,
66     "Separate the underlying buffer from the TextIOBase and return it.\n"
67     "\n"
68     "After the underlying buffer has been detached, the TextIO is in an\n"
69     "unusable state.\n"
70     );
71 
72 static PyObject *
textiobase_detach(PyObject * self,PyObject * Py_UNUSED (ignored))73 textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
74 {
75     return _unsupported("detach");
76 }
77 
78 PyDoc_STRVAR(textiobase_read_doc,
79     "Read at most n characters from stream.\n"
80     "\n"
81     "Read from underlying buffer until we have n characters or we hit EOF.\n"
82     "If n is negative or omitted, read until EOF.\n"
83     );
84 
85 static PyObject *
textiobase_read(PyObject * self,PyObject * args)86 textiobase_read(PyObject *self, PyObject *args)
87 {
88     return _unsupported("read");
89 }
90 
91 PyDoc_STRVAR(textiobase_readline_doc,
92     "Read until newline or EOF.\n"
93     "\n"
94     "Returns an empty string if EOF is hit immediately.\n"
95     );
96 
97 static PyObject *
textiobase_readline(PyObject * self,PyObject * args)98 textiobase_readline(PyObject *self, PyObject *args)
99 {
100     return _unsupported("readline");
101 }
102 
103 PyDoc_STRVAR(textiobase_write_doc,
104     "Write string to stream.\n"
105     "Returns the number of characters written (which is always equal to\n"
106     "the length of the string).\n"
107     );
108 
109 static PyObject *
textiobase_write(PyObject * self,PyObject * args)110 textiobase_write(PyObject *self, PyObject *args)
111 {
112     return _unsupported("write");
113 }
114 
115 PyDoc_STRVAR(textiobase_encoding_doc,
116     "Encoding of the text stream.\n"
117     "\n"
118     "Subclasses should override.\n"
119     );
120 
121 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)122 textiobase_encoding_get(PyObject *self, void *context)
123 {
124     Py_RETURN_NONE;
125 }
126 
127 PyDoc_STRVAR(textiobase_newlines_doc,
128     "Line endings translated so far.\n"
129     "\n"
130     "Only line endings translated during reading are considered.\n"
131     "\n"
132     "Subclasses should override.\n"
133     );
134 
135 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)136 textiobase_newlines_get(PyObject *self, void *context)
137 {
138     Py_RETURN_NONE;
139 }
140 
141 PyDoc_STRVAR(textiobase_errors_doc,
142     "The error setting of the decoder or encoder.\n"
143     "\n"
144     "Subclasses should override.\n"
145     );
146 
147 static PyObject *
textiobase_errors_get(PyObject * self,void * context)148 textiobase_errors_get(PyObject *self, void *context)
149 {
150     Py_RETURN_NONE;
151 }
152 
153 
154 static PyMethodDef textiobase_methods[] = {
155     {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
156     {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
157     {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
158     {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
159     {NULL, NULL}
160 };
161 
162 static PyGetSetDef textiobase_getset[] = {
163     {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
164     {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
165     {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
166     {NULL}
167 };
168 
169 PyTypeObject PyTextIOBase_Type = {
170     PyVarObject_HEAD_INIT(NULL, 0)
171     "_io._TextIOBase",          /*tp_name*/
172     0,                          /*tp_basicsize*/
173     0,                          /*tp_itemsize*/
174     0,                          /*tp_dealloc*/
175     0,                          /*tp_vectorcall_offset*/
176     0,                          /*tp_getattr*/
177     0,                          /*tp_setattr*/
178     0,                          /*tp_as_async*/
179     0,                          /*tp_repr*/
180     0,                          /*tp_as_number*/
181     0,                          /*tp_as_sequence*/
182     0,                          /*tp_as_mapping*/
183     0,                          /*tp_hash */
184     0,                          /*tp_call*/
185     0,                          /*tp_str*/
186     0,                          /*tp_getattro*/
187     0,                          /*tp_setattro*/
188     0,                          /*tp_as_buffer*/
189     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
190     textiobase_doc,             /* tp_doc */
191     0,                          /* tp_traverse */
192     0,                          /* tp_clear */
193     0,                          /* tp_richcompare */
194     0,                          /* tp_weaklistoffset */
195     0,                          /* tp_iter */
196     0,                          /* tp_iternext */
197     textiobase_methods,         /* tp_methods */
198     0,                          /* tp_members */
199     textiobase_getset,          /* tp_getset */
200     &PyIOBase_Type,             /* tp_base */
201     0,                          /* tp_dict */
202     0,                          /* tp_descr_get */
203     0,                          /* tp_descr_set */
204     0,                          /* tp_dictoffset */
205     0,                          /* tp_init */
206     0,                          /* tp_alloc */
207     0,                          /* tp_new */
208     0,                          /* tp_free */
209     0,                          /* tp_is_gc */
210     0,                          /* tp_bases */
211     0,                          /* tp_mro */
212     0,                          /* tp_cache */
213     0,                          /* tp_subclasses */
214     0,                          /* tp_weaklist */
215     0,                          /* tp_del */
216     0,                          /* tp_version_tag */
217     0,                          /* tp_finalize */
218 };
219 
220 
221 /* IncrementalNewlineDecoder */
222 
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    /* Wrapped decoder.  Py_None means decode() uses its input as-is;
       NULL means __init__ has not been called. */
    PyObject *decoder;
    /* Error-handler object given to __init__ ("strict" when omitted). */
    PyObject *errors;
    /* Set when a trailing '\r' was held back by a non-final decode(). */
    unsigned int pendingcr: 1;
    /* Non-zero: decode() rewrites "\r\n" and "\r" to "\n". */
    unsigned int translate: 1;
    /* Bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF. */
    unsigned int seennl: 3;
} nldecoder_object;
231 
232 /*[clinic input]
233 _io.IncrementalNewlineDecoder.__init__
234     decoder: object
235     translate: int
236     errors: object(c_default="NULL") = "strict"
237 
238 Codec used when reading a file in universal newlines mode.
239 
240 It wraps another incremental decoder, translating \r\n and \r into \n.
241 It also records the types of newlines encountered.  When used with
242 translate=False, it ensures that the newline sequence is returned in
243 one piece. When used with decoder=None, it expects unicode strings as
244 decode input and translates newlines without first invoking an external
245 decoder.
246 [clinic start generated code]*/
247 
248 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)249 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
250                                             PyObject *decoder, int translate,
251                                             PyObject *errors)
252 /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
253 {
254     self->decoder = decoder;
255     Py_INCREF(decoder);
256 
257     if (errors == NULL) {
258         self->errors = _PyUnicode_FromId(&PyId_strict);
259         if (self->errors == NULL)
260             return -1;
261     }
262     else {
263         self->errors = errors;
264     }
265     Py_INCREF(self->errors);
266 
267     self->translate = translate ? 1 : 0;
268     self->seennl = 0;
269     self->pendingcr = 0;
270 
271     return 0;
272 }
273 
274 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)275 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
276 {
277     Py_CLEAR(self->decoder);
278     Py_CLEAR(self->errors);
279     Py_TYPE(self)->tp_free((PyObject *)self);
280 }
281 
282 static int
check_decoded(PyObject * decoded)283 check_decoded(PyObject *decoded)
284 {
285     if (decoded == NULL)
286         return -1;
287     if (!PyUnicode_Check(decoded)) {
288         PyErr_Format(PyExc_TypeError,
289                      "decoder should return a string result, not '%.200s'",
290                      Py_TYPE(decoded)->tp_name);
291         Py_DECREF(decoded);
292         return -1;
293     }
294     if (PyUnicode_READY(decoded) < 0) {
295         Py_DECREF(decoded);
296         return -1;
297     }
298     return 0;
299 }
300 
/* Bit flags recording which line endings have been decoded so far; they are
   OR-ed together in nldecoder_object.seennl. */
#define SEEN_CR   1   /* a lone "\r" */
#define SEEN_LF   2   /* a lone "\n" */
#define SEEN_CRLF 4   /* the pair "\r\n" */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)   /* nothing left to detect */
305 
/* Decode `input` through the wrapped decoder, then record (and optionally
   translate) the line endings found in the result.

   myself -- the nldecoder_object, passed as a plain PyObject pointer.
   input  -- forwarded to the wrapped decoder's decode() method; when the
             wrapped decoder is None it is used directly and must be a str.
   final  -- non-zero is forwarded to the decoder as True and disables the
             holding back of a trailing '\r'.

   Returns a new reference to a str, or NULL with an exception set. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    /* decoder stays NULL until __init__ has stored something in it */
    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* No wrapped decoder: the input itself is the text to process. */
        output = input;
        Py_INCREF(output);
    }

    /* check_decoded() releases `output` itself when it rejects it. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        /* One extra character, same maximum code point as `output`. */
        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, out, 0, '\r');
        /* `kind` is used as the per-character byte width here. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            /* Drop the trailing '\r' now; pendingcr makes the next call
               put it back in front of its own output. */
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        const void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;   /* working copy, merged back at the end */
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* Nothing to scan; self->seennl is left as it was. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* With one byte per character a '\n' byte is a '\n'
                   character; wider kinds need a per-character check. */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        /* NOTE(review): this inner loop has no bound check
                           of its own; it appears to rely on the character
                           data being followed by a NUL -- confirm. */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* Detection only: `output` is returned unchanged. */
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* Look one character ahead to tell "\r\n" from "\r". */
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            /* Translation: build a copy in which "\r\n" and "\r" become
               "\n".  `kind` and `in_str` are re-declared here with the same
               values as the outer ones. */
            void *translated;
            int kind = PyUnicode_KIND(output);
            const void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* The result is never longer than the input, so `len`
               characters of storage are enough. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that character sat at index `len`,
                   i.e. past the last real character. */
                if (in > len)
                    break;
                /* Any other character below '\r' is copied through. */
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* The old `output` is already released, so skip `error:`. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
506 
507 /*[clinic input]
508 _io.IncrementalNewlineDecoder.decode
509     input: object
510     final: bool(accept={int}) = False
511 [clinic start generated code]*/
512 
513 static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object * self,PyObject * input,int final)514 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
515                                           PyObject *input, int final)
516 /*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
517 {
518     return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
519 }
520 
521 /*[clinic input]
522 _io.IncrementalNewlineDecoder.getstate
523 [clinic start generated code]*/
524 
525 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)526 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
527 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
528 {
529     PyObject *buffer;
530     unsigned long long flag;
531 
532     if (self->decoder != Py_None) {
533         PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
534            _PyIO_str_getstate);
535         if (state == NULL)
536             return NULL;
537         if (!PyTuple_Check(state)) {
538             PyErr_SetString(PyExc_TypeError,
539                             "illegal decoder state");
540             Py_DECREF(state);
541             return NULL;
542         }
543         if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
544                               &buffer, &flag))
545         {
546             Py_DECREF(state);
547             return NULL;
548         }
549         Py_INCREF(buffer);
550         Py_DECREF(state);
551     }
552     else {
553         buffer = PyBytes_FromString("");
554         flag = 0;
555     }
556     flag <<= 1;
557     if (self->pendingcr)
558         flag |= 1;
559     return Py_BuildValue("NK", buffer, flag);
560 }
561 
562 /*[clinic input]
563 _io.IncrementalNewlineDecoder.setstate
564     state: object
565     /
566 [clinic start generated code]*/
567 
568 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)569 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
570                                        PyObject *state)
571 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
572 {
573     PyObject *buffer;
574     unsigned long long flag;
575 
576     if (!PyTuple_Check(state)) {
577         PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
578         return NULL;
579     }
580     if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
581                           &buffer, &flag))
582     {
583         return NULL;
584     }
585 
586     self->pendingcr = (int) (flag & 1);
587     flag >>= 1;
588 
589     if (self->decoder != Py_None)
590         return _PyObject_CallMethodId(self->decoder,
591                                       &PyId_setstate, "((OK))", buffer, flag);
592     else
593         Py_RETURN_NONE;
594 }
595 
596 /*[clinic input]
597 _io.IncrementalNewlineDecoder.reset
598 [clinic start generated code]*/
599 
600 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)601 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
602 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
603 {
604     self->seennl = 0;
605     self->pendingcr = 0;
606     if (self->decoder != Py_None)
607         return PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
608     else
609         Py_RETURN_NONE;
610 }
611 
612 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)613 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
614 {
615     switch (self->seennl) {
616     case SEEN_CR:
617         return PyUnicode_FromString("\r");
618     case SEEN_LF:
619         return PyUnicode_FromString("\n");
620     case SEEN_CRLF:
621         return PyUnicode_FromString("\r\n");
622     case SEEN_CR | SEEN_LF:
623         return Py_BuildValue("ss", "\r", "\n");
624     case SEEN_CR | SEEN_CRLF:
625         return Py_BuildValue("ss", "\r", "\r\n");
626     case SEEN_LF | SEEN_CRLF:
627         return Py_BuildValue("ss", "\n", "\r\n");
628     case SEEN_CR | SEEN_LF | SEEN_CRLF:
629         return Py_BuildValue("sss", "\r", "\n", "\r\n");
630     default:
631         Py_RETURN_NONE;
632    }
633 
634 }
635 
636 /* TextIOWrapper */
637 
/* Signature shared by the specialized encoders defined further down; they
   are stored through this type after a cast. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);

/* Instance layout of _io.TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;   /* underlying stream; readable()/writable() are
                           called on it when a decoder/encoder is set up */
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;   /* str made from the `newline` setting by
                           set_newline(); NULL when that setting is NULL */
    PyObject *errors;   /* read with PyUnicode_AsUTF8() by the specialized
                           encoders */
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
                         /* may point into readnl's character data
                            (see set_newline()) */
    char line_buffering;
    char write_through;
    char readuniversal;  /* newline setting is NULL or "" */
    char readtranslate;  /* newline setting is NULL */
    char writetranslate; /* newline setting is NULL or non-empty */
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;

/* Forward declaration; the definition is not in this part of the file. */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
702 
703 /* A couple of specialized cases in order to bypass the slow incremental
704    encoding methods for the most popular encodings. */
705 
706 static PyObject *
ascii_encode(textio * self,PyObject * text)707 ascii_encode(textio *self, PyObject *text)
708 {
709     return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
710 }
711 
712 static PyObject *
utf16be_encode(textio * self,PyObject * text)713 utf16be_encode(textio *self, PyObject *text)
714 {
715     return _PyUnicode_EncodeUTF16(text,
716                                   PyUnicode_AsUTF8(self->errors), 1);
717 }
718 
719 static PyObject *
utf16le_encode(textio * self,PyObject * text)720 utf16le_encode(textio *self, PyObject *text)
721 {
722     return _PyUnicode_EncodeUTF16(text,
723                                   PyUnicode_AsUTF8(self->errors), -1);
724 }
725 
726 static PyObject *
utf16_encode(textio * self,PyObject * text)727 utf16_encode(textio *self, PyObject *text)
728 {
729     if (!self->encoding_start_of_stream) {
730         /* Skip the BOM and use native byte ordering */
731 #if PY_BIG_ENDIAN
732         return utf16be_encode(self, text);
733 #else
734         return utf16le_encode(self, text);
735 #endif
736     }
737     return _PyUnicode_EncodeUTF16(text,
738                                   PyUnicode_AsUTF8(self->errors), 0);
739 }
740 
741 static PyObject *
utf32be_encode(textio * self,PyObject * text)742 utf32be_encode(textio *self, PyObject *text)
743 {
744     return _PyUnicode_EncodeUTF32(text,
745                                   PyUnicode_AsUTF8(self->errors), 1);
746 }
747 
748 static PyObject *
utf32le_encode(textio * self,PyObject * text)749 utf32le_encode(textio *self, PyObject *text)
750 {
751     return _PyUnicode_EncodeUTF32(text,
752                                   PyUnicode_AsUTF8(self->errors), -1);
753 }
754 
755 static PyObject *
utf32_encode(textio * self,PyObject * text)756 utf32_encode(textio *self, PyObject *text)
757 {
758     if (!self->encoding_start_of_stream) {
759         /* Skip the BOM and use native byte ordering */
760 #if PY_BIG_ENDIAN
761         return utf32be_encode(self, text);
762 #else
763         return utf32le_encode(self, text);
764 #endif
765     }
766     return _PyUnicode_EncodeUTF32(text,
767                                   PyUnicode_AsUTF8(self->errors), 0);
768 }
769 
770 static PyObject *
utf8_encode(textio * self,PyObject * text)771 utf8_encode(textio *self, PyObject *text)
772 {
773     return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
774 }
775 
776 static PyObject *
latin1_encode(textio * self,PyObject * text)777 latin1_encode(textio *self, PyObject *text)
778 {
779     return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
780 }
781 
782 // Return true when encoding can be skipped when text is ascii.
783 static inline int
is_asciicompat_encoding(encodefunc_t f)784 is_asciicompat_encoding(encodefunc_t f)
785 {
786     return f == (encodefunc_t) ascii_encode
787         || f == (encodefunc_t) latin1_encode
788         || f == (encodefunc_t) utf8_encode;
789 }
790 
791 /* Map normalized encoding names onto the specialized encoding funcs */
792 
793 typedef struct {
794     const char *name;
795     encodefunc_t encodefunc;
796 } encodefuncentry;
797 
798 static const encodefuncentry encodefuncs[] = {
799     {"ascii",       (encodefunc_t) ascii_encode},
800     {"iso8859-1",   (encodefunc_t) latin1_encode},
801     {"utf-8",       (encodefunc_t) utf8_encode},
802     {"utf-16-be",   (encodefunc_t) utf16be_encode},
803     {"utf-16-le",   (encodefunc_t) utf16le_encode},
804     {"utf-16",      (encodefunc_t) utf16_encode},
805     {"utf-32-be",   (encodefunc_t) utf32be_encode},
806     {"utf-32-le",   (encodefunc_t) utf32le_encode},
807     {"utf-32",      (encodefunc_t) utf32_encode},
808     {NULL, NULL}
809 };
810 
811 static int
validate_newline(const char * newline)812 validate_newline(const char *newline)
813 {
814     if (newline && newline[0] != '\0'
815         && !(newline[0] == '\n' && newline[1] == '\0')
816         && !(newline[0] == '\r' && newline[1] == '\0')
817         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
818         PyErr_Format(PyExc_ValueError,
819                      "illegal newline value: %s", newline);
820         return -1;
821     }
822     return 0;
823 }
824 
825 static int
set_newline(textio * self,const char * newline)826 set_newline(textio *self, const char *newline)
827 {
828     PyObject *old = self->readnl;
829     if (newline == NULL) {
830         self->readnl = NULL;
831     }
832     else {
833         self->readnl = PyUnicode_FromString(newline);
834         if (self->readnl == NULL) {
835             self->readnl = old;
836             return -1;
837         }
838     }
839     self->readuniversal = (newline == NULL || newline[0] == '\0');
840     self->readtranslate = (newline == NULL);
841     self->writetranslate = (newline == NULL || newline[0] != '\0');
842     if (!self->readuniversal && self->readnl != NULL) {
843         // validate_newline() accepts only ASCII newlines.
844         assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
845         self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
846         if (strcmp(self->writenl, "\n") == 0) {
847             self->writenl = NULL;
848         }
849     }
850     else {
851 #ifdef MS_WINDOWS
852         self->writenl = "\r\n";
853 #else
854         self->writenl = NULL;
855 #endif
856     }
857     Py_XDECREF(old);
858     return 0;
859 }
860 
861 static int
_textiowrapper_set_decoder(textio * self,PyObject * codec_info,const char * errors)862 _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
863                            const char *errors)
864 {
865     PyObject *res;
866     int r;
867 
868     res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
869     if (res == NULL)
870         return -1;
871 
872     r = PyObject_IsTrue(res);
873     Py_DECREF(res);
874     if (r == -1)
875         return -1;
876 
877     if (r != 1)
878         return 0;
879 
880     Py_CLEAR(self->decoder);
881     self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
882     if (self->decoder == NULL)
883         return -1;
884 
885     if (self->readuniversal) {
886         PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
887             (PyObject *)&PyIncrementalNewlineDecoder_Type,
888             self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
889         if (incrementalDecoder == NULL)
890             return -1;
891         Py_CLEAR(self->decoder);
892         self->decoder = incrementalDecoder;
893     }
894 
895     return 0;
896 }
897 
898 static PyObject*
_textiowrapper_decode(PyObject * decoder,PyObject * bytes,int eof)899 _textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
900 {
901     PyObject *chars;
902 
903     if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type))
904         chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
905     else
906         chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
907                                            eof ? Py_True : Py_False, NULL);
908 
909     if (check_decoded(chars) < 0)
910         // check_decoded already decreases refcount
911         return NULL;
912 
913     return chars;
914 }
915 
916 static int
_textiowrapper_set_encoder(textio * self,PyObject * codec_info,const char * errors)917 _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
918                            const char *errors)
919 {
920     PyObject *res;
921     int r;
922 
923     res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
924     if (res == NULL)
925         return -1;
926 
927     r = PyObject_IsTrue(res);
928     Py_DECREF(res);
929     if (r == -1)
930         return -1;
931 
932     if (r != 1)
933         return 0;
934 
935     Py_CLEAR(self->encoder);
936     self->encodefunc = NULL;
937     self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
938     if (self->encoder == NULL)
939         return -1;
940 
941     /* Get the normalized named of the codec */
942     if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
943         return -1;
944     }
945     if (res != NULL && PyUnicode_Check(res)) {
946         const encodefuncentry *e = encodefuncs;
947         while (e->name != NULL) {
948             if (_PyUnicode_EqualToASCIIString(res, e->name)) {
949                 self->encodefunc = e->encodefunc;
950                 break;
951             }
952             e++;
953         }
954     }
955     Py_XDECREF(res);
956 
957     return 0;
958 }
959 
960 static int
_textiowrapper_fix_encoder_state(textio * self)961 _textiowrapper_fix_encoder_state(textio *self)
962 {
963     if (!self->seekable || !self->encoder) {
964         return 0;
965     }
966 
967     self->encoding_start_of_stream = 1;
968 
969     PyObject *cookieObj = PyObject_CallMethodNoArgs(
970         self->buffer, _PyIO_str_tell);
971     if (cookieObj == NULL) {
972         return -1;
973     }
974 
975     int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
976     Py_DECREF(cookieObj);
977     if (cmp < 0) {
978         return -1;
979     }
980 
981     if (cmp == 0) {
982         self->encoding_start_of_stream = 0;
983         PyObject *res = PyObject_CallMethodOneArg(
984             self->encoder, _PyIO_str_setstate, _PyLong_GetZero());
985         if (res == NULL) {
986             return -1;
987         }
988         Py_DECREF(res);
989     }
990 
991     return 0;
992 }
993 
994 static int
io_check_errors(PyObject * errors)995 io_check_errors(PyObject *errors)
996 {
997     assert(errors != NULL && errors != Py_None);
998 
999     PyInterpreterState *interp = _PyInterpreterState_GET();
1000 #ifndef Py_DEBUG
1001     /* In release mode, only check in development mode (-X dev) */
1002     if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
1003         return 0;
1004     }
1005 #else
1006     /* Always check in debug mode */
1007 #endif
1008 
1009     /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1010        before_PyUnicode_InitEncodings() is called. */
1011     if (!interp->unicode.fs_codec.encoding) {
1012         return 0;
1013     }
1014 
1015     Py_ssize_t name_length;
1016     const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1017     if (name == NULL) {
1018         return -1;
1019     }
1020     if (strlen(name) != (size_t)name_length) {
1021         PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1022         return -1;
1023     }
1024     PyObject *handler = PyCodec_LookupError(name);
1025     if (handler != NULL) {
1026         Py_DECREF(handler);
1027         return 0;
1028     }
1029     return -1;
1030 }
1031 
1032 
1033 
1034 /*[clinic input]
1035 _io.TextIOWrapper.__init__
1036     buffer: object
1037     encoding: str(accept={str, NoneType}) = None
1038     errors: object = None
1039     newline: str(accept={str, NoneType}) = None
1040     line_buffering: bool(accept={int}) = False
1041     write_through: bool(accept={int}) = False
1042 
1043 Character and line based layer over a BufferedIOBase object, buffer.
1044 
1045 encoding gives the name of the encoding that the stream will be
1046 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1047 
1048 errors determines the strictness of encoding and decoding (see
1049 help(codecs.Codec) or the documentation for codecs.register) and
1050 defaults to "strict".
1051 
1052 newline controls how line endings are handled. It can be None, '',
1053 '\n', '\r', and '\r\n'.  It works as follows:
1054 
1055 * On input, if newline is None, universal newlines mode is
1056   enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1057   these are translated into '\n' before being returned to the
1058   caller. If it is '', universal newline mode is enabled, but line
1059   endings are returned to the caller untranslated. If it has any of
1060   the other legal values, input lines are only terminated by the given
1061   string, and the line ending is returned to the caller untranslated.
1062 
1063 * On output, if newline is None, any '\n' characters written are
1064   translated to the system default line separator, os.linesep. If
1065   newline is '' or '\n', no translation takes place. If newline is any
1066   of the other legal values, any '\n' characters written are translated
1067   to the given string.
1068 
1069 If line_buffering is True, a call to flush is implied when a call to
1070 write contains a newline character.
1071 [clinic start generated code]*/
1072 
1073 static int
_io_TextIOWrapper___init___impl(textio * self,PyObject * buffer,const char * encoding,PyObject * errors,const char * newline,int line_buffering,int write_through)1074 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1075                                 const char *encoding, PyObject *errors,
1076                                 const char *newline, int line_buffering,
1077                                 int write_through)
1078 /*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
1079 {
1080     PyObject *raw, *codec_info = NULL;
1081     _PyIO_State *state = NULL;
1082     PyObject *res;
1083     int r;
1084 
1085     self->ok = 0;
1086     self->detached = 0;
1087 
1088     if (encoding == NULL) {
1089         PyInterpreterState *interp = _PyInterpreterState_GET();
1090         if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
1091             if (PyErr_WarnEx(PyExc_EncodingWarning,
1092                              "'encoding' argument not specified", 1)) {
1093                 return -1;
1094             }
1095         }
1096     }
1097     else if (strcmp(encoding, "locale") == 0) {
1098         encoding = NULL;
1099     }
1100 
1101     if (errors == Py_None) {
1102         errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
1103         if (errors == NULL) {
1104             return -1;
1105         }
1106     }
1107     else if (!PyUnicode_Check(errors)) {
1108         // Check 'errors' argument here because Argument Clinic doesn't support
1109         // 'str(accept={str, NoneType})' converter.
1110         PyErr_Format(
1111             PyExc_TypeError,
1112             "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1113             Py_TYPE(errors)->tp_name);
1114         return -1;
1115     }
1116     else if (io_check_errors(errors)) {
1117         return -1;
1118     }
1119 
1120     if (validate_newline(newline) < 0) {
1121         return -1;
1122     }
1123 
1124     Py_CLEAR(self->buffer);
1125     Py_CLEAR(self->encoding);
1126     Py_CLEAR(self->encoder);
1127     Py_CLEAR(self->decoder);
1128     Py_CLEAR(self->readnl);
1129     Py_CLEAR(self->decoded_chars);
1130     Py_CLEAR(self->pending_bytes);
1131     Py_CLEAR(self->snapshot);
1132     Py_CLEAR(self->errors);
1133     Py_CLEAR(self->raw);
1134     self->decoded_chars_used = 0;
1135     self->pending_bytes_count = 0;
1136     self->encodefunc = NULL;
1137     self->b2cratio = 0.0;
1138 
1139     if (encoding == NULL) {
1140         /* Try os.device_encoding(fileno) */
1141         PyObject *fileno;
1142         state = IO_STATE();
1143         if (state == NULL)
1144             goto error;
1145         fileno = _PyObject_CallMethodIdNoArgs(buffer, &PyId_fileno);
1146         /* Ignore only AttributeError and UnsupportedOperation */
1147         if (fileno == NULL) {
1148             if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1149                 PyErr_ExceptionMatches(state->unsupported_operation)) {
1150                 PyErr_Clear();
1151             }
1152             else {
1153                 goto error;
1154             }
1155         }
1156         else {
1157             int fd = _PyLong_AsInt(fileno);
1158             Py_DECREF(fileno);
1159             if (fd == -1 && PyErr_Occurred()) {
1160                 goto error;
1161             }
1162 
1163             self->encoding = _Py_device_encoding(fd);
1164             if (self->encoding == NULL)
1165                 goto error;
1166             else if (!PyUnicode_Check(self->encoding))
1167                 Py_CLEAR(self->encoding);
1168         }
1169     }
1170     if (encoding == NULL && self->encoding == NULL) {
1171         self->encoding = _Py_GetLocaleEncodingObject();
1172         if (self->encoding == NULL) {
1173             goto error;
1174         }
1175         assert(PyUnicode_Check(self->encoding));
1176     }
1177     if (self->encoding != NULL) {
1178         encoding = PyUnicode_AsUTF8(self->encoding);
1179         if (encoding == NULL)
1180             goto error;
1181     }
1182     else if (encoding != NULL) {
1183         self->encoding = PyUnicode_FromString(encoding);
1184         if (self->encoding == NULL)
1185             goto error;
1186     }
1187     else {
1188         PyErr_SetString(PyExc_OSError,
1189                         "could not determine default encoding");
1190         goto error;
1191     }
1192 
1193     /* Check we have been asked for a real text encoding */
1194     codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1195     if (codec_info == NULL) {
1196         Py_CLEAR(self->encoding);
1197         goto error;
1198     }
1199 
1200     /* XXX: Failures beyond this point have the potential to leak elements
1201      * of the partially constructed object (like self->encoding)
1202      */
1203 
1204     Py_INCREF(errors);
1205     self->errors = errors;
1206     self->chunk_size = 8192;
1207     self->line_buffering = line_buffering;
1208     self->write_through = write_through;
1209     if (set_newline(self, newline) < 0) {
1210         goto error;
1211     }
1212 
1213     self->buffer = buffer;
1214     Py_INCREF(buffer);
1215 
1216     /* Build the decoder object */
1217     if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1218         goto error;
1219 
1220     /* Build the encoder object */
1221     if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1222         goto error;
1223 
1224     /* Finished sorting out the codec details */
1225     Py_CLEAR(codec_info);
1226 
1227     if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) ||
1228         Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
1229         Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
1230     {
1231         if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1232             goto error;
1233         /* Cache the raw FileIO object to speed up 'closed' checks */
1234         if (raw != NULL) {
1235             if (Py_IS_TYPE(raw, &PyFileIO_Type))
1236                 self->raw = raw;
1237             else
1238                 Py_DECREF(raw);
1239         }
1240     }
1241 
1242     res = _PyObject_CallMethodIdNoArgs(buffer, &PyId_seekable);
1243     if (res == NULL)
1244         goto error;
1245     r = PyObject_IsTrue(res);
1246     Py_DECREF(res);
1247     if (r < 0)
1248         goto error;
1249     self->seekable = self->telling = r;
1250 
1251     r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1252     if (r < 0) {
1253         goto error;
1254     }
1255     Py_XDECREF(res);
1256     self->has_read1 = r;
1257 
1258     self->encoding_start_of_stream = 0;
1259     if (_textiowrapper_fix_encoder_state(self) < 0) {
1260         goto error;
1261     }
1262 
1263     self->ok = 1;
1264     return 0;
1265 
1266   error:
1267     Py_XDECREF(codec_info);
1268     return -1;
1269 }
1270 
1271 /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1272  * -1 on error.
1273  */
1274 static int
convert_optional_bool(PyObject * obj,int default_value)1275 convert_optional_bool(PyObject *obj, int default_value)
1276 {
1277     long v;
1278     if (obj == Py_None) {
1279         v = default_value;
1280     }
1281     else {
1282         v = PyLong_AsLong(obj);
1283         if (v == -1 && PyErr_Occurred())
1284             return -1;
1285     }
1286     return v != 0;
1287 }
1288 
1289 static int
textiowrapper_change_encoding(textio * self,PyObject * encoding,PyObject * errors,int newline_changed)1290 textiowrapper_change_encoding(textio *self, PyObject *encoding,
1291                               PyObject *errors, int newline_changed)
1292 {
1293     /* Use existing settings where new settings are not specified */
1294     if (encoding == Py_None && errors == Py_None && !newline_changed) {
1295         return 0;  // no change
1296     }
1297 
1298     if (encoding == Py_None) {
1299         encoding = self->encoding;
1300         if (errors == Py_None) {
1301             errors = self->errors;
1302         }
1303     }
1304     else if (errors == Py_None) {
1305         errors = _PyUnicode_FromId(&PyId_strict);
1306         if (errors == NULL) {
1307             return -1;
1308         }
1309     }
1310 
1311     const char *c_errors = PyUnicode_AsUTF8(errors);
1312     if (c_errors == NULL) {
1313         return -1;
1314     }
1315 
1316     // Create new encoder & decoder
1317     PyObject *codec_info = _PyCodec_LookupTextEncoding(
1318         PyUnicode_AsUTF8(encoding), "codecs.open()");
1319     if (codec_info == NULL) {
1320         return -1;
1321     }
1322     if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1323             _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1324         Py_DECREF(codec_info);
1325         return -1;
1326     }
1327     Py_DECREF(codec_info);
1328 
1329     Py_INCREF(encoding);
1330     Py_INCREF(errors);
1331     Py_SETREF(self->encoding, encoding);
1332     Py_SETREF(self->errors, errors);
1333 
1334     return _textiowrapper_fix_encoder_state(self);
1335 }
1336 
1337 /*[clinic input]
1338 _io.TextIOWrapper.reconfigure
1339     *
1340     encoding: object = None
1341     errors: object = None
1342     newline as newline_obj: object(c_default="NULL") = None
1343     line_buffering as line_buffering_obj: object = None
1344     write_through as write_through_obj: object = None
1345 
1346 Reconfigure the text stream with new parameters.
1347 
1348 This also does an implicit stream flush.
1349 
1350 [clinic start generated code]*/
1351 
1352 static PyObject *
_io_TextIOWrapper_reconfigure_impl(textio * self,PyObject * encoding,PyObject * errors,PyObject * newline_obj,PyObject * line_buffering_obj,PyObject * write_through_obj)1353 _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1354                                    PyObject *errors, PyObject *newline_obj,
1355                                    PyObject *line_buffering_obj,
1356                                    PyObject *write_through_obj)
1357 /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
1358 {
1359     int line_buffering;
1360     int write_through;
1361     const char *newline = NULL;
1362 
1363     /* Check if something is in the read buffer */
1364     if (self->decoded_chars != NULL) {
1365         if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1366             _unsupported("It is not possible to set the encoding or newline "
1367                          "of stream after the first read");
1368             return NULL;
1369         }
1370     }
1371 
1372     if (newline_obj != NULL && newline_obj != Py_None) {
1373         newline = PyUnicode_AsUTF8(newline_obj);
1374         if (newline == NULL || validate_newline(newline) < 0) {
1375             return NULL;
1376         }
1377     }
1378 
1379     line_buffering = convert_optional_bool(line_buffering_obj,
1380                                            self->line_buffering);
1381     write_through = convert_optional_bool(write_through_obj,
1382                                           self->write_through);
1383     if (line_buffering < 0 || write_through < 0) {
1384         return NULL;
1385     }
1386 
1387     PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
1388     if (res == NULL) {
1389         return NULL;
1390     }
1391     Py_DECREF(res);
1392     self->b2cratio = 0;
1393 
1394     if (newline_obj != NULL && set_newline(self, newline) < 0) {
1395         return NULL;
1396     }
1397 
1398     if (textiowrapper_change_encoding(
1399             self, encoding, errors, newline_obj != NULL) < 0) {
1400         return NULL;
1401     }
1402 
1403     self->line_buffering = line_buffering;
1404     self->write_through = write_through;
1405     Py_RETURN_NONE;
1406 }
1407 
1408 static int
textiowrapper_clear(textio * self)1409 textiowrapper_clear(textio *self)
1410 {
1411     self->ok = 0;
1412     Py_CLEAR(self->buffer);
1413     Py_CLEAR(self->encoding);
1414     Py_CLEAR(self->encoder);
1415     Py_CLEAR(self->decoder);
1416     Py_CLEAR(self->readnl);
1417     Py_CLEAR(self->decoded_chars);
1418     Py_CLEAR(self->pending_bytes);
1419     Py_CLEAR(self->snapshot);
1420     Py_CLEAR(self->errors);
1421     Py_CLEAR(self->raw);
1422 
1423     Py_CLEAR(self->dict);
1424     return 0;
1425 }
1426 
1427 static void
textiowrapper_dealloc(textio * self)1428 textiowrapper_dealloc(textio *self)
1429 {
1430     self->finalizing = 1;
1431     if (_PyIOBase_finalize((PyObject *) self) < 0)
1432         return;
1433     self->ok = 0;
1434     _PyObject_GC_UNTRACK(self);
1435     if (self->weakreflist != NULL)
1436         PyObject_ClearWeakRefs((PyObject *)self);
1437     textiowrapper_clear(self);
1438     Py_TYPE(self)->tp_free((PyObject *)self);
1439 }
1440 
1441 static int
textiowrapper_traverse(textio * self,visitproc visit,void * arg)1442 textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1443 {
1444     Py_VISIT(self->buffer);
1445     Py_VISIT(self->encoding);
1446     Py_VISIT(self->encoder);
1447     Py_VISIT(self->decoder);
1448     Py_VISIT(self->readnl);
1449     Py_VISIT(self->decoded_chars);
1450     Py_VISIT(self->pending_bytes);
1451     Py_VISIT(self->snapshot);
1452     Py_VISIT(self->errors);
1453     Py_VISIT(self->raw);
1454 
1455     Py_VISIT(self->dict);
1456     return 0;
1457 }
1458 
1459 static PyObject *
1460 textiowrapper_closed_get(textio *self, void *context);
1461 
1462 /* This macro takes some shortcuts to make the common case faster. */
1463 #define CHECK_CLOSED(self) \
1464     do { \
1465         int r; \
1466         PyObject *_res; \
1467         if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { \
1468             if (self->raw != NULL) \
1469                 r = _PyFileIO_closed(self->raw); \
1470             else { \
1471                 _res = textiowrapper_closed_get(self, NULL); \
1472                 if (_res == NULL) \
1473                     return NULL; \
1474                 r = PyObject_IsTrue(_res); \
1475                 Py_DECREF(_res); \
1476                 if (r < 0) \
1477                     return NULL; \
1478             } \
1479             if (r > 0) { \
1480                 PyErr_SetString(PyExc_ValueError, \
1481                                 "I/O operation on closed file."); \
1482                 return NULL; \
1483             } \
1484         } \
1485         else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1486             return NULL; \
1487     } while (0)
1488 
/* Raise ValueError and return NULL from the enclosing function unless
 * `self` has been successfully initialised (self->ok > 0).
 * Expands to a bare `if` statement, not a do/while block.
 */
#define CHECK_INITIALIZED(self) \
    if (!(self->ok > 0)) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }
1495 
/* Like CHECK_INITIALIZED, and additionally raise ValueError and return NULL
 * from the enclosing function if the underlying buffer has been detached.
 * Expands to two consecutive statements.
 */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached != 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }
1503 
/* Variant of CHECK_ATTACHED for functions returning int: raises the same
 * ValueErrors but returns -1 from the enclosing function.
 */
#define CHECK_ATTACHED_INT(self) \
    if (!(self->ok > 0)) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached != 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1514 
1515 
1516 /*[clinic input]
1517 _io.TextIOWrapper.detach
1518 [clinic start generated code]*/
1519 
1520 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1521 _io_TextIOWrapper_detach_impl(textio *self)
1522 /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1523 {
1524     PyObject *buffer, *res;
1525     CHECK_ATTACHED(self);
1526     res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
1527     if (res == NULL)
1528         return NULL;
1529     Py_DECREF(res);
1530     buffer = self->buffer;
1531     self->buffer = NULL;
1532     self->detached = 1;
1533     return buffer;
1534 }
1535 
1536 /* Flush the internal write buffer. This doesn't explicitly flush the
1537    underlying buffered object, though. */
1538 static int
_textiowrapper_writeflush(textio * self)1539 _textiowrapper_writeflush(textio *self)
1540 {
1541     if (self->pending_bytes == NULL)
1542         return 0;
1543 
1544     PyObject *pending = self->pending_bytes;
1545     PyObject *b;
1546 
1547     if (PyBytes_Check(pending)) {
1548         b = pending;
1549         Py_INCREF(b);
1550     }
1551     else if (PyUnicode_Check(pending)) {
1552         assert(PyUnicode_IS_ASCII(pending));
1553         assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1554         b = PyBytes_FromStringAndSize(
1555                 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1556         if (b == NULL) {
1557             return -1;
1558         }
1559     }
1560     else {
1561         assert(PyList_Check(pending));
1562         b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1563         if (b == NULL) {
1564             return -1;
1565         }
1566 
1567         char *buf = PyBytes_AsString(b);
1568         Py_ssize_t pos = 0;
1569 
1570         for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1571             PyObject *obj = PyList_GET_ITEM(pending, i);
1572             char *src;
1573             Py_ssize_t len;
1574             if (PyUnicode_Check(obj)) {
1575                 assert(PyUnicode_IS_ASCII(obj));
1576                 src = PyUnicode_DATA(obj);
1577                 len = PyUnicode_GET_LENGTH(obj);
1578             }
1579             else {
1580                 assert(PyBytes_Check(obj));
1581                 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1582                     Py_DECREF(b);
1583                     return -1;
1584                 }
1585             }
1586             memcpy(buf + pos, src, len);
1587             pos += len;
1588         }
1589         assert(pos == self->pending_bytes_count);
1590     }
1591 
1592     self->pending_bytes_count = 0;
1593     self->pending_bytes = NULL;
1594     Py_DECREF(pending);
1595 
1596     PyObject *ret;
1597     do {
1598         ret = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
1599     } while (ret == NULL && _PyIO_trap_eintr());
1600     Py_DECREF(b);
1601     // NOTE: We cleared buffer but we don't know how many bytes are actually written
1602     // when an error occurred.
1603     if (ret == NULL)
1604         return -1;
1605     Py_DECREF(ret);
1606     return 0;
1607 }
1608 
1609 /*[clinic input]
1610 _io.TextIOWrapper.write
1611     text: unicode
1612     /
1613 [clinic start generated code]*/
1614 
1615 static PyObject *
_io_TextIOWrapper_write_impl(textio * self,PyObject * text)1616 _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1617 /*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
1618 {
1619     PyObject *ret;
1620     PyObject *b;
1621     Py_ssize_t textlen;
1622     int haslf = 0;
1623     int needflush = 0, text_needflush = 0;
1624 
1625     if (PyUnicode_READY(text) == -1)
1626         return NULL;
1627 
1628     CHECK_ATTACHED(self);
1629     CHECK_CLOSED(self);
1630 
1631     if (self->encoder == NULL)
1632         return _unsupported("not writable");
1633 
1634     Py_INCREF(text);
1635 
1636     textlen = PyUnicode_GET_LENGTH(text);
1637 
1638     if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1639         if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1640             haslf = 1;
1641 
1642     if (haslf && self->writetranslate && self->writenl != NULL) {
1643         PyObject *newtext = _PyObject_CallMethodId(
1644             text, &PyId_replace, "ss", "\n", self->writenl);
1645         Py_DECREF(text);
1646         if (newtext == NULL)
1647             return NULL;
1648         text = newtext;
1649     }
1650 
1651     if (self->write_through)
1652         text_needflush = 1;
1653     if (self->line_buffering &&
1654         (haslf ||
1655          PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1656         needflush = 1;
1657 
1658     /* XXX What if we were just reading? */
1659     if (self->encodefunc != NULL) {
1660         if (PyUnicode_IS_ASCII(text) &&
1661                 // See bpo-43260
1662                 PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1663                 is_asciicompat_encoding(self->encodefunc)) {
1664             b = text;
1665             Py_INCREF(b);
1666         }
1667         else {
1668             b = (*self->encodefunc)((PyObject *) self, text);
1669         }
1670         self->encoding_start_of_stream = 0;
1671     }
1672     else {
1673         b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
1674     }
1675 
1676     Py_DECREF(text);
1677     if (b == NULL)
1678         return NULL;
1679     if (b != text && !PyBytes_Check(b)) {
1680         PyErr_Format(PyExc_TypeError,
1681                      "encoder should return a bytes object, not '%.200s'",
1682                      Py_TYPE(b)->tp_name);
1683         Py_DECREF(b);
1684         return NULL;
1685     }
1686 
1687     Py_ssize_t bytes_len;
1688     if (b == text) {
1689         bytes_len = PyUnicode_GET_LENGTH(b);
1690     }
1691     else {
1692         bytes_len = PyBytes_GET_SIZE(b);
1693     }
1694 
1695     if (self->pending_bytes == NULL) {
1696         self->pending_bytes_count = 0;
1697         self->pending_bytes = b;
1698     }
1699     else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
1700         // Prevent to concatenate more than chunk_size data.
1701         if (_textiowrapper_writeflush(self) < 0) {
1702             Py_DECREF(b);
1703             return NULL;
1704         }
1705         self->pending_bytes = b;
1706     }
1707     else if (!PyList_CheckExact(self->pending_bytes)) {
1708         PyObject *list = PyList_New(2);
1709         if (list == NULL) {
1710             Py_DECREF(b);
1711             return NULL;
1712         }
1713         PyList_SET_ITEM(list, 0, self->pending_bytes);
1714         PyList_SET_ITEM(list, 1, b);
1715         self->pending_bytes = list;
1716     }
1717     else {
1718         if (PyList_Append(self->pending_bytes, b) < 0) {
1719             Py_DECREF(b);
1720             return NULL;
1721         }
1722         Py_DECREF(b);
1723     }
1724 
1725     self->pending_bytes_count += bytes_len;
1726     if (self->pending_bytes_count >= self->chunk_size || needflush ||
1727         text_needflush) {
1728         if (_textiowrapper_writeflush(self) < 0)
1729             return NULL;
1730     }
1731 
1732     if (needflush) {
1733         ret = PyObject_CallMethodNoArgs(self->buffer, _PyIO_str_flush);
1734         if (ret == NULL)
1735             return NULL;
1736         Py_DECREF(ret);
1737     }
1738 
1739     textiowrapper_set_decoded_chars(self, NULL);
1740     Py_CLEAR(self->snapshot);
1741 
1742     if (self->decoder) {
1743         ret = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
1744         if (ret == NULL)
1745             return NULL;
1746         Py_DECREF(ret);
1747     }
1748 
1749     return PyLong_FromSsize_t(textlen);
1750 }
1751 
1752 /* Steal a reference to chars and store it in the decoded_char buffer;
1753  */
1754 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1755 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1756 {
1757     Py_XSETREF(self->decoded_chars, chars);
1758     self->decoded_chars_used = 0;
1759 }
1760 
1761 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1762 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1763 {
1764     PyObject *chars;
1765     Py_ssize_t avail;
1766 
1767     if (self->decoded_chars == NULL)
1768         return PyUnicode_FromStringAndSize(NULL, 0);
1769 
1770     /* decoded_chars is guaranteed to be "ready". */
1771     avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1772              - self->decoded_chars_used);
1773 
1774     assert(avail >= 0);
1775 
1776     if (n < 0 || n > avail)
1777         n = avail;
1778 
1779     if (self->decoded_chars_used > 0 || n < avail) {
1780         chars = PyUnicode_Substring(self->decoded_chars,
1781                                     self->decoded_chars_used,
1782                                     self->decoded_chars_used + n);
1783         if (chars == NULL)
1784             return NULL;
1785     }
1786     else {
1787         chars = self->decoded_chars;
1788         Py_INCREF(chars);
1789     }
1790 
1791     self->decoded_chars_used += n;
1792     return chars;
1793 }
1794 
1795 /* Read and decode the next chunk of data from the BufferedReader.
1796  */
1797 static int
textiowrapper_read_chunk(textio * self,Py_ssize_t size_hint)1798 textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1799 {
1800     PyObject *dec_buffer = NULL;
1801     PyObject *dec_flags = NULL;
1802     PyObject *input_chunk = NULL;
1803     Py_buffer input_chunk_buf;
1804     PyObject *decoded_chars, *chunk_size;
1805     Py_ssize_t nbytes, nchars;
1806     int eof;
1807 
1808     /* The return value is True unless EOF was reached.  The decoded string is
1809      * placed in self._decoded_chars (replacing its previous value).  The
1810      * entire input chunk is sent to the decoder, though some of it may remain
1811      * buffered in the decoder, yet to be converted.
1812      */
1813 
1814     if (self->decoder == NULL) {
1815         _unsupported("not readable");
1816         return -1;
1817     }
1818 
1819     if (self->telling) {
1820         /* To prepare for tell(), we need to snapshot a point in the file
1821          * where the decoder's input buffer is empty.
1822          */
1823         PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
1824                                                      _PyIO_str_getstate);
1825         if (state == NULL)
1826             return -1;
1827         /* Given this, we know there was a valid snapshot point
1828          * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1829          */
1830         if (!PyTuple_Check(state)) {
1831             PyErr_SetString(PyExc_TypeError,
1832                             "illegal decoder state");
1833             Py_DECREF(state);
1834             return -1;
1835         }
1836         if (!PyArg_ParseTuple(state,
1837                               "OO;illegal decoder state", &dec_buffer, &dec_flags))
1838         {
1839             Py_DECREF(state);
1840             return -1;
1841         }
1842 
1843         if (!PyBytes_Check(dec_buffer)) {
1844             PyErr_Format(PyExc_TypeError,
1845                          "illegal decoder state: the first item should be a "
1846                          "bytes object, not '%.200s'",
1847                          Py_TYPE(dec_buffer)->tp_name);
1848             Py_DECREF(state);
1849             return -1;
1850         }
1851         Py_INCREF(dec_buffer);
1852         Py_INCREF(dec_flags);
1853         Py_DECREF(state);
1854     }
1855 
1856     /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1857     if (size_hint > 0) {
1858         size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1859     }
1860     chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1861     if (chunk_size == NULL)
1862         goto fail;
1863 
1864     input_chunk = PyObject_CallMethodOneArg(self->buffer,
1865         (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1866         chunk_size);
1867     Py_DECREF(chunk_size);
1868     if (input_chunk == NULL)
1869         goto fail;
1870 
1871     if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1872         PyErr_Format(PyExc_TypeError,
1873                      "underlying %s() should have returned a bytes-like object, "
1874                      "not '%.200s'", (self->has_read1 ? "read1": "read"),
1875                      Py_TYPE(input_chunk)->tp_name);
1876         goto fail;
1877     }
1878 
1879     nbytes = input_chunk_buf.len;
1880     eof = (nbytes == 0);
1881 
1882     decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1883     PyBuffer_Release(&input_chunk_buf);
1884     if (decoded_chars == NULL)
1885         goto fail;
1886 
1887     textiowrapper_set_decoded_chars(self, decoded_chars);
1888     nchars = PyUnicode_GET_LENGTH(decoded_chars);
1889     if (nchars > 0)
1890         self->b2cratio = (double) nbytes / nchars;
1891     else
1892         self->b2cratio = 0.0;
1893     if (nchars > 0)
1894         eof = 0;
1895 
1896     if (self->telling) {
1897         /* At the snapshot point, len(dec_buffer) bytes before the read, the
1898          * next input to be decoded is dec_buffer + input_chunk.
1899          */
1900         PyObject *next_input = dec_buffer;
1901         PyBytes_Concat(&next_input, input_chunk);
1902         dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1903         if (next_input == NULL) {
1904             goto fail;
1905         }
1906         PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1907         if (snapshot == NULL) {
1908             dec_flags = NULL;
1909             goto fail;
1910         }
1911         Py_XSETREF(self->snapshot, snapshot);
1912     }
1913     Py_DECREF(input_chunk);
1914 
1915     return (eof == 0);
1916 
1917   fail:
1918     Py_XDECREF(dec_buffer);
1919     Py_XDECREF(dec_flags);
1920     Py_XDECREF(input_chunk);
1921     return -1;
1922 }
1923 
1924 /*[clinic input]
1925 _io.TextIOWrapper.read
1926     size as n: Py_ssize_t(accept={int, NoneType}) = -1
1927     /
1928 [clinic start generated code]*/
1929 
1930 static PyObject *
_io_TextIOWrapper_read_impl(textio * self,Py_ssize_t n)1931 _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1932 /*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
1933 {
1934     PyObject *result = NULL, *chunks = NULL;
1935 
1936     CHECK_ATTACHED(self);
1937     CHECK_CLOSED(self);
1938 
1939     if (self->decoder == NULL)
1940         return _unsupported("not readable");
1941 
1942     if (_textiowrapper_writeflush(self) < 0)
1943         return NULL;
1944 
1945     if (n < 0) {
1946         /* Read everything */
1947         PyObject *bytes = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_read);
1948         PyObject *decoded;
1949         if (bytes == NULL)
1950             goto fail;
1951 
1952         if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type))
1953             decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1954                                                           bytes, 1);
1955         else
1956             decoded = PyObject_CallMethodObjArgs(
1957                 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
1958         Py_DECREF(bytes);
1959         if (check_decoded(decoded) < 0)
1960             goto fail;
1961 
1962         result = textiowrapper_get_decoded_chars(self, -1);
1963 
1964         if (result == NULL) {
1965             Py_DECREF(decoded);
1966             return NULL;
1967         }
1968 
1969         PyUnicode_AppendAndDel(&result, decoded);
1970         if (result == NULL)
1971             goto fail;
1972 
1973         textiowrapper_set_decoded_chars(self, NULL);
1974         Py_CLEAR(self->snapshot);
1975         return result;
1976     }
1977     else {
1978         int res = 1;
1979         Py_ssize_t remaining = n;
1980 
1981         result = textiowrapper_get_decoded_chars(self, n);
1982         if (result == NULL)
1983             goto fail;
1984         if (PyUnicode_READY(result) == -1)
1985             goto fail;
1986         remaining -= PyUnicode_GET_LENGTH(result);
1987 
1988         /* Keep reading chunks until we have n characters to return */
1989         while (remaining > 0) {
1990             res = textiowrapper_read_chunk(self, remaining);
1991             if (res < 0) {
1992                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1993                    when EINTR occurs so we needn't do it ourselves. */
1994                 if (_PyIO_trap_eintr()) {
1995                     continue;
1996                 }
1997                 goto fail;
1998             }
1999             if (res == 0)  /* EOF */
2000                 break;
2001             if (chunks == NULL) {
2002                 chunks = PyList_New(0);
2003                 if (chunks == NULL)
2004                     goto fail;
2005             }
2006             if (PyUnicode_GET_LENGTH(result) > 0 &&
2007                 PyList_Append(chunks, result) < 0)
2008                 goto fail;
2009             Py_DECREF(result);
2010             result = textiowrapper_get_decoded_chars(self, remaining);
2011             if (result == NULL)
2012                 goto fail;
2013             remaining -= PyUnicode_GET_LENGTH(result);
2014         }
2015         if (chunks != NULL) {
2016             if (result != NULL && PyList_Append(chunks, result) < 0)
2017                 goto fail;
2018             Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
2019             if (result == NULL)
2020                 goto fail;
2021             Py_CLEAR(chunks);
2022         }
2023         return result;
2024     }
2025   fail:
2026     Py_XDECREF(result);
2027     Py_XDECREF(chunks);
2028     return NULL;
2029 }
2030 
2031 
2032 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2033    that is to the NUL character. Otherwise the function will produce
2034    incorrect results. */
2035 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)2036 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2037 {
2038     if (kind == PyUnicode_1BYTE_KIND) {
2039         assert(ch < 256);
2040         return (char *) memchr((const void *) s, (char) ch, end - s);
2041     }
2042     for (;;) {
2043         while (PyUnicode_READ(kind, s, 0) > ch)
2044             s += kind;
2045         if (PyUnicode_READ(kind, s, 0) == ch)
2046             return s;
2047         if (s == end)
2048             return NULL;
2049         s += kind;
2050     }
2051 }
2052 
2053 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,int kind,const char * start,const char * end,Py_ssize_t * consumed)2054 _PyIO_find_line_ending(
2055     int translated, int universal, PyObject *readnl,
2056     int kind, const char *start, const char *end, Py_ssize_t *consumed)
2057 {
2058     Py_ssize_t len = (end - start)/kind;
2059 
2060     if (translated) {
2061         /* Newlines are already translated, only search for \n */
2062         const char *pos = find_control_char(kind, start, end, '\n');
2063         if (pos != NULL)
2064             return (pos - start)/kind + 1;
2065         else {
2066             *consumed = len;
2067             return -1;
2068         }
2069     }
2070     else if (universal) {
2071         /* Universal newline search. Find any of \r, \r\n, \n
2072          * The decoder ensures that \r\n are not split in two pieces
2073          */
2074         const char *s = start;
2075         for (;;) {
2076             Py_UCS4 ch;
2077             /* Fast path for non-control chars. The loop always ends
2078                since the Unicode string is NUL-terminated. */
2079             while (PyUnicode_READ(kind, s, 0) > '\r')
2080                 s += kind;
2081             if (s >= end) {
2082                 *consumed = len;
2083                 return -1;
2084             }
2085             ch = PyUnicode_READ(kind, s, 0);
2086             s += kind;
2087             if (ch == '\n')
2088                 return (s - start)/kind;
2089             if (ch == '\r') {
2090                 if (PyUnicode_READ(kind, s, 0) == '\n')
2091                     return (s - start)/kind + 1;
2092                 else
2093                     return (s - start)/kind;
2094             }
2095         }
2096     }
2097     else {
2098         /* Non-universal mode. */
2099         Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2100         const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2101         /* Assume that readnl is an ASCII character. */
2102         assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2103         if (readnl_len == 1) {
2104             const char *pos = find_control_char(kind, start, end, nl[0]);
2105             if (pos != NULL)
2106                 return (pos - start)/kind + 1;
2107             *consumed = len;
2108             return -1;
2109         }
2110         else {
2111             const char *s = start;
2112             const char *e = end - (readnl_len - 1)*kind;
2113             const char *pos;
2114             if (e < s)
2115                 e = s;
2116             while (s < e) {
2117                 Py_ssize_t i;
2118                 const char *pos = find_control_char(kind, s, end, nl[0]);
2119                 if (pos == NULL || pos >= e)
2120                     break;
2121                 for (i = 1; i < readnl_len; i++) {
2122                     if (PyUnicode_READ(kind, pos, i) != nl[i])
2123                         break;
2124                 }
2125                 if (i == readnl_len)
2126                     return (pos - start)/kind + readnl_len;
2127                 s = pos + kind;
2128             }
2129             pos = find_control_char(kind, e, end, nl[0]);
2130             if (pos == NULL)
2131                 *consumed = len;
2132             else
2133                 *consumed = (pos - start)/kind;
2134             return -1;
2135         }
2136     }
2137 }
2138 
2139 static PyObject *
_textiowrapper_readline(textio * self,Py_ssize_t limit)2140 _textiowrapper_readline(textio *self, Py_ssize_t limit)
2141 {
2142     PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2143     Py_ssize_t start, endpos, chunked, offset_to_buffer;
2144     int res;
2145 
2146     CHECK_CLOSED(self);
2147 
2148     if (_textiowrapper_writeflush(self) < 0)
2149         return NULL;
2150 
2151     chunked = 0;
2152 
2153     while (1) {
2154         const char *ptr;
2155         Py_ssize_t line_len;
2156         int kind;
2157         Py_ssize_t consumed = 0;
2158 
2159         /* First, get some data if necessary */
2160         res = 1;
2161         while (!self->decoded_chars ||
2162                !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2163             res = textiowrapper_read_chunk(self, 0);
2164             if (res < 0) {
2165                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2166                    when EINTR occurs so we needn't do it ourselves. */
2167                 if (_PyIO_trap_eintr()) {
2168                     continue;
2169                 }
2170                 goto error;
2171             }
2172             if (res == 0)
2173                 break;
2174         }
2175         if (res == 0) {
2176             /* end of file */
2177             textiowrapper_set_decoded_chars(self, NULL);
2178             Py_CLEAR(self->snapshot);
2179             start = endpos = offset_to_buffer = 0;
2180             break;
2181         }
2182 
2183         if (remaining == NULL) {
2184             line = self->decoded_chars;
2185             start = self->decoded_chars_used;
2186             offset_to_buffer = 0;
2187             Py_INCREF(line);
2188         }
2189         else {
2190             assert(self->decoded_chars_used == 0);
2191             line = PyUnicode_Concat(remaining, self->decoded_chars);
2192             start = 0;
2193             offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2194             Py_CLEAR(remaining);
2195             if (line == NULL)
2196                 goto error;
2197             if (PyUnicode_READY(line) == -1)
2198                 goto error;
2199         }
2200 
2201         ptr = PyUnicode_DATA(line);
2202         line_len = PyUnicode_GET_LENGTH(line);
2203         kind = PyUnicode_KIND(line);
2204 
2205         endpos = _PyIO_find_line_ending(
2206             self->readtranslate, self->readuniversal, self->readnl,
2207             kind,
2208             ptr + kind * start,
2209             ptr + kind * line_len,
2210             &consumed);
2211         if (endpos >= 0) {
2212             endpos += start;
2213             if (limit >= 0 && (endpos - start) + chunked >= limit)
2214                 endpos = start + limit - chunked;
2215             break;
2216         }
2217 
2218         /* We can put aside up to `endpos` */
2219         endpos = consumed + start;
2220         if (limit >= 0 && (endpos - start) + chunked >= limit) {
2221             /* Didn't find line ending, but reached length limit */
2222             endpos = start + limit - chunked;
2223             break;
2224         }
2225 
2226         if (endpos > start) {
2227             /* No line ending seen yet - put aside current data */
2228             PyObject *s;
2229             if (chunks == NULL) {
2230                 chunks = PyList_New(0);
2231                 if (chunks == NULL)
2232                     goto error;
2233             }
2234             s = PyUnicode_Substring(line, start, endpos);
2235             if (s == NULL)
2236                 goto error;
2237             if (PyList_Append(chunks, s) < 0) {
2238                 Py_DECREF(s);
2239                 goto error;
2240             }
2241             chunked += PyUnicode_GET_LENGTH(s);
2242             Py_DECREF(s);
2243         }
2244         /* There may be some remaining bytes we'll have to prepend to the
2245            next chunk of data */
2246         if (endpos < line_len) {
2247             remaining = PyUnicode_Substring(line, endpos, line_len);
2248             if (remaining == NULL)
2249                 goto error;
2250         }
2251         Py_CLEAR(line);
2252         /* We have consumed the buffer */
2253         textiowrapper_set_decoded_chars(self, NULL);
2254     }
2255 
2256     if (line != NULL) {
2257         /* Our line ends in the current buffer */
2258         self->decoded_chars_used = endpos - offset_to_buffer;
2259         if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2260             PyObject *s = PyUnicode_Substring(line, start, endpos);
2261             Py_CLEAR(line);
2262             if (s == NULL)
2263                 goto error;
2264             line = s;
2265         }
2266     }
2267     if (remaining != NULL) {
2268         if (chunks == NULL) {
2269             chunks = PyList_New(0);
2270             if (chunks == NULL)
2271                 goto error;
2272         }
2273         if (PyList_Append(chunks, remaining) < 0)
2274             goto error;
2275         Py_CLEAR(remaining);
2276     }
2277     if (chunks != NULL) {
2278         if (line != NULL) {
2279             if (PyList_Append(chunks, line) < 0)
2280                 goto error;
2281             Py_DECREF(line);
2282         }
2283         line = PyUnicode_Join(_PyIO_empty_str, chunks);
2284         if (line == NULL)
2285             goto error;
2286         Py_CLEAR(chunks);
2287     }
2288     if (line == NULL) {
2289         Py_INCREF(_PyIO_empty_str);
2290         line = _PyIO_empty_str;
2291     }
2292 
2293     return line;
2294 
2295   error:
2296     Py_XDECREF(chunks);
2297     Py_XDECREF(remaining);
2298     Py_XDECREF(line);
2299     return NULL;
2300 }
2301 
2302 /*[clinic input]
2303 _io.TextIOWrapper.readline
2304     size: Py_ssize_t = -1
2305     /
2306 [clinic start generated code]*/
2307 
2308 static PyObject *
_io_TextIOWrapper_readline_impl(textio * self,Py_ssize_t size)2309 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2310 /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2311 {
2312     CHECK_ATTACHED(self);
2313     return _textiowrapper_readline(self, size);
2314 }
2315 
2316 /* Seek and Tell */
2317 
/* Unpacked form of the integer position cookie exchanged by tell() and
   seek().  textiowrapper_build_cookie() / textiowrapper_parse_cookie()
   convert between this struct and a Python int. */
typedef struct {
    Py_off_t start_pos;     /* position passed to buffer.seek() on restore */
    int dec_flags;          /* flags handed to decoder.setstate() */
    int bytes_to_feed;      /* size passed to buffer.read() when replaying */
    int chars_to_skip;      /* decoded characters to skip after replaying */
    char need_eof;          /* non-zero: replay decode() with final=True */
} cookie_type;
2325 
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in the intermediary
   byte string the various cookie_type fields will be stored.
 */

/* Total size of the intermediary byte string: one Py_off_t, three ints and
   one char, stored back to back without padding. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
2357 
2358 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)2359 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2360 {
2361     unsigned char buffer[COOKIE_BUF_LEN];
2362     PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2363     if (cookieLong == NULL)
2364         return -1;
2365 
2366     if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2367                             PY_LITTLE_ENDIAN, 0) < 0) {
2368         Py_DECREF(cookieLong);
2369         return -1;
2370     }
2371     Py_DECREF(cookieLong);
2372 
2373     memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2374     memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2375     memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2376     memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2377     memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2378 
2379     return 0;
2380 }
2381 
2382 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)2383 textiowrapper_build_cookie(cookie_type *cookie)
2384 {
2385     unsigned char buffer[COOKIE_BUF_LEN];
2386 
2387     memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2388     memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2389     memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2390     memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2391     memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2392 
2393     return _PyLong_FromByteArray(buffer, sizeof(buffer),
2394                                  PY_LITTLE_ENDIAN, 0);
2395 }
2396 
2397 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2398 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2399 {
2400     PyObject *res;
2401     /* When seeking to the start of the stream, we call decoder.reset()
2402        rather than decoder.getstate().
2403        This is for a few decoders such as utf-16 for which the state value
2404        at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2405        utf-16, that we are expecting a BOM).
2406     */
2407     if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2408         res = PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
2409     else
2410         res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2411                                      "((yi))", "", cookie->dec_flags);
2412     if (res == NULL)
2413         return -1;
2414     Py_DECREF(res);
2415     return 0;
2416 }
2417 
2418 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2419 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2420 {
2421     PyObject *res;
2422     if (start_of_stream) {
2423         res = PyObject_CallMethodNoArgs(self->encoder, _PyIO_str_reset);
2424         self->encoding_start_of_stream = 1;
2425     }
2426     else {
2427         res = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_setstate,
2428                                         _PyLong_GetZero());
2429         self->encoding_start_of_stream = 0;
2430     }
2431     if (res == NULL)
2432         return -1;
2433     Py_DECREF(res);
2434     return 0;
2435 }
2436 
2437 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2438 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2439 {
2440     /* Same as _textiowrapper_decoder_setstate() above. */
2441     return _textiowrapper_encoder_reset(
2442         self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2443 }
2444 
2445 /*[clinic input]
2446 _io.TextIOWrapper.seek
2447     cookie as cookieObj: object
2448     whence: int = 0
2449     /
2450 [clinic start generated code]*/
2451 
2452 static PyObject *
_io_TextIOWrapper_seek_impl(textio * self,PyObject * cookieObj,int whence)2453 _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2454 /*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
2455 {
2456     PyObject *posobj;
2457     cookie_type cookie;
2458     PyObject *res;
2459     int cmp;
2460     PyObject *snapshot;
2461 
2462     CHECK_ATTACHED(self);
2463     CHECK_CLOSED(self);
2464 
2465     Py_INCREF(cookieObj);
2466 
2467     if (!self->seekable) {
2468         _unsupported("underlying stream is not seekable");
2469         goto fail;
2470     }
2471 
2472     PyObject *zero = _PyLong_GetZero();  // borrowed reference
2473 
2474     switch (whence) {
2475     case SEEK_CUR:
2476         /* seek relative to current position */
2477         cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2478         if (cmp < 0)
2479             goto fail;
2480 
2481         if (cmp == 0) {
2482             _unsupported("can't do nonzero cur-relative seeks");
2483             goto fail;
2484         }
2485 
2486         /* Seeking to the current position should attempt to
2487          * sync the underlying buffer with the current position.
2488          */
2489         Py_DECREF(cookieObj);
2490         cookieObj = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_tell);
2491         if (cookieObj == NULL)
2492             goto fail;
2493         break;
2494 
2495     case SEEK_END:
2496         /* seek relative to end of file */
2497         cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2498         if (cmp < 0)
2499             goto fail;
2500 
2501         if (cmp == 0) {
2502             _unsupported("can't do nonzero end-relative seeks");
2503             goto fail;
2504         }
2505 
2506         res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
2507         if (res == NULL)
2508             goto fail;
2509         Py_DECREF(res);
2510 
2511         textiowrapper_set_decoded_chars(self, NULL);
2512         Py_CLEAR(self->snapshot);
2513         if (self->decoder) {
2514             res = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
2515             if (res == NULL)
2516                 goto fail;
2517             Py_DECREF(res);
2518         }
2519 
2520         res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
2521         Py_CLEAR(cookieObj);
2522         if (res == NULL)
2523             goto fail;
2524         if (self->encoder) {
2525             /* If seek() == 0, we are at the start of stream, otherwise not */
2526             cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
2527             if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2528                 Py_DECREF(res);
2529                 goto fail;
2530             }
2531         }
2532         return res;
2533 
2534     case SEEK_SET:
2535         break;
2536 
2537     default:
2538         PyErr_Format(PyExc_ValueError,
2539                      "invalid whence (%d, should be %d, %d or %d)", whence,
2540                      SEEK_SET, SEEK_CUR, SEEK_END);
2541         goto fail;
2542     }
2543 
2544     cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
2545     if (cmp < 0)
2546         goto fail;
2547 
2548     if (cmp == 1) {
2549         PyErr_Format(PyExc_ValueError,
2550                      "negative seek position %R", cookieObj);
2551         goto fail;
2552     }
2553 
2554     res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
2555     if (res == NULL)
2556         goto fail;
2557     Py_DECREF(res);
2558 
2559     /* The strategy of seek() is to go back to the safe start point
2560      * and replay the effect of read(chars_to_skip) from there.
2561      */
2562     if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2563         goto fail;
2564 
2565     /* Seek back to the safe start point. */
2566     posobj = PyLong_FromOff_t(cookie.start_pos);
2567     if (posobj == NULL)
2568         goto fail;
2569     res = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_seek, posobj);
2570     Py_DECREF(posobj);
2571     if (res == NULL)
2572         goto fail;
2573     Py_DECREF(res);
2574 
2575     textiowrapper_set_decoded_chars(self, NULL);
2576     Py_CLEAR(self->snapshot);
2577 
2578     /* Restore the decoder to its state from the safe start point. */
2579     if (self->decoder) {
2580         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2581             goto fail;
2582     }
2583 
2584     if (cookie.chars_to_skip) {
2585         /* Just like _read_chunk, feed the decoder and save a snapshot. */
2586         PyObject *input_chunk = _PyObject_CallMethodId(
2587             self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
2588         PyObject *decoded;
2589 
2590         if (input_chunk == NULL)
2591             goto fail;
2592 
2593         if (!PyBytes_Check(input_chunk)) {
2594             PyErr_Format(PyExc_TypeError,
2595                          "underlying read() should have returned a bytes "
2596                          "object, not '%.200s'",
2597                          Py_TYPE(input_chunk)->tp_name);
2598             Py_DECREF(input_chunk);
2599             goto fail;
2600         }
2601 
2602         snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2603         if (snapshot == NULL) {
2604             goto fail;
2605         }
2606         Py_XSETREF(self->snapshot, snapshot);
2607 
2608         decoded = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_decode,
2609             input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
2610 
2611         if (check_decoded(decoded) < 0)
2612             goto fail;
2613 
2614         textiowrapper_set_decoded_chars(self, decoded);
2615 
2616         /* Skip chars_to_skip of the decoded characters. */
2617         if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2618             PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2619             goto fail;
2620         }
2621         self->decoded_chars_used = cookie.chars_to_skip;
2622     }
2623     else {
2624         snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2625         if (snapshot == NULL)
2626             goto fail;
2627         Py_XSETREF(self->snapshot, snapshot);
2628     }
2629 
2630     /* Finally, reset the encoder (merely useful for proper BOM handling) */
2631     if (self->encoder) {
2632         if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2633             goto fail;
2634     }
2635     return cookieObj;
2636   fail:
2637     Py_XDECREF(cookieObj);
2638     return NULL;
2639 
2640 }
2641 
2642 /*[clinic input]
2643 _io.TextIOWrapper.tell
2644 [clinic start generated code]*/
2645 
2646 static PyObject *
_io_TextIOWrapper_tell_impl(textio * self)2647 _io_TextIOWrapper_tell_impl(textio *self)
2648 /*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
2649 {
2650     PyObject *res;
2651     PyObject *posobj = NULL;
2652     cookie_type cookie = {0,0,0,0,0};
2653     PyObject *next_input;
2654     Py_ssize_t chars_to_skip, chars_decoded;
2655     Py_ssize_t skip_bytes, skip_back;
2656     PyObject *saved_state = NULL;
2657     const char *input, *input_end;
2658     Py_ssize_t dec_buffer_len;
2659     int dec_flags;
2660 
2661     CHECK_ATTACHED(self);
2662     CHECK_CLOSED(self);
2663 
2664     if (!self->seekable) {
2665         _unsupported("underlying stream is not seekable");
2666         goto fail;
2667     }
2668     if (!self->telling) {
2669         PyErr_SetString(PyExc_OSError,
2670                         "telling position disabled by next() call");
2671         goto fail;
2672     }
2673 
2674     if (_textiowrapper_writeflush(self) < 0)
2675         return NULL;
2676     res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
2677     if (res == NULL)
2678         goto fail;
2679     Py_DECREF(res);
2680 
2681     posobj = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_tell);
2682     if (posobj == NULL)
2683         goto fail;
2684 
2685     if (self->decoder == NULL || self->snapshot == NULL) {
2686         assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2687         return posobj;
2688     }
2689 
2690 #if defined(HAVE_LARGEFILE_SUPPORT)
2691     cookie.start_pos = PyLong_AsLongLong(posobj);
2692 #else
2693     cookie.start_pos = PyLong_AsLong(posobj);
2694 #endif
2695     Py_DECREF(posobj);
2696     if (PyErr_Occurred())
2697         goto fail;
2698 
2699     /* Skip backward to the snapshot point (see _read_chunk). */
2700     assert(PyTuple_Check(self->snapshot));
2701     if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2702         goto fail;
2703 
2704     assert (PyBytes_Check(next_input));
2705 
2706     cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2707 
2708     /* How many decoded characters have been used up since the snapshot? */
2709     if (self->decoded_chars_used == 0)  {
2710         /* We haven't moved from the snapshot point. */
2711         return textiowrapper_build_cookie(&cookie);
2712     }
2713 
2714     chars_to_skip = self->decoded_chars_used;
2715 
2716     /* Decoder state will be restored at the end */
2717     saved_state = PyObject_CallMethodNoArgs(self->decoder,
2718                                              _PyIO_str_getstate);
2719     if (saved_state == NULL)
2720         goto fail;
2721 
2722 #define DECODER_GETSTATE() do { \
2723         PyObject *dec_buffer; \
2724         PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
2725             _PyIO_str_getstate); \
2726         if (_state == NULL) \
2727             goto fail; \
2728         if (!PyTuple_Check(_state)) { \
2729             PyErr_SetString(PyExc_TypeError, \
2730                             "illegal decoder state"); \
2731             Py_DECREF(_state); \
2732             goto fail; \
2733         } \
2734         if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2735                               &dec_buffer, &dec_flags)) \
2736         { \
2737             Py_DECREF(_state); \
2738             goto fail; \
2739         } \
2740         if (!PyBytes_Check(dec_buffer)) { \
2741             PyErr_Format(PyExc_TypeError, \
2742                          "illegal decoder state: the first item should be a " \
2743                          "bytes object, not '%.200s'", \
2744                          Py_TYPE(dec_buffer)->tp_name); \
2745             Py_DECREF(_state); \
2746             goto fail; \
2747         } \
2748         dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2749         Py_DECREF(_state); \
2750     } while (0)
2751 
2752 #define DECODER_DECODE(start, len, res) do { \
2753         PyObject *_decoded = _PyObject_CallMethodId( \
2754             self->decoder, &PyId_decode, "y#", start, len); \
2755         if (check_decoded(_decoded) < 0) \
2756             goto fail; \
2757         res = PyUnicode_GET_LENGTH(_decoded); \
2758         Py_DECREF(_decoded); \
2759     } while (0)
2760 
2761     /* Fast search for an acceptable start point, close to our
2762        current pos */
2763     skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2764     skip_back = 1;
2765     assert(skip_back <= PyBytes_GET_SIZE(next_input));
2766     input = PyBytes_AS_STRING(next_input);
2767     while (skip_bytes > 0) {
2768         /* Decode up to temptative start point */
2769         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2770             goto fail;
2771         DECODER_DECODE(input, skip_bytes, chars_decoded);
2772         if (chars_decoded <= chars_to_skip) {
2773             DECODER_GETSTATE();
2774             if (dec_buffer_len == 0) {
2775                 /* Before pos and no bytes buffered in decoder => OK */
2776                 cookie.dec_flags = dec_flags;
2777                 chars_to_skip -= chars_decoded;
2778                 break;
2779             }
2780             /* Skip back by buffered amount and reset heuristic */
2781             skip_bytes -= dec_buffer_len;
2782             skip_back = 1;
2783         }
2784         else {
2785             /* We're too far ahead, skip back a bit */
2786             skip_bytes -= skip_back;
2787             skip_back *= 2;
2788         }
2789     }
2790     if (skip_bytes <= 0) {
2791         skip_bytes = 0;
2792         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2793             goto fail;
2794     }
2795 
2796     /* Note our initial start point. */
2797     cookie.start_pos += skip_bytes;
2798     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2799     if (chars_to_skip == 0)
2800         goto finally;
2801 
2802     /* We should be close to the desired position.  Now feed the decoder one
2803      * byte at a time until we reach the `chars_to_skip` target.
2804      * As we go, note the nearest "safe start point" before the current
2805      * location (a point where the decoder has nothing buffered, so seek()
2806      * can safely start from there and advance to this location).
2807      */
2808     chars_decoded = 0;
2809     input = PyBytes_AS_STRING(next_input);
2810     input_end = input + PyBytes_GET_SIZE(next_input);
2811     input += skip_bytes;
2812     while (input < input_end) {
2813         Py_ssize_t n;
2814 
2815         DECODER_DECODE(input, (Py_ssize_t)1, n);
2816         /* We got n chars for 1 byte */
2817         chars_decoded += n;
2818         cookie.bytes_to_feed += 1;
2819         DECODER_GETSTATE();
2820 
2821         if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2822             /* Decoder buffer is empty, so this is a safe start point. */
2823             cookie.start_pos += cookie.bytes_to_feed;
2824             chars_to_skip -= chars_decoded;
2825             cookie.dec_flags = dec_flags;
2826             cookie.bytes_to_feed = 0;
2827             chars_decoded = 0;
2828         }
2829         if (chars_decoded >= chars_to_skip)
2830             break;
2831         input++;
2832     }
2833     if (input == input_end) {
2834         /* We didn't get enough decoded data; signal EOF to get more. */
2835         PyObject *decoded = _PyObject_CallMethodId(
2836             self->decoder, &PyId_decode, "yO", "", /* final = */ Py_True);
2837         if (check_decoded(decoded) < 0)
2838             goto fail;
2839         chars_decoded += PyUnicode_GET_LENGTH(decoded);
2840         Py_DECREF(decoded);
2841         cookie.need_eof = 1;
2842 
2843         if (chars_decoded < chars_to_skip) {
2844             PyErr_SetString(PyExc_OSError,
2845                             "can't reconstruct logical file position");
2846             goto fail;
2847         }
2848     }
2849 
2850 finally:
2851     res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
2852     Py_DECREF(saved_state);
2853     if (res == NULL)
2854         return NULL;
2855     Py_DECREF(res);
2856 
2857     /* The returned cookie corresponds to the last safe start point. */
2858     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2859     return textiowrapper_build_cookie(&cookie);
2860 
2861 fail:
2862     if (saved_state) {
2863         PyObject *type, *value, *traceback;
2864         PyErr_Fetch(&type, &value, &traceback);
2865         res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
2866         _PyErr_ChainExceptions(type, value, traceback);
2867         Py_DECREF(saved_state);
2868         Py_XDECREF(res);
2869     }
2870     return NULL;
2871 }
2872 
2873 /*[clinic input]
2874 _io.TextIOWrapper.truncate
2875     pos: object = None
2876     /
2877 [clinic start generated code]*/
2878 
2879 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2880 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2881 /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2882 {
2883     PyObject *res;
2884 
2885     CHECK_ATTACHED(self)
2886 
2887     res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
2888     if (res == NULL)
2889         return NULL;
2890     Py_DECREF(res);
2891 
2892     return PyObject_CallMethodOneArg(self->buffer, _PyIO_str_truncate, pos);
2893 }
2894 
2895 static PyObject *
textiowrapper_repr(textio * self)2896 textiowrapper_repr(textio *self)
2897 {
2898     PyObject *nameobj, *modeobj, *res, *s;
2899     int status;
2900 
2901     CHECK_INITIALIZED(self);
2902 
2903     res = PyUnicode_FromString("<_io.TextIOWrapper");
2904     if (res == NULL)
2905         return NULL;
2906 
2907     status = Py_ReprEnter((PyObject *)self);
2908     if (status != 0) {
2909         if (status > 0) {
2910             PyErr_Format(PyExc_RuntimeError,
2911                          "reentrant call inside %s.__repr__",
2912                          Py_TYPE(self)->tp_name);
2913         }
2914         goto error;
2915     }
2916     if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2917         if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
2918             goto error;
2919         }
2920         /* Ignore ValueError raised if the underlying stream was detached */
2921         PyErr_Clear();
2922     }
2923     if (nameobj != NULL) {
2924         s = PyUnicode_FromFormat(" name=%R", nameobj);
2925         Py_DECREF(nameobj);
2926         if (s == NULL)
2927             goto error;
2928         PyUnicode_AppendAndDel(&res, s);
2929         if (res == NULL)
2930             goto error;
2931     }
2932     if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2933         goto error;
2934     }
2935     if (modeobj != NULL) {
2936         s = PyUnicode_FromFormat(" mode=%R", modeobj);
2937         Py_DECREF(modeobj);
2938         if (s == NULL)
2939             goto error;
2940         PyUnicode_AppendAndDel(&res, s);
2941         if (res == NULL)
2942             goto error;
2943     }
2944     s = PyUnicode_FromFormat("%U encoding=%R>",
2945                              res, self->encoding);
2946     Py_DECREF(res);
2947     if (status == 0) {
2948         Py_ReprLeave((PyObject *)self);
2949     }
2950     return s;
2951 
2952   error:
2953     Py_XDECREF(res);
2954     if (status == 0) {
2955         Py_ReprLeave((PyObject *)self);
2956     }
2957     return NULL;
2958 }
2959 
2960 
2961 /* Inquiries */
2962 
2963 /*[clinic input]
2964 _io.TextIOWrapper.fileno
2965 [clinic start generated code]*/
2966 
2967 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)2968 _io_TextIOWrapper_fileno_impl(textio *self)
2969 /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2970 {
2971     CHECK_ATTACHED(self);
2972     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_fileno);
2973 }
2974 
2975 /*[clinic input]
2976 _io.TextIOWrapper.seekable
2977 [clinic start generated code]*/
2978 
2979 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)2980 _io_TextIOWrapper_seekable_impl(textio *self)
2981 /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2982 {
2983     CHECK_ATTACHED(self);
2984     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_seekable);
2985 }
2986 
2987 /*[clinic input]
2988 _io.TextIOWrapper.readable
2989 [clinic start generated code]*/
2990 
2991 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)2992 _io_TextIOWrapper_readable_impl(textio *self)
2993 /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2994 {
2995     CHECK_ATTACHED(self);
2996     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
2997 }
2998 
2999 /*[clinic input]
3000 _io.TextIOWrapper.writable
3001 [clinic start generated code]*/
3002 
3003 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)3004 _io_TextIOWrapper_writable_impl(textio *self)
3005 /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
3006 {
3007     CHECK_ATTACHED(self);
3008     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
3009 }
3010 
3011 /*[clinic input]
3012 _io.TextIOWrapper.isatty
3013 [clinic start generated code]*/
3014 
3015 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)3016 _io_TextIOWrapper_isatty_impl(textio *self)
3017 /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
3018 {
3019     CHECK_ATTACHED(self);
3020     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_isatty);
3021 }
3022 
3023 /*[clinic input]
3024 _io.TextIOWrapper.flush
3025 [clinic start generated code]*/
3026 
3027 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)3028 _io_TextIOWrapper_flush_impl(textio *self)
3029 /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
3030 {
3031     CHECK_ATTACHED(self);
3032     CHECK_CLOSED(self);
3033     self->telling = self->seekable;
3034     if (_textiowrapper_writeflush(self) < 0)
3035         return NULL;
3036     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_flush);
3037 }
3038 
3039 /*[clinic input]
3040 _io.TextIOWrapper.close
3041 [clinic start generated code]*/
3042 
3043 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)3044 _io_TextIOWrapper_close_impl(textio *self)
3045 /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
3046 {
3047     PyObject *res;
3048     int r;
3049     CHECK_ATTACHED(self);
3050 
3051     res = textiowrapper_closed_get(self, NULL);
3052     if (res == NULL)
3053         return NULL;
3054     r = PyObject_IsTrue(res);
3055     Py_DECREF(res);
3056     if (r < 0)
3057         return NULL;
3058 
3059     if (r > 0) {
3060         Py_RETURN_NONE; /* stream already closed */
3061     }
3062     else {
3063         PyObject *exc = NULL, *val, *tb;
3064         if (self->finalizing) {
3065             res = _PyObject_CallMethodIdOneArg(self->buffer,
3066                                               &PyId__dealloc_warn,
3067                                               (PyObject *)self);
3068             if (res)
3069                 Py_DECREF(res);
3070             else
3071                 PyErr_Clear();
3072         }
3073         res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
3074         if (res == NULL)
3075             PyErr_Fetch(&exc, &val, &tb);
3076         else
3077             Py_DECREF(res);
3078 
3079         res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_close);
3080         if (exc != NULL) {
3081             _PyErr_ChainExceptions(exc, val, tb);
3082             Py_CLEAR(res);
3083         }
3084         return res;
3085     }
3086 }
3087 
3088 static PyObject *
textiowrapper_iternext(textio * self)3089 textiowrapper_iternext(textio *self)
3090 {
3091     PyObject *line;
3092 
3093     CHECK_ATTACHED(self);
3094 
3095     self->telling = 0;
3096     if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) {
3097         /* Skip method call overhead for speed */
3098         line = _textiowrapper_readline(self, -1);
3099     }
3100     else {
3101         line = PyObject_CallMethodNoArgs((PyObject *)self,
3102                                           _PyIO_str_readline);
3103         if (line && !PyUnicode_Check(line)) {
3104             PyErr_Format(PyExc_OSError,
3105                          "readline() should have returned a str object, "
3106                          "not '%.200s'", Py_TYPE(line)->tp_name);
3107             Py_DECREF(line);
3108             return NULL;
3109         }
3110     }
3111 
3112     if (line == NULL || PyUnicode_READY(line) == -1)
3113         return NULL;
3114 
3115     if (PyUnicode_GET_LENGTH(line) == 0) {
3116         /* Reached EOF or would have blocked */
3117         Py_DECREF(line);
3118         Py_CLEAR(self->snapshot);
3119         self->telling = self->seekable;
3120         return NULL;
3121     }
3122 
3123     return line;
3124 }
3125 
3126 static PyObject *
textiowrapper_name_get(textio * self,void * context)3127 textiowrapper_name_get(textio *self, void *context)
3128 {
3129     CHECK_ATTACHED(self);
3130     return _PyObject_GetAttrId(self->buffer, &PyId_name);
3131 }
3132 
3133 static PyObject *
textiowrapper_closed_get(textio * self,void * context)3134 textiowrapper_closed_get(textio *self, void *context)
3135 {
3136     CHECK_ATTACHED(self);
3137     return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3138 }
3139 
3140 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)3141 textiowrapper_newlines_get(textio *self, void *context)
3142 {
3143     PyObject *res;
3144     CHECK_ATTACHED(self);
3145     if (self->decoder == NULL ||
3146         _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3147     {
3148         Py_RETURN_NONE;
3149     }
3150     return res;
3151 }
3152 
3153 static PyObject *
textiowrapper_errors_get(textio * self,void * context)3154 textiowrapper_errors_get(textio *self, void *context)
3155 {
3156     CHECK_INITIALIZED(self);
3157     Py_INCREF(self->errors);
3158     return self->errors;
3159 }
3160 
3161 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)3162 textiowrapper_chunk_size_get(textio *self, void *context)
3163 {
3164     CHECK_ATTACHED(self);
3165     return PyLong_FromSsize_t(self->chunk_size);
3166 }
3167 
3168 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)3169 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
3170 {
3171     Py_ssize_t n;
3172     CHECK_ATTACHED_INT(self);
3173     if (arg == NULL) {
3174         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3175         return -1;
3176     }
3177     n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
3178     if (n == -1 && PyErr_Occurred())
3179         return -1;
3180     if (n <= 0) {
3181         PyErr_SetString(PyExc_ValueError,
3182                         "a strictly positive integer is required");
3183         return -1;
3184     }
3185     self->chunk_size = n;
3186     return 0;
3187 }
3188 
3189 #include "clinic/textio.c.h"
3190 
/* Method table installed as tp_methods of PyIncrementalNewlineDecoder_Type.
   Each *_METHODDEF entry is a macro from the included clinic header. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}  /* sentinel */
};
3198 
3199 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3200     {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3201     {NULL}
3202 };
3203 
3204 PyTypeObject PyIncrementalNewlineDecoder_Type = {
3205     PyVarObject_HEAD_INIT(NULL, 0)
3206     "_io.IncrementalNewlineDecoder", /*tp_name*/
3207     sizeof(nldecoder_object), /*tp_basicsize*/
3208     0,                          /*tp_itemsize*/
3209     (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
3210     0,                          /*tp_vectorcall_offset*/
3211     0,                          /*tp_getattr*/
3212     0,                          /*tp_setattr*/
3213     0,                          /*tp_as_async*/
3214     0,                          /*tp_repr*/
3215     0,                          /*tp_as_number*/
3216     0,                          /*tp_as_sequence*/
3217     0,                          /*tp_as_mapping*/
3218     0,                          /*tp_hash */
3219     0,                          /*tp_call*/
3220     0,                          /*tp_str*/
3221     0,                          /*tp_getattro*/
3222     0,                          /*tp_setattro*/
3223     0,                          /*tp_as_buffer*/
3224     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
3225     _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3226     0,                          /* tp_traverse */
3227     0,                          /* tp_clear */
3228     0,                          /* tp_richcompare */
3229     0,                          /*tp_weaklistoffset*/
3230     0,                          /* tp_iter */
3231     0,                          /* tp_iternext */
3232     incrementalnewlinedecoder_methods, /* tp_methods */
3233     0,                          /* tp_members */
3234     incrementalnewlinedecoder_getset, /* tp_getset */
3235     0,                          /* tp_base */
3236     0,                          /* tp_dict */
3237     0,                          /* tp_descr_get */
3238     0,                          /* tp_descr_set */
3239     0,                          /* tp_dictoffset */
3240     _io_IncrementalNewlineDecoder___init__, /* tp_init */
3241     0,                          /* tp_alloc */
3242     PyType_GenericNew,          /* tp_new */
3243 };
3244 
3245 
/* Method table installed as tp_methods of PyTextIOWrapper_Type.
   Each *_METHODDEF entry is a macro from the included clinic header. */
static PyMethodDef textiowrapper_methods[] = {
    /* Lifecycle, configuration and data transfer */
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Inquiries */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
3266 
3267 static PyMemberDef textiowrapper_members[] = {
3268     {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3269     {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3270     {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
3271     {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
3272     {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
3273     {NULL}
3274 };
3275 
3276 static PyGetSetDef textiowrapper_getset[] = {
3277     {"name", (getter)textiowrapper_name_get, NULL, NULL},
3278     {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
3279 /*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3280 */
3281     {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3282     {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3283     {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3284                     (setter)textiowrapper_chunk_size_set, NULL},
3285     {NULL}
3286 };
3287 
3288 PyTypeObject PyTextIOWrapper_Type = {
3289     PyVarObject_HEAD_INIT(NULL, 0)
3290     "_io.TextIOWrapper",        /*tp_name*/
3291     sizeof(textio), /*tp_basicsize*/
3292     0,                          /*tp_itemsize*/
3293     (destructor)textiowrapper_dealloc, /*tp_dealloc*/
3294     0,                          /*tp_vectorcall_offset*/
3295     0,                          /*tp_getattr*/
3296     0,                          /*tps_etattr*/
3297     0,                          /*tp_as_async*/
3298     (reprfunc)textiowrapper_repr,/*tp_repr*/
3299     0,                          /*tp_as_number*/
3300     0,                          /*tp_as_sequence*/
3301     0,                          /*tp_as_mapping*/
3302     0,                          /*tp_hash */
3303     0,                          /*tp_call*/
3304     0,                          /*tp_str*/
3305     0,                          /*tp_getattro*/
3306     0,                          /*tp_setattro*/
3307     0,                          /*tp_as_buffer*/
3308     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
3309         | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
3310     _io_TextIOWrapper___init____doc__, /* tp_doc */
3311     (traverseproc)textiowrapper_traverse, /* tp_traverse */
3312     (inquiry)textiowrapper_clear, /* tp_clear */
3313     0,                          /* tp_richcompare */
3314     offsetof(textio, weakreflist), /*tp_weaklistoffset*/
3315     0,                          /* tp_iter */
3316     (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3317     textiowrapper_methods,      /* tp_methods */
3318     textiowrapper_members,      /* tp_members */
3319     textiowrapper_getset,       /* tp_getset */
3320     0,                          /* tp_base */
3321     0,                          /* tp_dict */
3322     0,                          /* tp_descr_get */
3323     0,                          /* tp_descr_set */
3324     offsetof(textio, dict), /*tp_dictoffset*/
3325     _io_TextIOWrapper___init__, /* tp_init */
3326     0,                          /* tp_alloc */
3327     PyType_GenericNew,          /* tp_new */
3328     0,                          /* tp_free */
3329     0,                          /* tp_is_gc */
3330     0,                          /* tp_bases */
3331     0,                          /* tp_mro */
3332     0,                          /* tp_cache */
3333     0,                          /* tp_subclasses */
3334     0,                          /* tp_weaklist */
3335     0,                          /* tp_del */
3336     0,                          /* tp_version_tag */
3337     0,                          /* tp_finalize */
3338 };
3339