• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8 
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "pycore_object.h"
12 #include "structmember.h"
13 #include "_iomodule.h"
14 
15 /*[clinic input]
16 module _io
17 class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
18 class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
21 
/* Static identifiers (PyId_<name>) for attribute and method names that
   this file looks up by name. */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
42 
43 /* TextIOBase */
44 
45 PyDoc_STRVAR(textiobase_doc,
46     "Base class for text I/O.\n"
47     "\n"
48     "This class provides a character and line based interface to stream\n"
49     "I/O. There is no readinto method because Python's character strings\n"
50     "are immutable. There is no public constructor.\n"
51     );
52 
53 static PyObject *
_unsupported(const char * message)54 _unsupported(const char *message)
55 {
56     _PyIO_State *state = IO_STATE();
57     if (state != NULL)
58         PyErr_SetString(state->unsupported_operation, message);
59     return NULL;
60 }
61 
62 PyDoc_STRVAR(textiobase_detach_doc,
63     "Separate the underlying buffer from the TextIOBase and return it.\n"
64     "\n"
65     "After the underlying buffer has been detached, the TextIO is in an\n"
66     "unusable state.\n"
67     );
68 
69 static PyObject *
textiobase_detach(PyObject * self,PyObject * Py_UNUSED (ignored))70 textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
71 {
72     return _unsupported("detach");
73 }
74 
75 PyDoc_STRVAR(textiobase_read_doc,
76     "Read at most n characters from stream.\n"
77     "\n"
78     "Read from underlying buffer until we have n characters or we hit EOF.\n"
79     "If n is negative or omitted, read until EOF.\n"
80     );
81 
82 static PyObject *
textiobase_read(PyObject * self,PyObject * args)83 textiobase_read(PyObject *self, PyObject *args)
84 {
85     return _unsupported("read");
86 }
87 
88 PyDoc_STRVAR(textiobase_readline_doc,
89     "Read until newline or EOF.\n"
90     "\n"
91     "Returns an empty string if EOF is hit immediately.\n"
92     );
93 
94 static PyObject *
textiobase_readline(PyObject * self,PyObject * args)95 textiobase_readline(PyObject *self, PyObject *args)
96 {
97     return _unsupported("readline");
98 }
99 
100 PyDoc_STRVAR(textiobase_write_doc,
101     "Write string to stream.\n"
102     "Returns the number of characters written (which is always equal to\n"
103     "the length of the string).\n"
104     );
105 
106 static PyObject *
textiobase_write(PyObject * self,PyObject * args)107 textiobase_write(PyObject *self, PyObject *args)
108 {
109     return _unsupported("write");
110 }
111 
112 PyDoc_STRVAR(textiobase_encoding_doc,
113     "Encoding of the text stream.\n"
114     "\n"
115     "Subclasses should override.\n"
116     );
117 
118 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)119 textiobase_encoding_get(PyObject *self, void *context)
120 {
121     Py_RETURN_NONE;
122 }
123 
124 PyDoc_STRVAR(textiobase_newlines_doc,
125     "Line endings translated so far.\n"
126     "\n"
127     "Only line endings translated during reading are considered.\n"
128     "\n"
129     "Subclasses should override.\n"
130     );
131 
132 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)133 textiobase_newlines_get(PyObject *self, void *context)
134 {
135     Py_RETURN_NONE;
136 }
137 
138 PyDoc_STRVAR(textiobase_errors_doc,
139     "The error setting of the decoder or encoder.\n"
140     "\n"
141     "Subclasses should override.\n"
142     );
143 
144 static PyObject *
textiobase_errors_get(PyObject * self,void * context)145 textiobase_errors_get(PyObject *self, void *context)
146 {
147     Py_RETURN_NONE;
148 }
149 
150 
151 static PyMethodDef textiobase_methods[] = {
152     {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
153     {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
154     {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
155     {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
156     {NULL, NULL}
157 };
158 
159 static PyGetSetDef textiobase_getset[] = {
160     {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
161     {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
162     {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
163     {NULL}
164 };
165 
166 PyTypeObject PyTextIOBase_Type = {
167     PyVarObject_HEAD_INIT(NULL, 0)
168     "_io._TextIOBase",          /*tp_name*/
169     0,                          /*tp_basicsize*/
170     0,                          /*tp_itemsize*/
171     0,                          /*tp_dealloc*/
172     0,                          /*tp_vectorcall_offset*/
173     0,                          /*tp_getattr*/
174     0,                          /*tp_setattr*/
175     0,                          /*tp_as_async*/
176     0,                          /*tp_repr*/
177     0,                          /*tp_as_number*/
178     0,                          /*tp_as_sequence*/
179     0,                          /*tp_as_mapping*/
180     0,                          /*tp_hash */
181     0,                          /*tp_call*/
182     0,                          /*tp_str*/
183     0,                          /*tp_getattro*/
184     0,                          /*tp_setattro*/
185     0,                          /*tp_as_buffer*/
186     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
187     textiobase_doc,             /* tp_doc */
188     0,                          /* tp_traverse */
189     0,                          /* tp_clear */
190     0,                          /* tp_richcompare */
191     0,                          /* tp_weaklistoffset */
192     0,                          /* tp_iter */
193     0,                          /* tp_iternext */
194     textiobase_methods,         /* tp_methods */
195     0,                          /* tp_members */
196     textiobase_getset,          /* tp_getset */
197     &PyIOBase_Type,             /* tp_base */
198     0,                          /* tp_dict */
199     0,                          /* tp_descr_get */
200     0,                          /* tp_descr_set */
201     0,                          /* tp_dictoffset */
202     0,                          /* tp_init */
203     0,                          /* tp_alloc */
204     0,                          /* tp_new */
205     0,                          /* tp_free */
206     0,                          /* tp_is_gc */
207     0,                          /* tp_bases */
208     0,                          /* tp_mro */
209     0,                          /* tp_cache */
210     0,                          /* tp_subclasses */
211     0,                          /* tp_weaklist */
212     0,                          /* tp_del */
213     0,                          /* tp_version_tag */
214     0,                          /* tp_finalize */
215 };
216 
217 
218 /* IncrementalNewlineDecoder */
219 
220 typedef struct {
221     PyObject_HEAD
222     PyObject *decoder;
223     PyObject *errors;
224     unsigned int pendingcr: 1;
225     unsigned int translate: 1;
226     unsigned int seennl: 3;
227 } nldecoder_object;
228 
229 /*[clinic input]
230 _io.IncrementalNewlineDecoder.__init__
231     decoder: object
232     translate: int
233     errors: object(c_default="NULL") = "strict"
234 
235 Codec used when reading a file in universal newlines mode.
236 
237 It wraps another incremental decoder, translating \r\n and \r into \n.
238 It also records the types of newlines encountered.  When used with
239 translate=False, it ensures that the newline sequence is returned in
240 one piece. When used with decoder=None, it expects unicode strings as
241 decode input and translates newlines without first invoking an external
242 decoder.
243 [clinic start generated code]*/
244 
245 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)246 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
247                                             PyObject *decoder, int translate,
248                                             PyObject *errors)
249 /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
250 {
251     self->decoder = decoder;
252     Py_INCREF(decoder);
253 
254     if (errors == NULL) {
255         self->errors = _PyUnicode_FromId(&PyId_strict);
256         if (self->errors == NULL)
257             return -1;
258     }
259     else {
260         self->errors = errors;
261     }
262     Py_INCREF(self->errors);
263 
264     self->translate = translate ? 1 : 0;
265     self->seennl = 0;
266     self->pendingcr = 0;
267 
268     return 0;
269 }
270 
271 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)272 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
273 {
274     Py_CLEAR(self->decoder);
275     Py_CLEAR(self->errors);
276     Py_TYPE(self)->tp_free((PyObject *)self);
277 }
278 
279 static int
check_decoded(PyObject * decoded)280 check_decoded(PyObject *decoded)
281 {
282     if (decoded == NULL)
283         return -1;
284     if (!PyUnicode_Check(decoded)) {
285         PyErr_Format(PyExc_TypeError,
286                      "decoder should return a string result, not '%.200s'",
287                      Py_TYPE(decoded)->tp_name);
288         Py_DECREF(decoded);
289         return -1;
290     }
291     if (PyUnicode_READY(decoded) < 0) {
292         Py_DECREF(decoded);
293         return -1;
294     }
295     return 0;
296 }
297 
/* Bit flags stored in nldecoder_object.seennl: one per newline style. */
#define SEEN_CR   1     /* "\r"   */
#define SEEN_LF   2     /* "\n"   */
#define SEEN_CRLF 4     /* "\r\n" */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
302 
303 PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject * myself,PyObject * input,int final)304 _PyIncrementalNewlineDecoder_decode(PyObject *myself,
305                                     PyObject *input, int final)
306 {
307     PyObject *output;
308     Py_ssize_t output_len;
309     nldecoder_object *self = (nldecoder_object *) myself;
310 
311     if (self->decoder == NULL) {
312         PyErr_SetString(PyExc_ValueError,
313                         "IncrementalNewlineDecoder.__init__ not called");
314         return NULL;
315     }
316 
317     /* decode input (with the eventual \r from a previous pass) */
318     if (self->decoder != Py_None) {
319         output = PyObject_CallMethodObjArgs(self->decoder,
320             _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
321     }
322     else {
323         output = input;
324         Py_INCREF(output);
325     }
326 
327     if (check_decoded(output) < 0)
328         return NULL;
329 
330     output_len = PyUnicode_GET_LENGTH(output);
331     if (self->pendingcr && (final || output_len > 0)) {
332         /* Prefix output with CR */
333         int kind;
334         PyObject *modified;
335         char *out;
336 
337         modified = PyUnicode_New(output_len + 1,
338                                  PyUnicode_MAX_CHAR_VALUE(output));
339         if (modified == NULL)
340             goto error;
341         kind = PyUnicode_KIND(modified);
342         out = PyUnicode_DATA(modified);
343         PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
344         memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
345         Py_DECREF(output);
346         output = modified; /* output remains ready */
347         self->pendingcr = 0;
348         output_len++;
349     }
350 
351     /* retain last \r even when not translating data:
352      * then readline() is sure to get \r\n in one pass
353      */
354     if (!final) {
355         if (output_len > 0
356             && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
357         {
358             PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
359             if (modified == NULL)
360                 goto error;
361             Py_DECREF(output);
362             output = modified;
363             self->pendingcr = 1;
364         }
365     }
366 
367     /* Record which newlines are read and do newline translation if desired,
368        all in one pass. */
369     {
370         void *in_str;
371         Py_ssize_t len;
372         int seennl = self->seennl;
373         int only_lf = 0;
374         int kind;
375 
376         in_str = PyUnicode_DATA(output);
377         len = PyUnicode_GET_LENGTH(output);
378         kind = PyUnicode_KIND(output);
379 
380         if (len == 0)
381             return output;
382 
383         /* If, up to now, newlines are consistently \n, do a quick check
384            for the \r *byte* with the libc's optimized memchr.
385            */
386         if (seennl == SEEN_LF || seennl == 0) {
387             only_lf = (memchr(in_str, '\r', kind * len) == NULL);
388         }
389 
390         if (only_lf) {
391             /* If not already seen, quick scan for a possible "\n" character.
392                (there's nothing else to be done, even when in translation mode)
393             */
394             if (seennl == 0 &&
395                 memchr(in_str, '\n', kind * len) != NULL) {
396                 if (kind == PyUnicode_1BYTE_KIND)
397                     seennl |= SEEN_LF;
398                 else {
399                     Py_ssize_t i = 0;
400                     for (;;) {
401                         Py_UCS4 c;
402                         /* Fast loop for non-control characters */
403                         while (PyUnicode_READ(kind, in_str, i) > '\n')
404                             i++;
405                         c = PyUnicode_READ(kind, in_str, i++);
406                         if (c == '\n') {
407                             seennl |= SEEN_LF;
408                             break;
409                         }
410                         if (i >= len)
411                             break;
412                     }
413                 }
414             }
415             /* Finished: we have scanned for newlines, and none of them
416                need translating */
417         }
418         else if (!self->translate) {
419             Py_ssize_t i = 0;
420             /* We have already seen all newline types, no need to scan again */
421             if (seennl == SEEN_ALL)
422                 goto endscan;
423             for (;;) {
424                 Py_UCS4 c;
425                 /* Fast loop for non-control characters */
426                 while (PyUnicode_READ(kind, in_str, i) > '\r')
427                     i++;
428                 c = PyUnicode_READ(kind, in_str, i++);
429                 if (c == '\n')
430                     seennl |= SEEN_LF;
431                 else if (c == '\r') {
432                     if (PyUnicode_READ(kind, in_str, i) == '\n') {
433                         seennl |= SEEN_CRLF;
434                         i++;
435                     }
436                     else
437                         seennl |= SEEN_CR;
438                 }
439                 if (i >= len)
440                     break;
441                 if (seennl == SEEN_ALL)
442                     break;
443             }
444         endscan:
445             ;
446         }
447         else {
448             void *translated;
449             int kind = PyUnicode_KIND(output);
450             void *in_str = PyUnicode_DATA(output);
451             Py_ssize_t in, out;
452             /* XXX: Previous in-place translation here is disabled as
453                resizing is not possible anymore */
454             /* We could try to optimize this so that we only do a copy
455                when there is something to translate. On the other hand,
456                we already know there is a \r byte, so chances are high
457                that something needs to be done. */
458             translated = PyMem_Malloc(kind * len);
459             if (translated == NULL) {
460                 PyErr_NoMemory();
461                 goto error;
462             }
463             in = out = 0;
464             for (;;) {
465                 Py_UCS4 c;
466                 /* Fast loop for non-control characters */
467                 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
468                     PyUnicode_WRITE(kind, translated, out++, c);
469                 if (c == '\n') {
470                     PyUnicode_WRITE(kind, translated, out++, c);
471                     seennl |= SEEN_LF;
472                     continue;
473                 }
474                 if (c == '\r') {
475                     if (PyUnicode_READ(kind, in_str, in) == '\n') {
476                         in++;
477                         seennl |= SEEN_CRLF;
478                     }
479                     else
480                         seennl |= SEEN_CR;
481                     PyUnicode_WRITE(kind, translated, out++, '\n');
482                     continue;
483                 }
484                 if (in > len)
485                     break;
486                 PyUnicode_WRITE(kind, translated, out++, c);
487             }
488             Py_DECREF(output);
489             output = PyUnicode_FromKindAndData(kind, translated, out);
490             PyMem_Free(translated);
491             if (!output)
492                 return NULL;
493         }
494         self->seennl |= seennl;
495     }
496 
497     return output;
498 
499   error:
500     Py_DECREF(output);
501     return NULL;
502 }
503 
504 /*[clinic input]
505 _io.IncrementalNewlineDecoder.decode
506     input: object
507     final: bool(accept={int}) = False
508 [clinic start generated code]*/
509 
510 static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object * self,PyObject * input,int final)511 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
512                                           PyObject *input, int final)
513 /*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
514 {
515     return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
516 }
517 
518 /*[clinic input]
519 _io.IncrementalNewlineDecoder.getstate
520 [clinic start generated code]*/
521 
522 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)523 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
524 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
525 {
526     PyObject *buffer;
527     unsigned long long flag;
528 
529     if (self->decoder != Py_None) {
530         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
531            _PyIO_str_getstate, NULL);
532         if (state == NULL)
533             return NULL;
534         if (!PyTuple_Check(state)) {
535             PyErr_SetString(PyExc_TypeError,
536                             "illegal decoder state");
537             Py_DECREF(state);
538             return NULL;
539         }
540         if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
541                               &buffer, &flag))
542         {
543             Py_DECREF(state);
544             return NULL;
545         }
546         Py_INCREF(buffer);
547         Py_DECREF(state);
548     }
549     else {
550         buffer = PyBytes_FromString("");
551         flag = 0;
552     }
553     flag <<= 1;
554     if (self->pendingcr)
555         flag |= 1;
556     return Py_BuildValue("NK", buffer, flag);
557 }
558 
559 /*[clinic input]
560 _io.IncrementalNewlineDecoder.setstate
561     state: object
562     /
563 [clinic start generated code]*/
564 
565 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)566 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
567                                        PyObject *state)
568 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
569 {
570     PyObject *buffer;
571     unsigned long long flag;
572 
573     if (!PyTuple_Check(state)) {
574         PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
575         return NULL;
576     }
577     if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
578                           &buffer, &flag))
579     {
580         return NULL;
581     }
582 
583     self->pendingcr = (int) (flag & 1);
584     flag >>= 1;
585 
586     if (self->decoder != Py_None)
587         return _PyObject_CallMethodId(self->decoder,
588                                       &PyId_setstate, "((OK))", buffer, flag);
589     else
590         Py_RETURN_NONE;
591 }
592 
593 /*[clinic input]
594 _io.IncrementalNewlineDecoder.reset
595 [clinic start generated code]*/
596 
597 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)598 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
599 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
600 {
601     self->seennl = 0;
602     self->pendingcr = 0;
603     if (self->decoder != Py_None)
604         return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
605     else
606         Py_RETURN_NONE;
607 }
608 
609 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)610 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
611 {
612     switch (self->seennl) {
613     case SEEN_CR:
614         return PyUnicode_FromString("\r");
615     case SEEN_LF:
616         return PyUnicode_FromString("\n");
617     case SEEN_CRLF:
618         return PyUnicode_FromString("\r\n");
619     case SEEN_CR | SEEN_LF:
620         return Py_BuildValue("ss", "\r", "\n");
621     case SEEN_CR | SEEN_CRLF:
622         return Py_BuildValue("ss", "\r", "\r\n");
623     case SEEN_LF | SEEN_CRLF:
624         return Py_BuildValue("ss", "\n", "\r\n");
625     case SEEN_CR | SEEN_LF | SEEN_CRLF:
626         return Py_BuildValue("sss", "\r", "\n", "\r\n");
627     default:
628         Py_RETURN_NONE;
629    }
630 
631 }
632 
633 /* TextIOWrapper */
634 
635 typedef PyObject *
636         (*encodefunc_t)(PyObject *, PyObject *);
637 
638 typedef struct
639 {
640     PyObject_HEAD
641     int ok; /* initialized? */
642     int detached;
643     Py_ssize_t chunk_size;
644     PyObject *buffer;
645     PyObject *encoding;
646     PyObject *encoder;
647     PyObject *decoder;
648     PyObject *readnl;
649     PyObject *errors;
650     const char *writenl; /* ASCII-encoded; NULL stands for \n */
651     char line_buffering;
652     char write_through;
653     char readuniversal;
654     char readtranslate;
655     char writetranslate;
656     char seekable;
657     char has_read1;
658     char telling;
659     char finalizing;
660     /* Specialized encoding func (see below) */
661     encodefunc_t encodefunc;
662     /* Whether or not it's the start of the stream */
663     char encoding_start_of_stream;
664 
665     /* Reads and writes are internally buffered in order to speed things up.
666        However, any read will first flush the write buffer if itsn't empty.
667 
668        Please also note that text to be written is first encoded before being
669        buffered. This is necessary so that encoding errors are immediately
670        reported to the caller, but it unfortunately means that the
671        IncrementalEncoder (whose encode() method is always written in Python)
672        becomes a bottleneck for small writes.
673     */
674     PyObject *decoded_chars;       /* buffer for text returned from decoder */
675     Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
676     PyObject *pending_bytes;       // data waiting to be written.
677                                    // ascii unicode, bytes, or list of them.
678     Py_ssize_t pending_bytes_count;
679 
680     /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
681      * dec_flags is the second (integer) item of the decoder state and
682      * next_input is the chunk of input bytes that comes next after the
683      * snapshot point.  We use this to reconstruct decoder states in tell().
684      */
685     PyObject *snapshot;
686     /* Bytes-to-characters ratio for the current chunk. Serves as input for
687        the heuristic in tell(). */
688     double b2cratio;
689 
690     /* Cache raw object if it's a FileIO object */
691     PyObject *raw;
692 
693     PyObject *weakreflist;
694     PyObject *dict;
695 } textio;
696 
697 static void
698 textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
699 
700 /* A couple of specialized cases in order to bypass the slow incremental
701    encoding methods for the most popular encodings. */
702 
703 static PyObject *
ascii_encode(textio * self,PyObject * text)704 ascii_encode(textio *self, PyObject *text)
705 {
706     return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
707 }
708 
709 static PyObject *
utf16be_encode(textio * self,PyObject * text)710 utf16be_encode(textio *self, PyObject *text)
711 {
712     return _PyUnicode_EncodeUTF16(text,
713                                   PyUnicode_AsUTF8(self->errors), 1);
714 }
715 
716 static PyObject *
utf16le_encode(textio * self,PyObject * text)717 utf16le_encode(textio *self, PyObject *text)
718 {
719     return _PyUnicode_EncodeUTF16(text,
720                                   PyUnicode_AsUTF8(self->errors), -1);
721 }
722 
723 static PyObject *
utf16_encode(textio * self,PyObject * text)724 utf16_encode(textio *self, PyObject *text)
725 {
726     if (!self->encoding_start_of_stream) {
727         /* Skip the BOM and use native byte ordering */
728 #if PY_BIG_ENDIAN
729         return utf16be_encode(self, text);
730 #else
731         return utf16le_encode(self, text);
732 #endif
733     }
734     return _PyUnicode_EncodeUTF16(text,
735                                   PyUnicode_AsUTF8(self->errors), 0);
736 }
737 
738 static PyObject *
utf32be_encode(textio * self,PyObject * text)739 utf32be_encode(textio *self, PyObject *text)
740 {
741     return _PyUnicode_EncodeUTF32(text,
742                                   PyUnicode_AsUTF8(self->errors), 1);
743 }
744 
745 static PyObject *
utf32le_encode(textio * self,PyObject * text)746 utf32le_encode(textio *self, PyObject *text)
747 {
748     return _PyUnicode_EncodeUTF32(text,
749                                   PyUnicode_AsUTF8(self->errors), -1);
750 }
751 
752 static PyObject *
utf32_encode(textio * self,PyObject * text)753 utf32_encode(textio *self, PyObject *text)
754 {
755     if (!self->encoding_start_of_stream) {
756         /* Skip the BOM and use native byte ordering */
757 #if PY_BIG_ENDIAN
758         return utf32be_encode(self, text);
759 #else
760         return utf32le_encode(self, text);
761 #endif
762     }
763     return _PyUnicode_EncodeUTF32(text,
764                                   PyUnicode_AsUTF8(self->errors), 0);
765 }
766 
767 static PyObject *
utf8_encode(textio * self,PyObject * text)768 utf8_encode(textio *self, PyObject *text)
769 {
770     return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
771 }
772 
773 static PyObject *
latin1_encode(textio * self,PyObject * text)774 latin1_encode(textio *self, PyObject *text)
775 {
776     return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
777 }
778 
779 // Return true when encoding can be skipped when text is ascii.
780 static inline int
is_asciicompat_encoding(encodefunc_t f)781 is_asciicompat_encoding(encodefunc_t f)
782 {
783     return f == (encodefunc_t) ascii_encode
784         || f == (encodefunc_t) latin1_encode
785         || f == (encodefunc_t) utf8_encode;
786 }
787 
788 /* Map normalized encoding names onto the specialized encoding funcs */
789 
790 typedef struct {
791     const char *name;
792     encodefunc_t encodefunc;
793 } encodefuncentry;
794 
795 static const encodefuncentry encodefuncs[] = {
796     {"ascii",       (encodefunc_t) ascii_encode},
797     {"iso8859-1",   (encodefunc_t) latin1_encode},
798     {"utf-8",       (encodefunc_t) utf8_encode},
799     {"utf-16-be",   (encodefunc_t) utf16be_encode},
800     {"utf-16-le",   (encodefunc_t) utf16le_encode},
801     {"utf-16",      (encodefunc_t) utf16_encode},
802     {"utf-32-be",   (encodefunc_t) utf32be_encode},
803     {"utf-32-le",   (encodefunc_t) utf32le_encode},
804     {"utf-32",      (encodefunc_t) utf32_encode},
805     {NULL, NULL}
806 };
807 
808 static int
validate_newline(const char * newline)809 validate_newline(const char *newline)
810 {
811     if (newline && newline[0] != '\0'
812         && !(newline[0] == '\n' && newline[1] == '\0')
813         && !(newline[0] == '\r' && newline[1] == '\0')
814         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
815         PyErr_Format(PyExc_ValueError,
816                      "illegal newline value: %s", newline);
817         return -1;
818     }
819     return 0;
820 }
821 
822 static int
set_newline(textio * self,const char * newline)823 set_newline(textio *self, const char *newline)
824 {
825     PyObject *old = self->readnl;
826     if (newline == NULL) {
827         self->readnl = NULL;
828     }
829     else {
830         self->readnl = PyUnicode_FromString(newline);
831         if (self->readnl == NULL) {
832             self->readnl = old;
833             return -1;
834         }
835     }
836     self->readuniversal = (newline == NULL || newline[0] == '\0');
837     self->readtranslate = (newline == NULL);
838     self->writetranslate = (newline == NULL || newline[0] != '\0');
839     if (!self->readuniversal && self->readnl != NULL) {
840         // validate_newline() accepts only ASCII newlines.
841         assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
842         self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
843         if (strcmp(self->writenl, "\n") == 0) {
844             self->writenl = NULL;
845         }
846     }
847     else {
848 #ifdef MS_WINDOWS
849         self->writenl = "\r\n";
850 #else
851         self->writenl = NULL;
852 #endif
853     }
854     Py_XDECREF(old);
855     return 0;
856 }
857 
858 static int
_textiowrapper_set_decoder(textio * self,PyObject * codec_info,const char * errors)859 _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
860                            const char *errors)
861 {
862     PyObject *res;
863     int r;
864 
865     res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
866     if (res == NULL)
867         return -1;
868 
869     r = PyObject_IsTrue(res);
870     Py_DECREF(res);
871     if (r == -1)
872         return -1;
873 
874     if (r != 1)
875         return 0;
876 
877     Py_CLEAR(self->decoder);
878     self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
879     if (self->decoder == NULL)
880         return -1;
881 
882     if (self->readuniversal) {
883         PyObject *incrementalDecoder = PyObject_CallFunction(
884             (PyObject *)&PyIncrementalNewlineDecoder_Type,
885             "Oi", self->decoder, (int)self->readtranslate);
886         if (incrementalDecoder == NULL)
887             return -1;
888         Py_CLEAR(self->decoder);
889         self->decoder = incrementalDecoder;
890     }
891 
892     return 0;
893 }
894 
895 static PyObject*
_textiowrapper_decode(PyObject * decoder,PyObject * bytes,int eof)896 _textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
897 {
898     PyObject *chars;
899 
900     if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
901         chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
902     else
903         chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
904                                            eof ? Py_True : Py_False, NULL);
905 
906     if (check_decoded(chars) < 0)
907         // check_decoded already decreases refcount
908         return NULL;
909 
910     return chars;
911 }
912 
913 static int
_textiowrapper_set_encoder(textio * self,PyObject * codec_info,const char * errors)914 _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
915                            const char *errors)
916 {
917     PyObject *res;
918     int r;
919 
920     res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
921     if (res == NULL)
922         return -1;
923 
924     r = PyObject_IsTrue(res);
925     Py_DECREF(res);
926     if (r == -1)
927         return -1;
928 
929     if (r != 1)
930         return 0;
931 
932     Py_CLEAR(self->encoder);
933     self->encodefunc = NULL;
934     self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
935     if (self->encoder == NULL)
936         return -1;
937 
938     /* Get the normalized named of the codec */
939     if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
940         return -1;
941     }
942     if (res != NULL && PyUnicode_Check(res)) {
943         const encodefuncentry *e = encodefuncs;
944         while (e->name != NULL) {
945             if (_PyUnicode_EqualToASCIIString(res, e->name)) {
946                 self->encodefunc = e->encodefunc;
947                 break;
948             }
949             e++;
950         }
951     }
952     Py_XDECREF(res);
953 
954     return 0;
955 }
956 
957 static int
_textiowrapper_fix_encoder_state(textio * self)958 _textiowrapper_fix_encoder_state(textio *self)
959 {
960     if (!self->seekable || !self->encoder) {
961         return 0;
962     }
963 
964     self->encoding_start_of_stream = 1;
965 
966     PyObject *cookieObj = PyObject_CallMethodObjArgs(
967         self->buffer, _PyIO_str_tell, NULL);
968     if (cookieObj == NULL) {
969         return -1;
970     }
971 
972     int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
973     Py_DECREF(cookieObj);
974     if (cmp < 0) {
975         return -1;
976     }
977 
978     if (cmp == 0) {
979         self->encoding_start_of_stream = 0;
980         PyObject *res = PyObject_CallMethodObjArgs(
981             self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL);
982         if (res == NULL) {
983             return -1;
984         }
985         Py_DECREF(res);
986     }
987 
988     return 0;
989 }
990 
991 /*[clinic input]
992 _io.TextIOWrapper.__init__
993     buffer: object
994     encoding: str(accept={str, NoneType}) = None
995     errors: object = None
996     newline: str(accept={str, NoneType}) = None
997     line_buffering: bool(accept={int}) = False
998     write_through: bool(accept={int}) = False
999 
1000 Character and line based layer over a BufferedIOBase object, buffer.
1001 
1002 encoding gives the name of the encoding that the stream will be
1003 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1004 
1005 errors determines the strictness of encoding and decoding (see
1006 help(codecs.Codec) or the documentation for codecs.register) and
1007 defaults to "strict".
1008 
1009 newline controls how line endings are handled. It can be None, '',
1010 '\n', '\r', and '\r\n'.  It works as follows:
1011 
1012 * On input, if newline is None, universal newlines mode is
1013   enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1014   these are translated into '\n' before being returned to the
1015   caller. If it is '', universal newline mode is enabled, but line
1016   endings are returned to the caller untranslated. If it has any of
1017   the other legal values, input lines are only terminated by the given
1018   string, and the line ending is returned to the caller untranslated.
1019 
1020 * On output, if newline is None, any '\n' characters written are
1021   translated to the system default line separator, os.linesep. If
1022   newline is '' or '\n', no translation takes place. If newline is any
1023   of the other legal values, any '\n' characters written are translated
1024   to the given string.
1025 
1026 If line_buffering is True, a call to flush is implied when a call to
1027 write contains a newline character.
1028 [clinic start generated code]*/
1029 
1030 static int
_io_TextIOWrapper___init___impl(textio * self,PyObject * buffer,const char * encoding,PyObject * errors,const char * newline,int line_buffering,int write_through)1031 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1032                                 const char *encoding, PyObject *errors,
1033                                 const char *newline, int line_buffering,
1034                                 int write_through)
1035 /*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
1036 {
1037     PyObject *raw, *codec_info = NULL;
1038     _PyIO_State *state = NULL;
1039     PyObject *res;
1040     int r;
1041 
1042     self->ok = 0;
1043     self->detached = 0;
1044 
1045     if (errors == Py_None) {
1046         errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
1047         if (errors == NULL) {
1048             return -1;
1049         }
1050     }
1051     else if (!PyUnicode_Check(errors)) {
1052         // Check 'errors' argument here because Argument Clinic doesn't support
1053         // 'str(accept={str, NoneType})' converter.
1054         PyErr_Format(
1055             PyExc_TypeError,
1056             "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1057             errors->ob_type->tp_name);
1058         return -1;
1059     }
1060 
1061     if (validate_newline(newline) < 0) {
1062         return -1;
1063     }
1064 
1065     Py_CLEAR(self->buffer);
1066     Py_CLEAR(self->encoding);
1067     Py_CLEAR(self->encoder);
1068     Py_CLEAR(self->decoder);
1069     Py_CLEAR(self->readnl);
1070     Py_CLEAR(self->decoded_chars);
1071     Py_CLEAR(self->pending_bytes);
1072     Py_CLEAR(self->snapshot);
1073     Py_CLEAR(self->errors);
1074     Py_CLEAR(self->raw);
1075     self->decoded_chars_used = 0;
1076     self->pending_bytes_count = 0;
1077     self->encodefunc = NULL;
1078     self->b2cratio = 0.0;
1079 
1080     if (encoding == NULL) {
1081         /* Try os.device_encoding(fileno) */
1082         PyObject *fileno;
1083         state = IO_STATE();
1084         if (state == NULL)
1085             goto error;
1086         fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
1087         /* Ignore only AttributeError and UnsupportedOperation */
1088         if (fileno == NULL) {
1089             if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1090                 PyErr_ExceptionMatches(state->unsupported_operation)) {
1091                 PyErr_Clear();
1092             }
1093             else {
1094                 goto error;
1095             }
1096         }
1097         else {
1098             int fd = _PyLong_AsInt(fileno);
1099             Py_DECREF(fileno);
1100             if (fd == -1 && PyErr_Occurred()) {
1101                 goto error;
1102             }
1103 
1104             self->encoding = _Py_device_encoding(fd);
1105             if (self->encoding == NULL)
1106                 goto error;
1107             else if (!PyUnicode_Check(self->encoding))
1108                 Py_CLEAR(self->encoding);
1109         }
1110     }
1111     if (encoding == NULL && self->encoding == NULL) {
1112         PyObject *locale_module = _PyIO_get_locale_module(state);
1113         if (locale_module == NULL)
1114             goto catch_ImportError;
1115         self->encoding = _PyObject_CallMethodIdObjArgs(
1116             locale_module, &PyId_getpreferredencoding, Py_False, NULL);
1117         Py_DECREF(locale_module);
1118         if (self->encoding == NULL) {
1119           catch_ImportError:
1120             /*
1121              Importing locale can raise an ImportError because of
1122              _functools, and locale.getpreferredencoding can raise an
1123              ImportError if _locale is not available.  These will happen
1124              during module building.
1125             */
1126             if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1127                 PyErr_Clear();
1128                 self->encoding = PyUnicode_FromString("ascii");
1129             }
1130             else
1131                 goto error;
1132         }
1133         else if (!PyUnicode_Check(self->encoding))
1134             Py_CLEAR(self->encoding);
1135     }
1136     if (self->encoding != NULL) {
1137         encoding = PyUnicode_AsUTF8(self->encoding);
1138         if (encoding == NULL)
1139             goto error;
1140     }
1141     else if (encoding != NULL) {
1142         self->encoding = PyUnicode_FromString(encoding);
1143         if (self->encoding == NULL)
1144             goto error;
1145     }
1146     else {
1147         PyErr_SetString(PyExc_OSError,
1148                         "could not determine default encoding");
1149         goto error;
1150     }
1151 
1152     /* Check we have been asked for a real text encoding */
1153     codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1154     if (codec_info == NULL) {
1155         Py_CLEAR(self->encoding);
1156         goto error;
1157     }
1158 
1159     /* XXX: Failures beyond this point have the potential to leak elements
1160      * of the partially constructed object (like self->encoding)
1161      */
1162 
1163     Py_INCREF(errors);
1164     self->errors = errors;
1165     self->chunk_size = 8192;
1166     self->line_buffering = line_buffering;
1167     self->write_through = write_through;
1168     if (set_newline(self, newline) < 0) {
1169         goto error;
1170     }
1171 
1172     self->buffer = buffer;
1173     Py_INCREF(buffer);
1174 
1175     /* Build the decoder object */
1176     if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1177         goto error;
1178 
1179     /* Build the encoder object */
1180     if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1181         goto error;
1182 
1183     /* Finished sorting out the codec details */
1184     Py_CLEAR(codec_info);
1185 
1186     if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1187         Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1188         Py_TYPE(buffer) == &PyBufferedRandom_Type)
1189     {
1190         if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1191             goto error;
1192         /* Cache the raw FileIO object to speed up 'closed' checks */
1193         if (raw != NULL) {
1194             if (Py_TYPE(raw) == &PyFileIO_Type)
1195                 self->raw = raw;
1196             else
1197                 Py_DECREF(raw);
1198         }
1199     }
1200 
1201     res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
1202     if (res == NULL)
1203         goto error;
1204     r = PyObject_IsTrue(res);
1205     Py_DECREF(res);
1206     if (r < 0)
1207         goto error;
1208     self->seekable = self->telling = r;
1209 
1210     r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1211     if (r < 0) {
1212         goto error;
1213     }
1214     Py_XDECREF(res);
1215     self->has_read1 = r;
1216 
1217     self->encoding_start_of_stream = 0;
1218     if (_textiowrapper_fix_encoder_state(self) < 0) {
1219         goto error;
1220     }
1221 
1222     self->ok = 1;
1223     return 0;
1224 
1225   error:
1226     Py_XDECREF(codec_info);
1227     return -1;
1228 }
1229 
1230 /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1231  * -1 on error.
1232  */
1233 static int
convert_optional_bool(PyObject * obj,int default_value)1234 convert_optional_bool(PyObject *obj, int default_value)
1235 {
1236     long v;
1237     if (obj == Py_None) {
1238         v = default_value;
1239     }
1240     else {
1241         v = PyLong_AsLong(obj);
1242         if (v == -1 && PyErr_Occurred())
1243             return -1;
1244     }
1245     return v != 0;
1246 }
1247 
1248 static int
textiowrapper_change_encoding(textio * self,PyObject * encoding,PyObject * errors,int newline_changed)1249 textiowrapper_change_encoding(textio *self, PyObject *encoding,
1250                               PyObject *errors, int newline_changed)
1251 {
1252     /* Use existing settings where new settings are not specified */
1253     if (encoding == Py_None && errors == Py_None && !newline_changed) {
1254         return 0;  // no change
1255     }
1256 
1257     if (encoding == Py_None) {
1258         encoding = self->encoding;
1259         if (errors == Py_None) {
1260             errors = self->errors;
1261         }
1262     }
1263     else if (errors == Py_None) {
1264         errors = _PyUnicode_FromId(&PyId_strict);
1265         if (errors == NULL) {
1266             return -1;
1267         }
1268     }
1269 
1270     const char *c_errors = PyUnicode_AsUTF8(errors);
1271     if (c_errors == NULL) {
1272         return -1;
1273     }
1274 
1275     // Create new encoder & decoder
1276     PyObject *codec_info = _PyCodec_LookupTextEncoding(
1277         PyUnicode_AsUTF8(encoding), "codecs.open()");
1278     if (codec_info == NULL) {
1279         return -1;
1280     }
1281     if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1282             _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1283         Py_DECREF(codec_info);
1284         return -1;
1285     }
1286     Py_DECREF(codec_info);
1287 
1288     Py_INCREF(encoding);
1289     Py_INCREF(errors);
1290     Py_SETREF(self->encoding, encoding);
1291     Py_SETREF(self->errors, errors);
1292 
1293     return _textiowrapper_fix_encoder_state(self);
1294 }
1295 
1296 /*[clinic input]
1297 _io.TextIOWrapper.reconfigure
1298     *
1299     encoding: object = None
1300     errors: object = None
1301     newline as newline_obj: object(c_default="NULL") = None
1302     line_buffering as line_buffering_obj: object = None
1303     write_through as write_through_obj: object = None
1304 
1305 Reconfigure the text stream with new parameters.
1306 
1307 This also does an implicit stream flush.
1308 
1309 [clinic start generated code]*/
1310 
1311 static PyObject *
_io_TextIOWrapper_reconfigure_impl(textio * self,PyObject * encoding,PyObject * errors,PyObject * newline_obj,PyObject * line_buffering_obj,PyObject * write_through_obj)1312 _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1313                                    PyObject *errors, PyObject *newline_obj,
1314                                    PyObject *line_buffering_obj,
1315                                    PyObject *write_through_obj)
1316 /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
1317 {
1318     int line_buffering;
1319     int write_through;
1320     const char *newline = NULL;
1321 
1322     /* Check if something is in the read buffer */
1323     if (self->decoded_chars != NULL) {
1324         if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1325             _unsupported("It is not possible to set the encoding or newline "
1326                          "of stream after the first read");
1327             return NULL;
1328         }
1329     }
1330 
1331     if (newline_obj != NULL && newline_obj != Py_None) {
1332         newline = PyUnicode_AsUTF8(newline_obj);
1333         if (newline == NULL || validate_newline(newline) < 0) {
1334             return NULL;
1335         }
1336     }
1337 
1338     line_buffering = convert_optional_bool(line_buffering_obj,
1339                                            self->line_buffering);
1340     write_through = convert_optional_bool(write_through_obj,
1341                                           self->write_through);
1342     if (line_buffering < 0 || write_through < 0) {
1343         return NULL;
1344     }
1345 
1346     PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1347     if (res == NULL) {
1348         return NULL;
1349     }
1350     Py_DECREF(res);
1351     self->b2cratio = 0;
1352 
1353     if (newline_obj != NULL && set_newline(self, newline) < 0) {
1354         return NULL;
1355     }
1356 
1357     if (textiowrapper_change_encoding(
1358             self, encoding, errors, newline_obj != NULL) < 0) {
1359         return NULL;
1360     }
1361 
1362     self->line_buffering = line_buffering;
1363     self->write_through = write_through;
1364     Py_RETURN_NONE;
1365 }
1366 
/* Release every object reference held by self and mark it as not
 * initialized (ok = 0).  Always returns 0.
 *
 * Called from textiowrapper_dealloc(); presumably also installed as the
 * type's tp_clear slot -- confirm against the type definition.
 */
static int
textiowrapper_clear(textio *self)
{
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);

    Py_CLEAR(self->dict);
    return 0;
}
1385 
/* Deallocator: finalize the object, then untrack it from the GC, clear
 * weak references, drop all owned references and free the memory.
 */
static void
textiowrapper_dealloc(textio *self)
{
    /* Flag that teardown is in progress before finalization runs. */
    self->finalizing = 1;
    /* A negative result aborts the deallocation and leaves the object
     * intact (presumably it was resurrected during finalization -- confirm
     * against _PyIOBase_finalize). */
    if (_PyIOBase_finalize((PyObject *) self) < 0)
        return;
    self->ok = 0;
    _PyObject_GC_UNTRACK(self);
    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *)self);
    textiowrapper_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
1399 
/* GC traversal: report each object reference held by self to visit.
 * The set of fields visited here mirrors the set released by
 * textiowrapper_clear(); keep the two lists in sync.
 */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1417 
/* Forward declaration: the CHECK_CLOSED macro below calls this getter. */
static PyObject *
textiowrapper_closed_get(textio *self, void *context);

/* Make the enclosing function return NULL with ValueError set if the
 * stream is closed.
 *
 * This macro takes some shortcuts to make the common case faster: for an
 * exact TextIOWrapper with a cached raw FileIO object (self->raw) the
 * closed state is read through _PyFileIO_closed(); without a cached raw
 * object the "closed" getter is used.  For any other type the check is
 * delegated to _PyIOBase_check_closed().
 *
 * Only usable inside functions that return a pointer, since every failure
 * path executes "return NULL".
 */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1447 
/* Make the enclosing function return NULL with ValueError set unless
 * __init__ completed successfully (self->ok > 0).
 *
 * NOTE(review): this expands to a bare "if" block, not a
 * do { } while (0) statement, so it must be used as a statement of its own
 * and never as the unbraced body of an if/else.
 */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }

/* CHECK_INITIALIZED, plus: return NULL with ValueError set if the
 * underlying buffer has been detached.  Expands to two statements, so the
 * same usage restriction applies.
 */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }

/* Same checks as CHECK_ATTACHED, for functions that return int: failure
 * is reported by returning -1 instead of NULL.
 */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1473 
1474 
1475 /*[clinic input]
1476 _io.TextIOWrapper.detach
1477 [clinic start generated code]*/
1478 
1479 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1480 _io_TextIOWrapper_detach_impl(textio *self)
1481 /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1482 {
1483     PyObject *buffer, *res;
1484     CHECK_ATTACHED(self);
1485     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1486     if (res == NULL)
1487         return NULL;
1488     Py_DECREF(res);
1489     buffer = self->buffer;
1490     self->buffer = NULL;
1491     self->detached = 1;
1492     return buffer;
1493 }
1494 
/* Flush the internal write buffer. This doesn't explicitly flush the
   underlying buffered object, though.

   self->pending_bytes may hold a single bytes object, a single ASCII str
   object, or a list mixing both; all three shapes are turned into one
   bytes object that is passed to buffer.write().

   Returns 0 on success (including when nothing is pending) and -1 with an
   exception set on failure. */
static int
_textiowrapper_writeflush(textio *self)
{
    if (self->pending_bytes == NULL)
        return 0;

    PyObject *pending = self->pending_bytes;
    PyObject *b;

    if (PyBytes_Check(pending)) {
        /* Single bytes object: write it as is. */
        b = pending;
        Py_INCREF(b);
    }
    else if (PyUnicode_Check(pending)) {
        /* Single ASCII str: its character data is copied verbatim into a
           new bytes object. */
        assert(PyUnicode_IS_ASCII(pending));
        assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
        b = PyBytes_FromStringAndSize(
                PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
        if (b == NULL) {
            return -1;
        }
    }
    else {
        /* List of chunks: concatenate them into one preallocated bytes
           object of pending_bytes_count bytes. */
        assert(PyList_Check(pending));
        b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
        if (b == NULL) {
            return -1;
        }

        char *buf = PyBytes_AsString(b);
        Py_ssize_t pos = 0;

        for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
            PyObject *obj = PyList_GET_ITEM(pending, i);
            char *src;
            Py_ssize_t len;
            if (PyUnicode_Check(obj)) {
                assert(PyUnicode_IS_ASCII(obj));
                src = PyUnicode_DATA(obj);
                len = PyUnicode_GET_LENGTH(obj);
            }
            else {
                assert(PyBytes_Check(obj));
                if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
                    Py_DECREF(b);
                    return -1;
                }
            }
            memcpy(buf + pos, src, len);
            pos += len;
        }
        assert(pos == self->pending_bytes_count);
    }

    /* Reset the pending buffer before calling into buffer.write(), so the
       wrapper's own state is already empty while that call runs. */
    self->pending_bytes_count = 0;
    self->pending_bytes = NULL;
    Py_DECREF(pending);

    PyObject *ret;
    /* Repeat the write for as long as it fails and _PyIO_trap_eintr()
       reports true. */
    do {
        ret = PyObject_CallMethodObjArgs(self->buffer,
                                         _PyIO_str_write, b, NULL);
    } while (ret == NULL && _PyIO_trap_eintr());
    Py_DECREF(b);
    if (ret == NULL)
        return -1;
    Py_DECREF(ret);
    return 0;
}
1566 
1567 /*[clinic input]
1568 _io.TextIOWrapper.write
1569     text: unicode
1570     /
1571 [clinic start generated code]*/
1572 
static PyObject *
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
{
    /* Encode text and append it to the pending write buffer, flushing as
     * required by chunk size, line buffering and write-through.
     *
     * Returns the length of the text as passed in (measured before newline
     * translation) as a Python int, or NULL with an exception set.
     */
    PyObject *ret;
    PyObject *b;
    Py_ssize_t textlen;
    int haslf = 0;
    int needflush = 0, text_needflush = 0;

    if (PyUnicode_READY(text) == -1)
        return NULL;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    /* No encoder was installed: the stream cannot be written to. */
    if (self->encoder == NULL)
        return _unsupported("not writable");

    /* Own a reference from here on: text may be replaced by a translated
     * copy below, and either one is released after encoding. */
    Py_INCREF(text);

    textlen = PyUnicode_GET_LENGTH(text);

    /* Only scan for '\n' when the answer matters: for newline translation
     * or for line buffering. */
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
            haslf = 1;

    if (haslf && self->writetranslate && self->writenl != NULL) {
        PyObject *newtext = _PyObject_CallMethodId(
            text, &PyId_replace, "ss", "\n", self->writenl);
        Py_DECREF(text);
        if (newtext == NULL)
            return NULL;
        text = newtext;
    }

    /* text_needflush pushes the pending text to the buffer object;
     * needflush additionally flushes the buffer object itself. */
    if (self->write_through)
        text_needflush = 1;
    if (self->line_buffering &&
        (haslf ||
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
        needflush = 1;

    /* XXX What if we were just reading? */
    if (self->encodefunc != NULL) {
        if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
            /* Fast path: the ASCII str itself is queued instead of an
             * encoded copy; _textiowrapper_writeflush() copies its data
             * out when flushing. */
            b = text;
            Py_INCREF(b);
        }
        else {
            b = (*self->encodefunc)((PyObject *) self, text);
        }
        self->encoding_start_of_stream = 0;
    }
    else
        b = PyObject_CallMethodObjArgs(self->encoder,
                                       _PyIO_str_encode, text, NULL);

    Py_DECREF(text);
    if (b == NULL)
        return NULL;
    /* From here on text is only compared by address; when b == text, b
     * holds its own reference to the object. */
    if (b != text && !PyBytes_Check(b)) {
        PyErr_Format(PyExc_TypeError,
                     "encoder should return a bytes object, not '%.200s'",
                     Py_TYPE(b)->tp_name);
        Py_DECREF(b);
        return NULL;
    }

    Py_ssize_t bytes_len;
    if (b == text) {
        bytes_len = PyUnicode_GET_LENGTH(b);
    }
    else {
        bytes_len = PyBytes_GET_SIZE(b);
    }

    /* pending_bytes grows from empty, to a single object, to a list of
     * objects.  Each branch consumes the reference to b. */
    if (self->pending_bytes == NULL) {
        self->pending_bytes_count = 0;
        self->pending_bytes = b;
    }
    else if (!PyList_CheckExact(self->pending_bytes)) {
        PyObject *list = PyList_New(2);
        if (list == NULL) {
            Py_DECREF(b);
            return NULL;
        }
        PyList_SET_ITEM(list, 0, self->pending_bytes);
        PyList_SET_ITEM(list, 1, b);
        self->pending_bytes = list;
    }
    else {
        if (PyList_Append(self->pending_bytes, b) < 0) {
            Py_DECREF(b);
            return NULL;
        }
        Py_DECREF(b);
    }

    self->pending_bytes_count += bytes_len;
    if (self->pending_bytes_count > self->chunk_size || needflush ||
        text_needflush) {
        if (_textiowrapper_writeflush(self) < 0)
            return NULL;
    }

    if (needflush) {
        ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    /* Writing discards the read-side state: buffered decoded text, the
     * snapshot, and (below) the decoder's internal state. */
    textiowrapper_set_decoded_chars(self, NULL);
    Py_CLEAR(self->snapshot);

    if (self->decoder) {
        ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    return PyLong_FromSsize_t(textlen);
}
1698 
1699 /* Steal a reference to chars and store it in the decoded_char buffer;
1700  */
1701 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1702 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1703 {
1704     Py_XSETREF(self->decoded_chars, chars);
1705     self->decoded_chars_used = 0;
1706 }
1707 
1708 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1709 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1710 {
1711     PyObject *chars;
1712     Py_ssize_t avail;
1713 
1714     if (self->decoded_chars == NULL)
1715         return PyUnicode_FromStringAndSize(NULL, 0);
1716 
1717     /* decoded_chars is guaranteed to be "ready". */
1718     avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1719              - self->decoded_chars_used);
1720 
1721     assert(avail >= 0);
1722 
1723     if (n < 0 || n > avail)
1724         n = avail;
1725 
1726     if (self->decoded_chars_used > 0 || n < avail) {
1727         chars = PyUnicode_Substring(self->decoded_chars,
1728                                     self->decoded_chars_used,
1729                                     self->decoded_chars_used + n);
1730         if (chars == NULL)
1731             return NULL;
1732     }
1733     else {
1734         chars = self->decoded_chars;
1735         Py_INCREF(chars);
1736     }
1737 
1738     self->decoded_chars_used += n;
1739     return chars;
1740 }
1741 
1742 /* Read and decode the next chunk of data from the BufferedReader.
1743  */
1744 static int
textiowrapper_read_chunk(textio * self,Py_ssize_t size_hint)1745 textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1746 {
1747     PyObject *dec_buffer = NULL;
1748     PyObject *dec_flags = NULL;
1749     PyObject *input_chunk = NULL;
1750     Py_buffer input_chunk_buf;
1751     PyObject *decoded_chars, *chunk_size;
1752     Py_ssize_t nbytes, nchars;
1753     int eof;
1754 
1755     /* The return value is True unless EOF was reached.  The decoded string is
1756      * placed in self._decoded_chars (replacing its previous value).  The
1757      * entire input chunk is sent to the decoder, though some of it may remain
1758      * buffered in the decoder, yet to be converted.
1759      */
1760 
1761     if (self->decoder == NULL) {
1762         _unsupported("not readable");
1763         return -1;
1764     }
1765 
1766     if (self->telling) {
1767         /* To prepare for tell(), we need to snapshot a point in the file
1768          * where the decoder's input buffer is empty.
1769          */
1770 
1771         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1772                                                      _PyIO_str_getstate, NULL);
1773         if (state == NULL)
1774             return -1;
1775         /* Given this, we know there was a valid snapshot point
1776          * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1777          */
1778         if (!PyTuple_Check(state)) {
1779             PyErr_SetString(PyExc_TypeError,
1780                             "illegal decoder state");
1781             Py_DECREF(state);
1782             return -1;
1783         }
1784         if (!PyArg_ParseTuple(state,
1785                               "OO;illegal decoder state", &dec_buffer, &dec_flags))
1786         {
1787             Py_DECREF(state);
1788             return -1;
1789         }
1790 
1791         if (!PyBytes_Check(dec_buffer)) {
1792             PyErr_Format(PyExc_TypeError,
1793                          "illegal decoder state: the first item should be a "
1794                          "bytes object, not '%.200s'",
1795                          Py_TYPE(dec_buffer)->tp_name);
1796             Py_DECREF(state);
1797             return -1;
1798         }
1799         Py_INCREF(dec_buffer);
1800         Py_INCREF(dec_flags);
1801         Py_DECREF(state);
1802     }
1803 
1804     /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1805     if (size_hint > 0) {
1806         size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1807     }
1808     chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1809     if (chunk_size == NULL)
1810         goto fail;
1811 
1812     input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1813         (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1814         chunk_size, NULL);
1815     Py_DECREF(chunk_size);
1816     if (input_chunk == NULL)
1817         goto fail;
1818 
1819     if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1820         PyErr_Format(PyExc_TypeError,
1821                      "underlying %s() should have returned a bytes-like object, "
1822                      "not '%.200s'", (self->has_read1 ? "read1": "read"),
1823                      Py_TYPE(input_chunk)->tp_name);
1824         goto fail;
1825     }
1826 
1827     nbytes = input_chunk_buf.len;
1828     eof = (nbytes == 0);
1829 
1830     decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1831     PyBuffer_Release(&input_chunk_buf);
1832     if (decoded_chars == NULL)
1833         goto fail;
1834 
1835     textiowrapper_set_decoded_chars(self, decoded_chars);
1836     nchars = PyUnicode_GET_LENGTH(decoded_chars);
1837     if (nchars > 0)
1838         self->b2cratio = (double) nbytes / nchars;
1839     else
1840         self->b2cratio = 0.0;
1841     if (nchars > 0)
1842         eof = 0;
1843 
1844     if (self->telling) {
1845         /* At the snapshot point, len(dec_buffer) bytes before the read, the
1846          * next input to be decoded is dec_buffer + input_chunk.
1847          */
1848         PyObject *next_input = dec_buffer;
1849         PyBytes_Concat(&next_input, input_chunk);
1850         dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1851         if (next_input == NULL) {
1852             goto fail;
1853         }
1854         PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1855         if (snapshot == NULL) {
1856             dec_flags = NULL;
1857             goto fail;
1858         }
1859         Py_XSETREF(self->snapshot, snapshot);
1860     }
1861     Py_DECREF(input_chunk);
1862 
1863     return (eof == 0);
1864 
1865   fail:
1866     Py_XDECREF(dec_buffer);
1867     Py_XDECREF(dec_flags);
1868     Py_XDECREF(input_chunk);
1869     return -1;
1870 }
1871 
1872 /*[clinic input]
1873 _io.TextIOWrapper.read
1874     size as n: Py_ssize_t(accept={int, NoneType}) = -1
1875     /
1876 [clinic start generated code]*/
1877 
1878 static PyObject *
_io_TextIOWrapper_read_impl(textio * self,Py_ssize_t n)1879 _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1880 /*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
1881 {
1882     PyObject *result = NULL, *chunks = NULL;
1883 
1884     CHECK_ATTACHED(self);
1885     CHECK_CLOSED(self);
1886 
1887     if (self->decoder == NULL)
1888         return _unsupported("not readable");
1889 
1890     if (_textiowrapper_writeflush(self) < 0)
1891         return NULL;
1892 
1893     if (n < 0) {
1894         /* Read everything */
1895         PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
1896         PyObject *decoded;
1897         if (bytes == NULL)
1898             goto fail;
1899 
1900         if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1901             decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1902                                                           bytes, 1);
1903         else
1904             decoded = PyObject_CallMethodObjArgs(
1905                 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
1906         Py_DECREF(bytes);
1907         if (check_decoded(decoded) < 0)
1908             goto fail;
1909 
1910         result = textiowrapper_get_decoded_chars(self, -1);
1911 
1912         if (result == NULL) {
1913             Py_DECREF(decoded);
1914             return NULL;
1915         }
1916 
1917         PyUnicode_AppendAndDel(&result, decoded);
1918         if (result == NULL)
1919             goto fail;
1920 
1921         textiowrapper_set_decoded_chars(self, NULL);
1922         Py_CLEAR(self->snapshot);
1923         return result;
1924     }
1925     else {
1926         int res = 1;
1927         Py_ssize_t remaining = n;
1928 
1929         result = textiowrapper_get_decoded_chars(self, n);
1930         if (result == NULL)
1931             goto fail;
1932         if (PyUnicode_READY(result) == -1)
1933             goto fail;
1934         remaining -= PyUnicode_GET_LENGTH(result);
1935 
1936         /* Keep reading chunks until we have n characters to return */
1937         while (remaining > 0) {
1938             res = textiowrapper_read_chunk(self, remaining);
1939             if (res < 0) {
1940                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1941                    when EINTR occurs so we needn't do it ourselves. */
1942                 if (_PyIO_trap_eintr()) {
1943                     continue;
1944                 }
1945                 goto fail;
1946             }
1947             if (res == 0)  /* EOF */
1948                 break;
1949             if (chunks == NULL) {
1950                 chunks = PyList_New(0);
1951                 if (chunks == NULL)
1952                     goto fail;
1953             }
1954             if (PyUnicode_GET_LENGTH(result) > 0 &&
1955                 PyList_Append(chunks, result) < 0)
1956                 goto fail;
1957             Py_DECREF(result);
1958             result = textiowrapper_get_decoded_chars(self, remaining);
1959             if (result == NULL)
1960                 goto fail;
1961             remaining -= PyUnicode_GET_LENGTH(result);
1962         }
1963         if (chunks != NULL) {
1964             if (result != NULL && PyList_Append(chunks, result) < 0)
1965                 goto fail;
1966             Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
1967             if (result == NULL)
1968                 goto fail;
1969             Py_CLEAR(chunks);
1970         }
1971         return result;
1972     }
1973   fail:
1974     Py_XDECREF(result);
1975     Py_XDECREF(chunks);
1976     return NULL;
1977 }
1978 
1979 
1980 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
1981    that is to the NUL character. Otherwise the function will produce
1982    incorrect results. */
1983 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)1984 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
1985 {
1986     if (kind == PyUnicode_1BYTE_KIND) {
1987         assert(ch < 256);
1988         return (char *) memchr((void *) s, (char) ch, end - s);
1989     }
1990     for (;;) {
1991         while (PyUnicode_READ(kind, s, 0) > ch)
1992             s += kind;
1993         if (PyUnicode_READ(kind, s, 0) == ch)
1994             return s;
1995         if (s == end)
1996             return NULL;
1997         s += kind;
1998     }
1999 }
2000 
2001 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,int kind,const char * start,const char * end,Py_ssize_t * consumed)2002 _PyIO_find_line_ending(
2003     int translated, int universal, PyObject *readnl,
2004     int kind, const char *start, const char *end, Py_ssize_t *consumed)
2005 {
2006     Py_ssize_t len = ((char*)end - (char*)start)/kind;
2007 
2008     if (translated) {
2009         /* Newlines are already translated, only search for \n */
2010         const char *pos = find_control_char(kind, start, end, '\n');
2011         if (pos != NULL)
2012             return (pos - start)/kind + 1;
2013         else {
2014             *consumed = len;
2015             return -1;
2016         }
2017     }
2018     else if (universal) {
2019         /* Universal newline search. Find any of \r, \r\n, \n
2020          * The decoder ensures that \r\n are not split in two pieces
2021          */
2022         const char *s = start;
2023         for (;;) {
2024             Py_UCS4 ch;
2025             /* Fast path for non-control chars. The loop always ends
2026                since the Unicode string is NUL-terminated. */
2027             while (PyUnicode_READ(kind, s, 0) > '\r')
2028                 s += kind;
2029             if (s >= end) {
2030                 *consumed = len;
2031                 return -1;
2032             }
2033             ch = PyUnicode_READ(kind, s, 0);
2034             s += kind;
2035             if (ch == '\n')
2036                 return (s - start)/kind;
2037             if (ch == '\r') {
2038                 if (PyUnicode_READ(kind, s, 0) == '\n')
2039                     return (s - start)/kind + 1;
2040                 else
2041                     return (s - start)/kind;
2042             }
2043         }
2044     }
2045     else {
2046         /* Non-universal mode. */
2047         Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2048         Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2049         /* Assume that readnl is an ASCII character. */
2050         assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2051         if (readnl_len == 1) {
2052             const char *pos = find_control_char(kind, start, end, nl[0]);
2053             if (pos != NULL)
2054                 return (pos - start)/kind + 1;
2055             *consumed = len;
2056             return -1;
2057         }
2058         else {
2059             const char *s = start;
2060             const char *e = end - (readnl_len - 1)*kind;
2061             const char *pos;
2062             if (e < s)
2063                 e = s;
2064             while (s < e) {
2065                 Py_ssize_t i;
2066                 const char *pos = find_control_char(kind, s, end, nl[0]);
2067                 if (pos == NULL || pos >= e)
2068                     break;
2069                 for (i = 1; i < readnl_len; i++) {
2070                     if (PyUnicode_READ(kind, pos, i) != nl[i])
2071                         break;
2072                 }
2073                 if (i == readnl_len)
2074                     return (pos - start)/kind + readnl_len;
2075                 s = pos + kind;
2076             }
2077             pos = find_control_char(kind, e, end, nl[0]);
2078             if (pos == NULL)
2079                 *consumed = len;
2080             else
2081                 *consumed = (pos - start)/kind;
2082             return -1;
2083         }
2084     }
2085 }
2086 
/* Read one line from the wrapper and return it as a new str reference.
 *
 * Decoded data is taken from self->decoded_chars and refilled through
 * textiowrapper_read_chunk() as needed.  A `limit` >= 0 caps the number of
 * characters returned; a negative `limit` means no cap.  At end of file the
 * data gathered so far is returned (the empty string if there is none).
 * Returns NULL on error.
 */
2087 static PyObject *
_textiowrapper_readline(textio * self,Py_ssize_t limit)2088 _textiowrapper_readline(textio *self, Py_ssize_t limit)
2089 {
2090     PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2091     Py_ssize_t start, endpos, chunked, offset_to_buffer;
2092     int res;
2093 
2094     CHECK_CLOSED(self);
2095 
2096     if (_textiowrapper_writeflush(self) < 0)
2097         return NULL;
2098 
         /* Number of characters already moved into `chunks`; counted against
            `limit`. */
2099     chunked = 0;
2100 
2101     while (1) {
2102         char *ptr;
2103         Py_ssize_t line_len;
2104         int kind;
2105         Py_ssize_t consumed = 0;
2106 
2107         /* First, get some data if necessary */
2108         res = 1;
2109         while (!self->decoded_chars ||
2110                !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2111             res = textiowrapper_read_chunk(self, 0);
2112             if (res < 0) {
2113                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2114                    when EINTR occurs so we needn't do it ourselves. */
2115                 if (_PyIO_trap_eintr()) {
2116                     continue;
2117                 }
2118                 goto error;
2119             }
2120             if (res == 0)
2121                 break;
2122         }
2123         if (res == 0) {
2124             /* end of file */
2125             textiowrapper_set_decoded_chars(self, NULL);
2126             Py_CLEAR(self->snapshot);
2127             start = endpos = offset_to_buffer = 0;
2128             break;
2129         }
2130 
             /* Build the string to scan.  Without leftovers it is the decoded
                buffer itself, scanned from the first unused character.  With
                leftovers from the previous pass it is leftovers + buffer, and
                offset_to_buffer records how many leading characters do not
                belong to the buffer. */
2131         if (remaining == NULL) {
2132             line = self->decoded_chars;
2133             start = self->decoded_chars_used;
2134             offset_to_buffer = 0;
2135             Py_INCREF(line);
2136         }
2137         else {
2138             assert(self->decoded_chars_used == 0);
2139             line = PyUnicode_Concat(remaining, self->decoded_chars);
2140             start = 0;
2141             offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2142             Py_CLEAR(remaining);
2143             if (line == NULL)
2144                 goto error;
2145             if (PyUnicode_READY(line) == -1)
2146                 goto error;
2147         }
2148 
2149         ptr = PyUnicode_DATA(line);
2150         line_len = PyUnicode_GET_LENGTH(line);
2151         kind = PyUnicode_KIND(line);
2152 
2153         endpos = _PyIO_find_line_ending(
2154             self->readtranslate, self->readuniversal, self->readnl,
2155             kind,
2156             ptr + kind * start,
2157             ptr + kind * line_len,
2158             &consumed);
2159         if (endpos >= 0) {
                 /* Line ending found: make endpos an index into `line` and
                    clamp it so that no more than `limit` characters are
                    returned in total. */
2160             endpos += start;
2161             if (limit >= 0 && (endpos - start) + chunked >= limit)
2162                 endpos = start + limit - chunked;
2163             break;
2164         }
2165 
2166         /* We can put aside up to `endpos` */
2167         endpos = consumed + start;
2168         if (limit >= 0 && (endpos - start) + chunked >= limit) {
2169             /* Didn't find line ending, but reached length limit */
2170             endpos = start + limit - chunked;
2171             break;
2172         }
2173 
2174         if (endpos > start) {
2175             /* No line ending seen yet - put aside current data */
2176             PyObject *s;
2177             if (chunks == NULL) {
2178                 chunks = PyList_New(0);
2179                 if (chunks == NULL)
2180                     goto error;
2181             }
2182             s = PyUnicode_Substring(line, start, endpos);
2183             if (s == NULL)
2184                 goto error;
2185             if (PyList_Append(chunks, s) < 0) {
2186                 Py_DECREF(s);
2187                 goto error;
2188             }
2189             chunked += PyUnicode_GET_LENGTH(s);
2190             Py_DECREF(s);
2191         }
2192         /* There may be some remaining characters we'll have to prepend to
2193            the next chunk of data */
2194         if (endpos < line_len) {
2195             remaining = PyUnicode_Substring(line, endpos, line_len);
2196             if (remaining == NULL)
2197                 goto error;
2198         }
2199         Py_CLEAR(line);
2200         /* We have consumed the buffer */
2201         textiowrapper_set_decoded_chars(self, NULL);
2202     }
2203 
2204     if (line != NULL) {
2205         /* Our line ends in the current buffer */
             /* endpos indexes `line`; subtract the prepended leftovers to get
                the position inside self->decoded_chars. */
2206         self->decoded_chars_used = endpos - offset_to_buffer;
2207         if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2208             PyObject *s = PyUnicode_Substring(line, start, endpos);
2209             Py_CLEAR(line);
2210             if (s == NULL)
2211                 goto error;
2212             line = s;
2213         }
2214     }
         /* Leftovers still pending here (the loop ended at EOF) belong to
            the result as well. */
2215     if (remaining != NULL) {
2216         if (chunks == NULL) {
2217             chunks = PyList_New(0);
2218             if (chunks == NULL)
2219                 goto error;
2220         }
2221         if (PyList_Append(chunks, remaining) < 0)
2222             goto error;
2223         Py_CLEAR(remaining);
2224     }
         /* Join everything put aside, followed by the final piece if any. */
2225     if (chunks != NULL) {
2226         if (line != NULL) {
2227             if (PyList_Append(chunks, line) < 0)
2228                 goto error;
2229             Py_DECREF(line);
2230         }
2231         line = PyUnicode_Join(_PyIO_empty_str, chunks);
2232         if (line == NULL)
2233             goto error;
2234         Py_CLEAR(chunks);
2235     }
         /* Nothing was read at all: return the shared empty string. */
2236     if (line == NULL) {
2237         Py_INCREF(_PyIO_empty_str);
2238         line = _PyIO_empty_str;
2239     }
2240 
2241     return line;
2242 
2243   error:
2244     Py_XDECREF(chunks);
2245     Py_XDECREF(remaining);
2246     Py_XDECREF(line);
2247     return NULL;
2248 }
2249 
2250 /*[clinic input]
2251 _io.TextIOWrapper.readline
2252     size: Py_ssize_t = -1
2253     /
2254 [clinic start generated code]*/
2255 
2256 static PyObject *
_io_TextIOWrapper_readline_impl(textio * self,Py_ssize_t size)2257 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2258 /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2259 {
         /* readline() entry point: run the attached check, then delegate to
            _textiowrapper_readline() with `size` as the character limit
            (the clinic default of -1 means no limit). */
2260     CHECK_ATTACHED(self);
2261     return _textiowrapper_readline(self, size);
2262 }
2263 
2264 /* Seek and Tell */
2265 
/* Unpacked form of the integer cookie that tell() builds and seek() parses. */
2266 typedef struct {
2267     Py_off_t start_pos;     /* buffer position seek() moves back to */
2268     int dec_flags;          /* flags item of the decoder state to restore */
2269     int bytes_to_feed;      /* size passed to buffer.read() when replaying */
2270     int chars_to_skip;      /* decoded characters to mark as already used */
2271     char need_eof;          /* second argument passed to decoder.decode() */
2272 } cookie_type;
2273 
2274 /*
2275    To speed up cookie packing/unpacking, we store the fields in a temporary
2276    byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
2277    (resp.).  The following macros define at which offsets in the intermediary
2278    byte buffer the various cookie_type fields will be stored.
2279  */
2280 
/* Size of the packed buffer: the sum of the field sizes, with no padding. */
2281 #define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
2282 
2283 #if PY_BIG_ENDIAN
2284 /* We want the least significant byte of start_pos to also be the least
2285    significant byte of the cookie, which means that in big-endian mode we
2286    must copy the fields in reverse order. */
2287 
2288 # define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
2289 # define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
2290 # define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
2291 # define OFF_CHARS_TO_SKIP  (sizeof(char))
2292 # define OFF_NEED_EOF       0
2293 
2294 #else
2295 /* Little-endian mode: the least significant byte of start_pos will
2296    naturally end up the least significant byte of the cookie. */
2297 
2298 # define OFF_START_POS      0
2299 # define OFF_DEC_FLAGS      (sizeof(Py_off_t))
2300 # define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
2301 # define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
2302 # define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))
2303 
2304 #endif
2305 
2306 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)2307 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2308 {
2309     unsigned char buffer[COOKIE_BUF_LEN];
2310     PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2311     if (cookieLong == NULL)
2312         return -1;
2313 
2314     if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2315                             PY_LITTLE_ENDIAN, 0) < 0) {
2316         Py_DECREF(cookieLong);
2317         return -1;
2318     }
2319     Py_DECREF(cookieLong);
2320 
2321     memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2322     memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2323     memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2324     memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2325     memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2326 
2327     return 0;
2328 }
2329 
2330 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)2331 textiowrapper_build_cookie(cookie_type *cookie)
2332 {
2333     unsigned char buffer[COOKIE_BUF_LEN];
2334 
2335     memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2336     memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2337     memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2338     memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2339     memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2340 
2341     return _PyLong_FromByteArray(buffer, sizeof(buffer),
2342                                  PY_LITTLE_ENDIAN, 0);
2343 }
2344 
2345 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2346 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2347 {
2348     PyObject *res;
2349     /* When seeking to the start of the stream, we call decoder.reset()
2350        rather than decoder.getstate().
2351        This is for a few decoders such as utf-16 for which the state value
2352        at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2353        utf-16, that we are expecting a BOM).
2354     */
2355     if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2356         res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2357     else
2358         res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2359                                      "((yi))", "", cookie->dec_flags);
2360     if (res == NULL)
2361         return -1;
2362     Py_DECREF(res);
2363     return 0;
2364 }
2365 
2366 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2367 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2368 {
2369     PyObject *res;
2370     if (start_of_stream) {
2371         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2372         self->encoding_start_of_stream = 1;
2373     }
2374     else {
2375         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2376                                          _PyLong_Zero, NULL);
2377         self->encoding_start_of_stream = 0;
2378     }
2379     if (res == NULL)
2380         return -1;
2381     Py_DECREF(res);
2382     return 0;
2383 }
2384 
2385 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2386 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2387 {
2388     /* Same as _textiowrapper_decoder_setstate() above. */
2389     return _textiowrapper_encoder_reset(
2390         self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2391 }
2392 
2393 /*[clinic input]
2394 _io.TextIOWrapper.seek
2395     cookie as cookieObj: object
2396     whence: int = 0
2397     /
2398 [clinic start generated code]*/
2399 
2400 static PyObject *
_io_TextIOWrapper_seek_impl(textio * self,PyObject * cookieObj,int whence)2401 _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2402 /*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
2403 {
         /* Reposition the stream.
          *
          * SEEK_SET: `cookieObj` is an integer cookie (see cookie_type); the
          *           buffer, decoder and encoder are restored from it and the
          *           cookie object is returned.
          * SEEK_CUR: only a zero offset is accepted; it is replaced by the
          *           result of self.tell() and handled like SEEK_SET.
          * SEEK_END: only a zero offset is accepted; returns the result of
          *           buffer.seek(0, 2).
          *
          * Returns a new reference, or NULL on failure.
          */
2404     PyObject *posobj;
2405     cookie_type cookie;
2406     PyObject *res;
2407     int cmp;
2408     PyObject *snapshot;
2409 
2410     CHECK_ATTACHED(self);
2411     CHECK_CLOSED(self);
2412 
         /* Own a reference from here on: cookieObj is either returned to the
            caller or released at `fail`, and SEEK_CUR replaces it. */
2413     Py_INCREF(cookieObj);
2414 
2415     if (!self->seekable) {
2416         _unsupported("underlying stream is not seekable");
2417         goto fail;
2418     }
2419 
2420     switch (whence) {
2421     case SEEK_CUR:
2422         /* seek relative to current position */
2423         cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2424         if (cmp < 0)
2425             goto fail;
2426 
2427         if (cmp == 0) {
2428             _unsupported("can't do nonzero cur-relative seeks");
2429             goto fail;
2430         }
2431 
2432         /* Seeking to the current position should attempt to
2433          * sync the underlying buffer with the current position.
2434          */
2435         Py_DECREF(cookieObj);
2436         cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
2437         if (cookieObj == NULL)
2438             goto fail;
2439         break;
2440 
2441     case SEEK_END:
2442         /* seek relative to end of file */
2443         cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2444         if (cmp < 0)
2445             goto fail;
2446 
2447         if (cmp == 0) {
2448             _unsupported("can't do nonzero end-relative seeks");
2449             goto fail;
2450         }
2451 
2452         res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2453         if (res == NULL)
2454             goto fail;
2455         Py_DECREF(res);
2456 
             /* Drop all buffered read state before moving the buffer. */
2457         textiowrapper_set_decoded_chars(self, NULL);
2458         Py_CLEAR(self->snapshot);
2459         if (self->decoder) {
2460             res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
2461             if (res == NULL)
2462                 goto fail;
2463             Py_DECREF(res);
2464         }
2465 
2466         res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
             /* cookieObj is not needed on this path any more; clearing it
                keeps the Py_XDECREF at `fail` from releasing it twice. */
2467         Py_CLEAR(cookieObj);
2468         if (res == NULL)
2469             goto fail;
2470         if (self->encoder) {
2471             /* If seek() == 0, we are at the start of stream, otherwise not */
2472             cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
2473             if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2474                 Py_DECREF(res);
2475                 goto fail;
2476             }
2477         }
2478         return res;
2479 
2480     case SEEK_SET:
2481         break;
2482 
2483     default:
2484         PyErr_Format(PyExc_ValueError,
2485                      "invalid whence (%d, should be %d, %d or %d)", whence,
2486                      SEEK_SET, SEEK_CUR, SEEK_END);
2487         goto fail;
2488     }
2489 
         /* Common path for SEEK_SET and SEEK_CUR: cookieObj now holds an
            absolute cookie. */
2490     cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
2491     if (cmp < 0)
2492         goto fail;
2493 
2494     if (cmp == 1) {
2495         PyErr_Format(PyExc_ValueError,
2496                      "negative seek position %R", cookieObj);
2497         goto fail;
2498     }
2499 
2500     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2501     if (res == NULL)
2502         goto fail;
2503     Py_DECREF(res);
2504 
2505     /* The strategy of seek() is to go back to the safe start point
2506      * and replay the effect of read(chars_to_skip) from there.
2507      */
2508     if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2509         goto fail;
2510 
2511     /* Seek back to the safe start point. */
2512     posobj = PyLong_FromOff_t(cookie.start_pos);
2513     if (posobj == NULL)
2514         goto fail;
2515     res = PyObject_CallMethodObjArgs(self->buffer,
2516                                      _PyIO_str_seek, posobj, NULL);
2517     Py_DECREF(posobj);
2518     if (res == NULL)
2519         goto fail;
2520     Py_DECREF(res);
2521 
2522     textiowrapper_set_decoded_chars(self, NULL);
2523     Py_CLEAR(self->snapshot);
2524 
2525     /* Restore the decoder to its state from the safe start point. */
2526     if (self->decoder) {
2527         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2528             goto fail;
2529     }
2530 
2531     if (cookie.chars_to_skip) {
2532         /* Just like _read_chunk, feed the decoder and save a snapshot. */
2533         PyObject *input_chunk = _PyObject_CallMethodId(
2534             self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
2535         PyObject *decoded;
2536 
2537         if (input_chunk == NULL)
2538             goto fail;
2539 
2540         if (!PyBytes_Check(input_chunk)) {
2541             PyErr_Format(PyExc_TypeError,
2542                          "underlying read() should have returned a bytes "
2543                          "object, not '%.200s'",
2544                          Py_TYPE(input_chunk)->tp_name);
2545             Py_DECREF(input_chunk);
2546             goto fail;
2547         }
2548 
             /* "N" hands our reference to input_chunk over to the tuple, so
                there is no Py_DECREF(input_chunk) below; the object stays
                reachable through the snapshot stored on self. */
2549         snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2550         if (snapshot == NULL) {
2551             goto fail;
2552         }
2553         Py_XSETREF(self->snapshot, snapshot);
2554 
2555         decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2556             "Oi", input_chunk, (int)cookie.need_eof);
2557 
2558         if (check_decoded(decoded) < 0)
2559             goto fail;
2560 
2561         textiowrapper_set_decoded_chars(self, decoded);
2562 
2563         /* Skip chars_to_skip of the decoded characters. */
             /* NOTE(review): chars_to_skip is a signed int taken from the
                caller-supplied cookie; a negative value passes this check
                and is stored in decoded_chars_used.  Confirm whether it
                should be rejected here. */
2564         if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2565             PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2566             goto fail;
2567         }
2568         self->decoded_chars_used = cookie.chars_to_skip;
2569     }
2570     else {
             /* Nothing to replay: the snapshot is (dec_flags, b""). */
2571         snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2572         if (snapshot == NULL)
2573             goto fail;
2574         Py_XSETREF(self->snapshot, snapshot);
2575     }
2576 
2577     /* Finally, reset the encoder (merely useful for proper BOM handling) */
2578     if (self->encoder) {
2579         if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2580             goto fail;
2581     }
2582     return cookieObj;
2583   fail:
2584     Py_XDECREF(cookieObj);
2585     return NULL;
2586 
2587 }
2588 
2589 /*[clinic input]
2590 _io.TextIOWrapper.tell
2591 [clinic start generated code]*/
2592 
2593 static PyObject *
_io_TextIOWrapper_tell_impl(textio * self)2594 _io_TextIOWrapper_tell_impl(textio *self)
2595 /*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
2596 {
2597     PyObject *res;
2598     PyObject *posobj = NULL;
2599     cookie_type cookie = {0,0,0,0,0};
2600     PyObject *next_input;
2601     Py_ssize_t chars_to_skip, chars_decoded;
2602     Py_ssize_t skip_bytes, skip_back;
2603     PyObject *saved_state = NULL;
2604     char *input, *input_end;
2605     Py_ssize_t dec_buffer_len;
2606     int dec_flags;
2607 
2608     CHECK_ATTACHED(self);
2609     CHECK_CLOSED(self);
2610 
2611     if (!self->seekable) {
2612         _unsupported("underlying stream is not seekable");
2613         goto fail;
2614     }
2615     if (!self->telling) {
2616         PyErr_SetString(PyExc_OSError,
2617                         "telling position disabled by next() call");
2618         goto fail;
2619     }
2620 
2621     if (_textiowrapper_writeflush(self) < 0)
2622         return NULL;
2623     res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2624     if (res == NULL)
2625         goto fail;
2626     Py_DECREF(res);
2627 
2628     posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
2629     if (posobj == NULL)
2630         goto fail;
2631 
2632     if (self->decoder == NULL || self->snapshot == NULL) {
2633         assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2634         return posobj;
2635     }
2636 
2637 #if defined(HAVE_LARGEFILE_SUPPORT)
2638     cookie.start_pos = PyLong_AsLongLong(posobj);
2639 #else
2640     cookie.start_pos = PyLong_AsLong(posobj);
2641 #endif
2642     Py_DECREF(posobj);
2643     if (PyErr_Occurred())
2644         goto fail;
2645 
2646     /* Skip backward to the snapshot point (see _read_chunk). */
2647     assert(PyTuple_Check(self->snapshot));
2648     if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2649         goto fail;
2650 
2651     assert (PyBytes_Check(next_input));
2652 
2653     cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2654 
2655     /* How many decoded characters have been used up since the snapshot? */
2656     if (self->decoded_chars_used == 0)  {
2657         /* We haven't moved from the snapshot point. */
2658         return textiowrapper_build_cookie(&cookie);
2659     }
2660 
2661     chars_to_skip = self->decoded_chars_used;
2662 
2663     /* Decoder state will be restored at the end */
2664     saved_state = PyObject_CallMethodObjArgs(self->decoder,
2665                                              _PyIO_str_getstate, NULL);
2666     if (saved_state == NULL)
2667         goto fail;
2668 
2669 #define DECODER_GETSTATE() do { \
2670         PyObject *dec_buffer; \
2671         PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2672             _PyIO_str_getstate, NULL); \
2673         if (_state == NULL) \
2674             goto fail; \
2675         if (!PyTuple_Check(_state)) { \
2676             PyErr_SetString(PyExc_TypeError, \
2677                             "illegal decoder state"); \
2678             Py_DECREF(_state); \
2679             goto fail; \
2680         } \
2681         if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2682                               &dec_buffer, &dec_flags)) \
2683         { \
2684             Py_DECREF(_state); \
2685             goto fail; \
2686         } \
2687         if (!PyBytes_Check(dec_buffer)) { \
2688             PyErr_Format(PyExc_TypeError, \
2689                          "illegal decoder state: the first item should be a " \
2690                          "bytes object, not '%.200s'", \
2691                          Py_TYPE(dec_buffer)->tp_name); \
2692             Py_DECREF(_state); \
2693             goto fail; \
2694         } \
2695         dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2696         Py_DECREF(_state); \
2697     } while (0)
2698 
2699 #define DECODER_DECODE(start, len, res) do { \
2700         PyObject *_decoded = _PyObject_CallMethodId( \
2701             self->decoder, &PyId_decode, "y#", start, len); \
2702         if (check_decoded(_decoded) < 0) \
2703             goto fail; \
2704         res = PyUnicode_GET_LENGTH(_decoded); \
2705         Py_DECREF(_decoded); \
2706     } while (0)
2707 
2708     /* Fast search for an acceptable start point, close to our
2709        current pos */
2710     skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2711     skip_back = 1;
2712     assert(skip_back <= PyBytes_GET_SIZE(next_input));
2713     input = PyBytes_AS_STRING(next_input);
2714     while (skip_bytes > 0) {
2715         /* Decode up to tentative start point */
2716         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2717             goto fail;
2718         DECODER_DECODE(input, skip_bytes, chars_decoded);
2719         if (chars_decoded <= chars_to_skip) {
2720             DECODER_GETSTATE();
2721             if (dec_buffer_len == 0) {
2722                 /* Before pos and no bytes buffered in decoder => OK */
2723                 cookie.dec_flags = dec_flags;
2724                 chars_to_skip -= chars_decoded;
2725                 break;
2726             }
2727             /* Skip back by buffered amount and reset heuristic */
2728             skip_bytes -= dec_buffer_len;
2729             skip_back = 1;
2730         }
2731         else {
2732             /* We're too far ahead, skip back a bit */
2733             skip_bytes -= skip_back;
2734             skip_back *= 2;
2735         }
2736     }
2737     if (skip_bytes <= 0) {
2738         skip_bytes = 0;
2739         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2740             goto fail;
2741     }
2742 
2743     /* Note our initial start point. */
2744     cookie.start_pos += skip_bytes;
2745     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2746     if (chars_to_skip == 0)
2747         goto finally;
2748 
2749     /* We should be close to the desired position.  Now feed the decoder one
2750      * byte at a time until we reach the `chars_to_skip` target.
2751      * As we go, note the nearest "safe start point" before the current
2752      * location (a point where the decoder has nothing buffered, so seek()
2753      * can safely start from there and advance to this location).
2754      */
2755     chars_decoded = 0;
2756     input = PyBytes_AS_STRING(next_input);
2757     input_end = input + PyBytes_GET_SIZE(next_input);
2758     input += skip_bytes;
2759     while (input < input_end) {
2760         Py_ssize_t n;
2761 
2762         DECODER_DECODE(input, (Py_ssize_t)1, n);
2763         /* We got n chars for 1 byte */
2764         chars_decoded += n;
2765         cookie.bytes_to_feed += 1;
2766         DECODER_GETSTATE();
2767 
2768         if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2769             /* Decoder buffer is empty, so this is a safe start point. */
2770             cookie.start_pos += cookie.bytes_to_feed;
2771             chars_to_skip -= chars_decoded;
2772             cookie.dec_flags = dec_flags;
2773             cookie.bytes_to_feed = 0;
2774             chars_decoded = 0;
2775         }
2776         if (chars_decoded >= chars_to_skip)
2777             break;
2778         input++;
2779     }
2780     if (input == input_end) {
2781         /* We didn't get enough decoded data; signal EOF to get more. */
2782         PyObject *decoded = _PyObject_CallMethodId(
2783             self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
2784         if (check_decoded(decoded) < 0)
2785             goto fail;
2786         chars_decoded += PyUnicode_GET_LENGTH(decoded);
2787         Py_DECREF(decoded);
2788         cookie.need_eof = 1;
2789 
2790         if (chars_decoded < chars_to_skip) {
2791             PyErr_SetString(PyExc_OSError,
2792                             "can't reconstruct logical file position");
2793             goto fail;
2794         }
2795     }
2796 
2797 finally:
2798     res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
2799     Py_DECREF(saved_state);
2800     if (res == NULL)
2801         return NULL;
2802     Py_DECREF(res);
2803 
2804     /* The returned cookie corresponds to the last safe start point. */
2805     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2806     return textiowrapper_build_cookie(&cookie);
2807 
2808 fail:
2809     if (saved_state) {
2810         PyObject *type, *value, *traceback;
2811         PyErr_Fetch(&type, &value, &traceback);
2812         res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
2813         _PyErr_ChainExceptions(type, value, traceback);
2814         Py_DECREF(saved_state);
2815         Py_XDECREF(res);
2816     }
2817     return NULL;
2818 }
2819 
2820 /*[clinic input]
2821 _io.TextIOWrapper.truncate
2822     pos: object = None
2823     /
2824 [clinic start generated code]*/
2825 
2826 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2827 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2828 /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2829 {
2830     PyObject *res;
2831 
2832     CHECK_ATTACHED(self)
2833 
2834     res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2835     if (res == NULL)
2836         return NULL;
2837     Py_DECREF(res);
2838 
2839     return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2840 }
2841 
2842 static PyObject *
textiowrapper_repr(textio * self)2843 textiowrapper_repr(textio *self)
2844 {
2845     PyObject *nameobj, *modeobj, *res, *s;
2846     int status;
2847 
2848     CHECK_INITIALIZED(self);
2849 
2850     res = PyUnicode_FromString("<_io.TextIOWrapper");
2851     if (res == NULL)
2852         return NULL;
2853 
2854     status = Py_ReprEnter((PyObject *)self);
2855     if (status != 0) {
2856         if (status > 0) {
2857             PyErr_Format(PyExc_RuntimeError,
2858                          "reentrant call inside %s.__repr__",
2859                          Py_TYPE(self)->tp_name);
2860         }
2861         goto error;
2862     }
2863     if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2864         if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
2865             goto error;
2866         }
2867         /* Ignore ValueError raised if the underlying stream was detached */
2868         PyErr_Clear();
2869     }
2870     if (nameobj != NULL) {
2871         s = PyUnicode_FromFormat(" name=%R", nameobj);
2872         Py_DECREF(nameobj);
2873         if (s == NULL)
2874             goto error;
2875         PyUnicode_AppendAndDel(&res, s);
2876         if (res == NULL)
2877             goto error;
2878     }
2879     if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2880         goto error;
2881     }
2882     if (modeobj != NULL) {
2883         s = PyUnicode_FromFormat(" mode=%R", modeobj);
2884         Py_DECREF(modeobj);
2885         if (s == NULL)
2886             goto error;
2887         PyUnicode_AppendAndDel(&res, s);
2888         if (res == NULL)
2889             goto error;
2890     }
2891     s = PyUnicode_FromFormat("%U encoding=%R>",
2892                              res, self->encoding);
2893     Py_DECREF(res);
2894     if (status == 0) {
2895         Py_ReprLeave((PyObject *)self);
2896     }
2897     return s;
2898 
2899   error:
2900     Py_XDECREF(res);
2901     if (status == 0) {
2902         Py_ReprLeave((PyObject *)self);
2903     }
2904     return NULL;
2905 }
2906 
2907 
2908 /* Inquiries */
2909 
2910 /*[clinic input]
2911 _io.TextIOWrapper.fileno
2912 [clinic start generated code]*/
2913 
2914 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)2915 _io_TextIOWrapper_fileno_impl(textio *self)
2916 /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2917 {
2918     CHECK_ATTACHED(self);
2919     return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
2920 }
2921 
2922 /*[clinic input]
2923 _io.TextIOWrapper.seekable
2924 [clinic start generated code]*/
2925 
2926 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)2927 _io_TextIOWrapper_seekable_impl(textio *self)
2928 /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2929 {
2930     CHECK_ATTACHED(self);
2931     return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
2932 }
2933 
2934 /*[clinic input]
2935 _io.TextIOWrapper.readable
2936 [clinic start generated code]*/
2937 
2938 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)2939 _io_TextIOWrapper_readable_impl(textio *self)
2940 /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2941 {
2942     CHECK_ATTACHED(self);
2943     return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
2944 }
2945 
2946 /*[clinic input]
2947 _io.TextIOWrapper.writable
2948 [clinic start generated code]*/
2949 
2950 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)2951 _io_TextIOWrapper_writable_impl(textio *self)
2952 /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
2953 {
2954     CHECK_ATTACHED(self);
2955     return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
2956 }
2957 
2958 /*[clinic input]
2959 _io.TextIOWrapper.isatty
2960 [clinic start generated code]*/
2961 
2962 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)2963 _io_TextIOWrapper_isatty_impl(textio *self)
2964 /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
2965 {
2966     CHECK_ATTACHED(self);
2967     return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
2968 }
2969 
2970 /*[clinic input]
2971 _io.TextIOWrapper.flush
2972 [clinic start generated code]*/
2973 
2974 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)2975 _io_TextIOWrapper_flush_impl(textio *self)
2976 /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
2977 {
2978     CHECK_ATTACHED(self);
2979     CHECK_CLOSED(self);
2980     self->telling = self->seekable;
2981     if (_textiowrapper_writeflush(self) < 0)
2982         return NULL;
2983     return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
2984 }
2985 
2986 /*[clinic input]
2987 _io.TextIOWrapper.close
2988 [clinic start generated code]*/
2989 
2990 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)2991 _io_TextIOWrapper_close_impl(textio *self)
2992 /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
2993 {
2994     PyObject *res;
2995     int r;
2996     CHECK_ATTACHED(self);
2997 
2998     res = textiowrapper_closed_get(self, NULL);
2999     if (res == NULL)
3000         return NULL;
3001     r = PyObject_IsTrue(res);
3002     Py_DECREF(res);
3003     if (r < 0)
3004         return NULL;
3005 
3006     if (r > 0) {
3007         Py_RETURN_NONE; /* stream already closed */
3008     }
3009     else {
3010         PyObject *exc = NULL, *val, *tb;
3011         if (self->finalizing) {
3012             res = _PyObject_CallMethodIdObjArgs(self->buffer,
3013                                                 &PyId__dealloc_warn,
3014                                                 self, NULL);
3015             if (res)
3016                 Py_DECREF(res);
3017             else
3018                 PyErr_Clear();
3019         }
3020         res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
3021         if (res == NULL)
3022             PyErr_Fetch(&exc, &val, &tb);
3023         else
3024             Py_DECREF(res);
3025 
3026         res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
3027         if (exc != NULL) {
3028             _PyErr_ChainExceptions(exc, val, tb);
3029             Py_CLEAR(res);
3030         }
3031         return res;
3032     }
3033 }
3034 
3035 static PyObject *
textiowrapper_iternext(textio * self)3036 textiowrapper_iternext(textio *self)
3037 {
3038     PyObject *line;
3039 
3040     CHECK_ATTACHED(self);
3041 
3042     self->telling = 0;
3043     if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
3044         /* Skip method call overhead for speed */
3045         line = _textiowrapper_readline(self, -1);
3046     }
3047     else {
3048         line = PyObject_CallMethodObjArgs((PyObject *)self,
3049                                            _PyIO_str_readline, NULL);
3050         if (line && !PyUnicode_Check(line)) {
3051             PyErr_Format(PyExc_OSError,
3052                          "readline() should have returned a str object, "
3053                          "not '%.200s'", Py_TYPE(line)->tp_name);
3054             Py_DECREF(line);
3055             return NULL;
3056         }
3057     }
3058 
3059     if (line == NULL || PyUnicode_READY(line) == -1)
3060         return NULL;
3061 
3062     if (PyUnicode_GET_LENGTH(line) == 0) {
3063         /* Reached EOF or would have blocked */
3064         Py_DECREF(line);
3065         Py_CLEAR(self->snapshot);
3066         self->telling = self->seekable;
3067         return NULL;
3068     }
3069 
3070     return line;
3071 }
3072 
3073 static PyObject *
textiowrapper_name_get(textio * self,void * context)3074 textiowrapper_name_get(textio *self, void *context)
3075 {
3076     CHECK_ATTACHED(self);
3077     return _PyObject_GetAttrId(self->buffer, &PyId_name);
3078 }
3079 
3080 static PyObject *
textiowrapper_closed_get(textio * self,void * context)3081 textiowrapper_closed_get(textio *self, void *context)
3082 {
3083     CHECK_ATTACHED(self);
3084     return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3085 }
3086 
3087 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)3088 textiowrapper_newlines_get(textio *self, void *context)
3089 {
3090     PyObject *res;
3091     CHECK_ATTACHED(self);
3092     if (self->decoder == NULL ||
3093         _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3094     {
3095         Py_RETURN_NONE;
3096     }
3097     return res;
3098 }
3099 
3100 static PyObject *
textiowrapper_errors_get(textio * self,void * context)3101 textiowrapper_errors_get(textio *self, void *context)
3102 {
3103     CHECK_INITIALIZED(self);
3104     Py_INCREF(self->errors);
3105     return self->errors;
3106 }
3107 
3108 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)3109 textiowrapper_chunk_size_get(textio *self, void *context)
3110 {
3111     CHECK_ATTACHED(self);
3112     return PyLong_FromSsize_t(self->chunk_size);
3113 }
3114 
3115 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)3116 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
3117 {
3118     Py_ssize_t n;
3119     CHECK_ATTACHED_INT(self);
3120     if (arg == NULL) {
3121         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3122         return -1;
3123     }
3124     n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
3125     if (n == -1 && PyErr_Occurred())
3126         return -1;
3127     if (n <= 0) {
3128         PyErr_SetString(PyExc_ValueError,
3129                         "a strictly positive integer is required");
3130         return -1;
3131     }
3132     self->chunk_size = n;
3133     return 0;
3134 }
3135 
3136 #include "clinic/textio.c.h"
3137 
3138 static PyMethodDef incrementalnewlinedecoder_methods[] = {
3139     _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3140     _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3141     _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3142     _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3143     {NULL}
3144 };
3145 
3146 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3147     {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3148     {NULL}
3149 };
3150 
3151 PyTypeObject PyIncrementalNewlineDecoder_Type = {
3152     PyVarObject_HEAD_INIT(NULL, 0)
3153     "_io.IncrementalNewlineDecoder", /*tp_name*/
3154     sizeof(nldecoder_object), /*tp_basicsize*/
3155     0,                          /*tp_itemsize*/
3156     (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
3157     0,                          /*tp_vectorcall_offset*/
3158     0,                          /*tp_getattr*/
3159     0,                          /*tp_setattr*/
3160     0,                          /*tp_as_async*/
3161     0,                          /*tp_repr*/
3162     0,                          /*tp_as_number*/
3163     0,                          /*tp_as_sequence*/
3164     0,                          /*tp_as_mapping*/
3165     0,                          /*tp_hash */
3166     0,                          /*tp_call*/
3167     0,                          /*tp_str*/
3168     0,                          /*tp_getattro*/
3169     0,                          /*tp_setattro*/
3170     0,                          /*tp_as_buffer*/
3171     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
3172     _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3173     0,                          /* tp_traverse */
3174     0,                          /* tp_clear */
3175     0,                          /* tp_richcompare */
3176     0,                          /*tp_weaklistoffset*/
3177     0,                          /* tp_iter */
3178     0,                          /* tp_iternext */
3179     incrementalnewlinedecoder_methods, /* tp_methods */
3180     0,                          /* tp_members */
3181     incrementalnewlinedecoder_getset, /* tp_getset */
3182     0,                          /* tp_base */
3183     0,                          /* tp_dict */
3184     0,                          /* tp_descr_get */
3185     0,                          /* tp_descr_set */
3186     0,                          /* tp_dictoffset */
3187     _io_IncrementalNewlineDecoder___init__, /* tp_init */
3188     0,                          /* tp_alloc */
3189     PyType_GenericNew,          /* tp_new */
3190 };
3191 
3192 
3193 static PyMethodDef textiowrapper_methods[] = {
3194     _IO_TEXTIOWRAPPER_DETACH_METHODDEF
3195     _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
3196     _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3197     _IO_TEXTIOWRAPPER_READ_METHODDEF
3198     _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3199     _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3200     _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3201 
3202     _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3203     _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3204     _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3205     _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3206     _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
3207 
3208     _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3209     _IO_TEXTIOWRAPPER_TELL_METHODDEF
3210     _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
3211     {NULL, NULL}
3212 };
3213 
3214 static PyMemberDef textiowrapper_members[] = {
3215     {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3216     {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3217     {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
3218     {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
3219     {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
3220     {NULL}
3221 };
3222 
3223 static PyGetSetDef textiowrapper_getset[] = {
3224     {"name", (getter)textiowrapper_name_get, NULL, NULL},
3225     {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
3226 /*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3227 */
3228     {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3229     {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3230     {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3231                     (setter)textiowrapper_chunk_size_set, NULL},
3232     {NULL}
3233 };
3234 
3235 PyTypeObject PyTextIOWrapper_Type = {
3236     PyVarObject_HEAD_INIT(NULL, 0)
3237     "_io.TextIOWrapper",        /*tp_name*/
3238     sizeof(textio), /*tp_basicsize*/
3239     0,                          /*tp_itemsize*/
3240     (destructor)textiowrapper_dealloc, /*tp_dealloc*/
3241     0,                          /*tp_vectorcall_offset*/
3242     0,                          /*tp_getattr*/
3243     0,                          /*tps_etattr*/
3244     0,                          /*tp_as_async*/
3245     (reprfunc)textiowrapper_repr,/*tp_repr*/
3246     0,                          /*tp_as_number*/
3247     0,                          /*tp_as_sequence*/
3248     0,                          /*tp_as_mapping*/
3249     0,                          /*tp_hash */
3250     0,                          /*tp_call*/
3251     0,                          /*tp_str*/
3252     0,                          /*tp_getattro*/
3253     0,                          /*tp_setattro*/
3254     0,                          /*tp_as_buffer*/
3255     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
3256         | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
3257     _io_TextIOWrapper___init____doc__, /* tp_doc */
3258     (traverseproc)textiowrapper_traverse, /* tp_traverse */
3259     (inquiry)textiowrapper_clear, /* tp_clear */
3260     0,                          /* tp_richcompare */
3261     offsetof(textio, weakreflist), /*tp_weaklistoffset*/
3262     0,                          /* tp_iter */
3263     (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3264     textiowrapper_methods,      /* tp_methods */
3265     textiowrapper_members,      /* tp_members */
3266     textiowrapper_getset,       /* tp_getset */
3267     0,                          /* tp_base */
3268     0,                          /* tp_dict */
3269     0,                          /* tp_descr_get */
3270     0,                          /* tp_descr_set */
3271     offsetof(textio, dict), /*tp_dictoffset*/
3272     _io_TextIOWrapper___init__, /* tp_init */
3273     0,                          /* tp_alloc */
3274     PyType_GenericNew,          /* tp_new */
3275     0,                          /* tp_free */
3276     0,                          /* tp_is_gc */
3277     0,                          /* tp_bases */
3278     0,                          /* tp_mro */
3279     0,                          /* tp_cache */
3280     0,                          /* tp_subclasses */
3281     0,                          /* tp_weaklist */
3282     0,                          /* tp_del */
3283     0,                          /* tp_version_tag */
3284     0,                          /* tp_finalize */
3285 };
3286