• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2     An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3 
4     Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5 
6     Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8 
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "structmember.h"
12 #include "_iomodule.h"
13 
14 /*[clinic input]
15 module _io
16 class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
17 class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
18 [clinic start generated code]*/
19 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
20 
21 /*[python input]
22 class io_ssize_t_converter(CConverter):
23     type = 'Py_ssize_t'
24     converter = '_PyIO_ConvertSsize_t'
25 [python start generated code]*/
26 /*[python end generated code: output=da39a3ee5e6b4b0d input=d0a811d3cbfd1b33]*/
27 
/* Static identifiers for the attribute and method names this file looks up.
   Each _Py_IDENTIFIER(x) makes a PyId_x object available; it is passed by
   address to the *Id call helpers (e.g. &PyId_fileno, &PyId_setstate). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
48 
49 /* TextIOBase */
50 
/* Class docstring of _io._TextIOBase (used as tp_doc of PyTextIOBase_Type). */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
58 
59 static PyObject *
_unsupported(const char * message)60 _unsupported(const char *message)
61 {
62     _PyIO_State *state = IO_STATE();
63     if (state != NULL)
64         PyErr_SetString(state->unsupported_operation, message);
65     return NULL;
66 }
67 
68 PyDoc_STRVAR(textiobase_detach_doc,
69     "Separate the underlying buffer from the TextIOBase and return it.\n"
70     "\n"
71     "After the underlying buffer has been detached, the TextIO is in an\n"
72     "unusable state.\n"
73     );
74 
75 static PyObject *
textiobase_detach(PyObject * self)76 textiobase_detach(PyObject *self)
77 {
78     return _unsupported("detach");
79 }
80 
81 PyDoc_STRVAR(textiobase_read_doc,
82     "Read at most n characters from stream.\n"
83     "\n"
84     "Read from underlying buffer until we have n characters or we hit EOF.\n"
85     "If n is negative or omitted, read until EOF.\n"
86     );
87 
88 static PyObject *
textiobase_read(PyObject * self,PyObject * args)89 textiobase_read(PyObject *self, PyObject *args)
90 {
91     return _unsupported("read");
92 }
93 
94 PyDoc_STRVAR(textiobase_readline_doc,
95     "Read until newline or EOF.\n"
96     "\n"
97     "Returns an empty string if EOF is hit immediately.\n"
98     );
99 
100 static PyObject *
textiobase_readline(PyObject * self,PyObject * args)101 textiobase_readline(PyObject *self, PyObject *args)
102 {
103     return _unsupported("readline");
104 }
105 
106 PyDoc_STRVAR(textiobase_write_doc,
107     "Write string to stream.\n"
108     "Returns the number of characters written (which is always equal to\n"
109     "the length of the string).\n"
110     );
111 
112 static PyObject *
textiobase_write(PyObject * self,PyObject * args)113 textiobase_write(PyObject *self, PyObject *args)
114 {
115     return _unsupported("write");
116 }
117 
118 PyDoc_STRVAR(textiobase_encoding_doc,
119     "Encoding of the text stream.\n"
120     "\n"
121     "Subclasses should override.\n"
122     );
123 
124 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)125 textiobase_encoding_get(PyObject *self, void *context)
126 {
127     Py_RETURN_NONE;
128 }
129 
130 PyDoc_STRVAR(textiobase_newlines_doc,
131     "Line endings translated so far.\n"
132     "\n"
133     "Only line endings translated during reading are considered.\n"
134     "\n"
135     "Subclasses should override.\n"
136     );
137 
138 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)139 textiobase_newlines_get(PyObject *self, void *context)
140 {
141     Py_RETURN_NONE;
142 }
143 
144 PyDoc_STRVAR(textiobase_errors_doc,
145     "The error setting of the decoder or encoder.\n"
146     "\n"
147     "Subclasses should override.\n"
148     );
149 
150 static PyObject *
textiobase_errors_get(PyObject * self,void * context)151 textiobase_errors_get(PyObject *self, void *context)
152 {
153     Py_RETURN_NONE;
154 }
155 
156 
/* Methods of _io._TextIOBase (tp_methods of PyTextIOBase_Type).  Every entry
   points at a stub that reports the operation as unsupported. */
static PyMethodDef textiobase_methods[] = {
    {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
    {NULL, NULL}  /* sentinel */
};
164 
/* Read-only attributes of _io._TextIOBase (tp_getset of PyTextIOBase_Type).
   No setters are provided; each getter returns None. */
static PyGetSetDef textiobase_getset[] = {
    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
    {NULL}  /* sentinel */
};
171 
/* Type object for _io._TextIOBase.

   It derives from PyIOBase_Type, may be subclassed (Py_TPFLAGS_BASETYPE) and
   contributes only a docstring, the stub methods and the None-returning
   attributes defined above.  All other slots are left 0; the initializer is
   positional, so the order of the entries must not change. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
        | Py_TPFLAGS_HAVE_FINALIZE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};
223 
224 
225 /* IncrementalNewlineDecoder */
226 
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder; Py_None means input is
                                   already str; NULL until __init__ has run */
    PyObject *errors;           /* error setting given to __init__
                                   ("strict" when omitted) */
    unsigned int pendingcr: 1;  /* a trailing '\r' was held back by the
                                   previous non-final decode() */
    unsigned int translate: 1;  /* rewrite "\r" and "\r\n" to "\n" */
    unsigned int seennl: 3;     /* bitmask of SEEN_CR/SEEN_LF/SEEN_CRLF */
} nldecoder_object;
235 
236 /*[clinic input]
237 _io.IncrementalNewlineDecoder.__init__
238     decoder: object
239     translate: int
240     errors: object(c_default="NULL") = "strict"
241 
242 Codec used when reading a file in universal newlines mode.
243 
244 It wraps another incremental decoder, translating \r\n and \r into \n.
245 It also records the types of newlines encountered.  When used with
246 translate=False, it ensures that the newline sequence is returned in
247 one piece. When used with decoder=None, it expects unicode strings as
248 decode input and translates newlines without first invoking an external
249 decoder.
250 [clinic start generated code]*/
251 
252 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)253 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
254                                             PyObject *decoder, int translate,
255                                             PyObject *errors)
256 /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
257 {
258     self->decoder = decoder;
259     Py_INCREF(decoder);
260 
261     if (errors == NULL) {
262         self->errors = PyUnicode_FromString("strict");
263         if (self->errors == NULL)
264             return -1;
265     }
266     else {
267         Py_INCREF(errors);
268         self->errors = errors;
269     }
270 
271     self->translate = translate;
272     self->seennl = 0;
273     self->pendingcr = 0;
274 
275     return 0;
276 }
277 
278 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)279 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
280 {
281     Py_CLEAR(self->decoder);
282     Py_CLEAR(self->errors);
283     Py_TYPE(self)->tp_free((PyObject *)self);
284 }
285 
286 static int
check_decoded(PyObject * decoded)287 check_decoded(PyObject *decoded)
288 {
289     if (decoded == NULL)
290         return -1;
291     if (!PyUnicode_Check(decoded)) {
292         PyErr_Format(PyExc_TypeError,
293                      "decoder should return a string result, not '%.200s'",
294                      Py_TYPE(decoded)->tp_name);
295         Py_DECREF(decoded);
296         return -1;
297     }
298     if (PyUnicode_READY(decoded) < 0) {
299         Py_DECREF(decoded);
300         return -1;
301     }
302     return 0;
303 }
304 
/* Bit flags stored in nldecoder_object.seennl: which kinds of line ending
   have been encountered so far. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
309 
/* Core of IncrementalNewlineDecoder.decode().

   `input` is passed to the wrapped decoder's decode() method together with
   `final`, or used directly as the text when the decoder is None.  A '\r'
   held back by the previous call is put back in front of the text, a
   trailing '\r' is held back again unless `final` is true, the newline
   kinds seen are recorded in self->seennl and, when self->translate is set,
   "\r" and "\r\n" are rewritten to "\n".

   Returns a new reference to the resulting str, or NULL with an exception
   set (ValueError if __init__ was never called). */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* check_decoded() releases `output` itself on failure */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        /* Same maximum character as `output`, so both strings share one
           kind and the payload can be copied with a plain memcpy. */
        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
        /* `kind` is used as the per-character byte width here */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* Nothing to scan; seennl is left untouched. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* For wider kinds a matching byte may belong to another
                   character, so the hit is confirmed character by
                   character. */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters.  It has no
                           bound of its own: it stops on the first character
                           <= '\n' (presumably the string's trailing NUL at
                           the latest -- confirm). */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* Look one character ahead to tell "\r\n" from "\r" */
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            void *translated;
            int kind = PyUnicode_KIND(output);
            void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* Translation never grows the text, so a scratch buffer of the
               input size is enough. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    /* "\r\n" and lone "\r" both become a single "\n" */
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that character sat at index len, i.e.
                   past the end of the text. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
510 
511 /*[clinic input]
512 _io.IncrementalNewlineDecoder.decode
513     input: object
514     final: int(c_default="0") = False
515 [clinic start generated code]*/
516 
517 static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object * self,PyObject * input,int final)518 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
519                                           PyObject *input, int final)
520 /*[clinic end generated code: output=0d486755bb37a66e input=d65677385bfd6827]*/
521 {
522     return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
523 }
524 
525 /*[clinic input]
526 _io.IncrementalNewlineDecoder.getstate
527 [clinic start generated code]*/
528 
529 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)530 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
531 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
532 {
533     PyObject *buffer;
534     unsigned long long flag;
535 
536     if (self->decoder != Py_None) {
537         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
538            _PyIO_str_getstate, NULL);
539         if (state == NULL)
540             return NULL;
541         if (!PyArg_ParseTuple(state, "OK", &buffer, &flag)) {
542             Py_DECREF(state);
543             return NULL;
544         }
545         Py_INCREF(buffer);
546         Py_DECREF(state);
547     }
548     else {
549         buffer = PyBytes_FromString("");
550         flag = 0;
551     }
552     flag <<= 1;
553     if (self->pendingcr)
554         flag |= 1;
555     return Py_BuildValue("NK", buffer, flag);
556 }
557 
558 /*[clinic input]
559 _io.IncrementalNewlineDecoder.setstate
560     state: object
561     /
562 [clinic start generated code]*/
563 
564 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)565 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
566                                        PyObject *state)
567 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
568 {
569     PyObject *buffer;
570     unsigned long long flag;
571 
572     if (!PyArg_ParseTuple(state, "OK", &buffer, &flag))
573         return NULL;
574 
575     self->pendingcr = (int) (flag & 1);
576     flag >>= 1;
577 
578     if (self->decoder != Py_None)
579         return _PyObject_CallMethodId(self->decoder,
580                                       &PyId_setstate, "((OK))", buffer, flag);
581     else
582         Py_RETURN_NONE;
583 }
584 
585 /*[clinic input]
586 _io.IncrementalNewlineDecoder.reset
587 [clinic start generated code]*/
588 
589 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)590 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
591 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
592 {
593     self->seennl = 0;
594     self->pendingcr = 0;
595     if (self->decoder != Py_None)
596         return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
597     else
598         Py_RETURN_NONE;
599 }
600 
601 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)602 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
603 {
604     switch (self->seennl) {
605     case SEEN_CR:
606         return PyUnicode_FromString("\r");
607     case SEEN_LF:
608         return PyUnicode_FromString("\n");
609     case SEEN_CRLF:
610         return PyUnicode_FromString("\r\n");
611     case SEEN_CR | SEEN_LF:
612         return Py_BuildValue("ss", "\r", "\n");
613     case SEEN_CR | SEEN_CRLF:
614         return Py_BuildValue("ss", "\r", "\r\n");
615     case SEEN_LF | SEEN_CRLF:
616         return Py_BuildValue("ss", "\n", "\r\n");
617     case SEEN_CR | SEEN_LF | SEEN_CRLF:
618         return Py_BuildValue("sss", "\r", "\n", "\r\n");
619     default:
620         Py_RETURN_NONE;
621    }
622 
623 }
624 
625 /* TextIOWrapper */
626 
/* Signature of a specialized encoder.  The functions stored under this type
   below take (textio *self, PyObject *text) and are cast to it. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
629 
/* Instance layout of _io.TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;    /* read with PyBytes_AS_STRING by the specialized
                            encoders below */
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;

    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
688 
689 /* A couple of specialized cases in order to bypass the slow incremental
690    encoding methods for the most popular encodings. */
691 
692 static PyObject *
ascii_encode(textio * self,PyObject * text)693 ascii_encode(textio *self, PyObject *text)
694 {
695     return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
696 }
697 
698 static PyObject *
utf16be_encode(textio * self,PyObject * text)699 utf16be_encode(textio *self, PyObject *text)
700 {
701     return _PyUnicode_EncodeUTF16(text,
702                                   PyBytes_AS_STRING(self->errors), 1);
703 }
704 
705 static PyObject *
utf16le_encode(textio * self,PyObject * text)706 utf16le_encode(textio *self, PyObject *text)
707 {
708     return _PyUnicode_EncodeUTF16(text,
709                                   PyBytes_AS_STRING(self->errors), -1);
710 }
711 
712 static PyObject *
utf16_encode(textio * self,PyObject * text)713 utf16_encode(textio *self, PyObject *text)
714 {
715     if (!self->encoding_start_of_stream) {
716         /* Skip the BOM and use native byte ordering */
717 #if PY_BIG_ENDIAN
718         return utf16be_encode(self, text);
719 #else
720         return utf16le_encode(self, text);
721 #endif
722     }
723     return _PyUnicode_EncodeUTF16(text,
724                                   PyBytes_AS_STRING(self->errors), 0);
725 }
726 
727 static PyObject *
utf32be_encode(textio * self,PyObject * text)728 utf32be_encode(textio *self, PyObject *text)
729 {
730     return _PyUnicode_EncodeUTF32(text,
731                                   PyBytes_AS_STRING(self->errors), 1);
732 }
733 
734 static PyObject *
utf32le_encode(textio * self,PyObject * text)735 utf32le_encode(textio *self, PyObject *text)
736 {
737     return _PyUnicode_EncodeUTF32(text,
738                                   PyBytes_AS_STRING(self->errors), -1);
739 }
740 
741 static PyObject *
utf32_encode(textio * self,PyObject * text)742 utf32_encode(textio *self, PyObject *text)
743 {
744     if (!self->encoding_start_of_stream) {
745         /* Skip the BOM and use native byte ordering */
746 #if PY_BIG_ENDIAN
747         return utf32be_encode(self, text);
748 #else
749         return utf32le_encode(self, text);
750 #endif
751     }
752     return _PyUnicode_EncodeUTF32(text,
753                                   PyBytes_AS_STRING(self->errors), 0);
754 }
755 
756 static PyObject *
utf8_encode(textio * self,PyObject * text)757 utf8_encode(textio *self, PyObject *text)
758 {
759     return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
760 }
761 
762 static PyObject *
latin1_encode(textio * self,PyObject * text)763 latin1_encode(textio *self, PyObject *text)
764 {
765     return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
766 }
767 
768 /* Map normalized encoding names onto the specialized encoding funcs */
769 
/* One row of the encoding-name -> specialized-encoder table. */
typedef struct {
    const char *name;          /* normalized encoding name */
    encodefunc_t encodefunc;   /* encoder to use for that name */
} encodefuncentry;
774 
/* Encodings that have a specialized encoder in this file.  The functions
   take a textio * as first argument and are cast to the generic
   encodefunc_t.  The table ends with a {NULL, NULL} entry. */
static const encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
787 
788 
789 /*[clinic input]
790 _io.TextIOWrapper.__init__
791     buffer: object
792     encoding: str(accept={str, NoneType}) = NULL
793     errors: str(accept={str, NoneType}) = NULL
794     newline: str(accept={str, NoneType}) = NULL
795     line_buffering: int(c_default="0") = False
796     write_through: int(c_default="0") = False
797 
798 Character and line based layer over a BufferedIOBase object, buffer.
799 
800 encoding gives the name of the encoding that the stream will be
801 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
802 
803 errors determines the strictness of encoding and decoding (see
804 help(codecs.Codec) or the documentation for codecs.register) and
805 defaults to "strict".
806 
807 newline controls how line endings are handled. It can be None, '',
808 '\n', '\r', and '\r\n'.  It works as follows:
809 
810 * On input, if newline is None, universal newlines mode is
811   enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
812   these are translated into '\n' before being returned to the
813   caller. If it is '', universal newline mode is enabled, but line
814   endings are returned to the caller untranslated. If it has any of
815   the other legal values, input lines are only terminated by the given
816   string, and the line ending is returned to the caller untranslated.
817 
818 * On output, if newline is None, any '\n' characters written are
819   translated to the system default line separator, os.linesep. If
820   newline is '' or '\n', no translation takes place. If newline is any
821   of the other legal values, any '\n' characters written are translated
822   to the given string.
823 
824 If line_buffering is True, a call to flush is implied when a call to
825 write contains a newline character.
826 [clinic start generated code]*/
827 
828 static int
_io_TextIOWrapper___init___impl(textio * self,PyObject * buffer,const char * encoding,const char * errors,const char * newline,int line_buffering,int write_through)829 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
830                                 const char *encoding, const char *errors,
831                                 const char *newline, int line_buffering,
832                                 int write_through)
833 /*[clinic end generated code: output=56a83402ce2a8381 input=3126cb3101a2c99b]*/
834 {
835     PyObject *raw, *codec_info = NULL;
836     _PyIO_State *state = NULL;
837     PyObject *res;
838     int r;
839 
840     self->ok = 0;
841     self->detached = 0;
842 
843     if (newline && newline[0] != '\0'
844         && !(newline[0] == '\n' && newline[1] == '\0')
845         && !(newline[0] == '\r' && newline[1] == '\0')
846         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
847         PyErr_Format(PyExc_ValueError,
848                      "illegal newline value: %s", newline);
849         return -1;
850     }
851 
852     Py_CLEAR(self->buffer);
853     Py_CLEAR(self->encoding);
854     Py_CLEAR(self->encoder);
855     Py_CLEAR(self->decoder);
856     Py_CLEAR(self->readnl);
857     Py_CLEAR(self->decoded_chars);
858     Py_CLEAR(self->pending_bytes);
859     Py_CLEAR(self->snapshot);
860     Py_CLEAR(self->errors);
861     Py_CLEAR(self->raw);
862     self->decoded_chars_used = 0;
863     self->pending_bytes_count = 0;
864     self->encodefunc = NULL;
865     self->b2cratio = 0.0;
866 
867     if (encoding == NULL) {
868         /* Try os.device_encoding(fileno) */
869         PyObject *fileno;
870         state = IO_STATE();
871         if (state == NULL)
872             goto error;
873         fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
874         /* Ignore only AttributeError and UnsupportedOperation */
875         if (fileno == NULL) {
876             if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
877                 PyErr_ExceptionMatches(state->unsupported_operation)) {
878                 PyErr_Clear();
879             }
880             else {
881                 goto error;
882             }
883         }
884         else {
885             int fd = _PyLong_AsInt(fileno);
886             Py_DECREF(fileno);
887             if (fd == -1 && PyErr_Occurred()) {
888                 goto error;
889             }
890 
891             self->encoding = _Py_device_encoding(fd);
892             if (self->encoding == NULL)
893                 goto error;
894             else if (!PyUnicode_Check(self->encoding))
895                 Py_CLEAR(self->encoding);
896         }
897     }
898     if (encoding == NULL && self->encoding == NULL) {
899         PyObject *locale_module = _PyIO_get_locale_module(state);
900         if (locale_module == NULL)
901             goto catch_ImportError;
902         self->encoding = _PyObject_CallMethodId(
903             locale_module, &PyId_getpreferredencoding, "O", Py_False);
904         Py_DECREF(locale_module);
905         if (self->encoding == NULL) {
906           catch_ImportError:
907             /*
908              Importing locale can raise an ImportError because of
909              _functools, and locale.getpreferredencoding can raise an
910              ImportError if _locale is not available.  These will happen
911              during module building.
912             */
913             if (PyErr_ExceptionMatches(PyExc_ImportError)) {
914                 PyErr_Clear();
915                 self->encoding = PyUnicode_FromString("ascii");
916             }
917             else
918                 goto error;
919         }
920         else if (!PyUnicode_Check(self->encoding))
921             Py_CLEAR(self->encoding);
922     }
923     if (self->encoding != NULL) {
924         encoding = PyUnicode_AsUTF8(self->encoding);
925         if (encoding == NULL)
926             goto error;
927     }
928     else if (encoding != NULL) {
929         self->encoding = PyUnicode_FromString(encoding);
930         if (self->encoding == NULL)
931             goto error;
932     }
933     else {
934         PyErr_SetString(PyExc_IOError,
935                         "could not determine default encoding");
936     }
937 
938     /* Check we have been asked for a real text encoding */
939     codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
940     if (codec_info == NULL) {
941         Py_CLEAR(self->encoding);
942         goto error;
943     }
944 
945     /* XXX: Failures beyond this point have the potential to leak elements
946      * of the partially constructed object (like self->encoding)
947      */
948 
949     if (errors == NULL)
950         errors = "strict";
951     self->errors = PyBytes_FromString(errors);
952     if (self->errors == NULL)
953         goto error;
954 
955     self->chunk_size = 8192;
956     self->readuniversal = (newline == NULL || newline[0] == '\0');
957     self->line_buffering = line_buffering;
958     self->write_through = write_through;
959     self->readtranslate = (newline == NULL);
960     if (newline) {
961         self->readnl = PyUnicode_FromString(newline);
962         if (self->readnl == NULL)
963             goto error;
964     }
965     self->writetranslate = (newline == NULL || newline[0] != '\0');
966     if (!self->readuniversal && self->readnl) {
967         self->writenl = PyUnicode_AsUTF8(self->readnl);
968         if (self->writenl == NULL)
969             goto error;
970         if (!strcmp(self->writenl, "\n"))
971             self->writenl = NULL;
972     }
973 #ifdef MS_WINDOWS
974     else
975         self->writenl = "\r\n";
976 #endif
977 
978     /* Build the decoder object */
979     res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
980     if (res == NULL)
981         goto error;
982     r = PyObject_IsTrue(res);
983     Py_DECREF(res);
984     if (r == -1)
985         goto error;
986     if (r == 1) {
987         self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
988                                                            errors);
989         if (self->decoder == NULL)
990             goto error;
991 
992         if (self->readuniversal) {
993             PyObject *incrementalDecoder = PyObject_CallFunction(
994                 (PyObject *)&PyIncrementalNewlineDecoder_Type,
995                 "Oi", self->decoder, (int)self->readtranslate);
996             if (incrementalDecoder == NULL)
997                 goto error;
998             Py_XSETREF(self->decoder, incrementalDecoder);
999         }
1000     }
1001 
1002     /* Build the encoder object */
1003     res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
1004     if (res == NULL)
1005         goto error;
1006     r = PyObject_IsTrue(res);
1007     Py_DECREF(res);
1008     if (r == -1)
1009         goto error;
1010     if (r == 1) {
1011         self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
1012                                                            errors);
1013         if (self->encoder == NULL)
1014             goto error;
1015         /* Get the normalized name of the codec */
1016         res = _PyObject_GetAttrId(codec_info, &PyId_name);
1017         if (res == NULL) {
1018             if (PyErr_ExceptionMatches(PyExc_AttributeError))
1019                 PyErr_Clear();
1020             else
1021                 goto error;
1022         }
1023         else if (PyUnicode_Check(res)) {
1024             const encodefuncentry *e = encodefuncs;
1025             while (e->name != NULL) {
1026                 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
1027                     self->encodefunc = e->encodefunc;
1028                     break;
1029                 }
1030                 e++;
1031             }
1032         }
1033         Py_XDECREF(res);
1034     }
1035 
1036     /* Finished sorting out the codec details */
1037     Py_CLEAR(codec_info);
1038 
1039     self->buffer = buffer;
1040     Py_INCREF(buffer);
1041 
1042     if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1043         Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1044         Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1045         raw = _PyObject_GetAttrId(buffer, &PyId_raw);
1046         /* Cache the raw FileIO object to speed up 'closed' checks */
1047         if (raw == NULL) {
1048             if (PyErr_ExceptionMatches(PyExc_AttributeError))
1049                 PyErr_Clear();
1050             else
1051                 goto error;
1052         }
1053         else if (Py_TYPE(raw) == &PyFileIO_Type)
1054             self->raw = raw;
1055         else
1056             Py_DECREF(raw);
1057     }
1058 
1059     res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
1060     if (res == NULL)
1061         goto error;
1062     r = PyObject_IsTrue(res);
1063     Py_DECREF(res);
1064     if (r < 0)
1065         goto error;
1066     self->seekable = self->telling = r;
1067 
1068     self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
1069 
1070     self->encoding_start_of_stream = 0;
1071     if (self->seekable && self->encoder) {
1072         PyObject *cookieObj;
1073         int cmp;
1074 
1075         self->encoding_start_of_stream = 1;
1076 
1077         cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1078         if (cookieObj == NULL)
1079             goto error;
1080 
1081         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1082         Py_DECREF(cookieObj);
1083         if (cmp < 0) {
1084             goto error;
1085         }
1086 
1087         if (cmp == 0) {
1088             self->encoding_start_of_stream = 0;
1089             res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1090                                              _PyIO_zero, NULL);
1091             if (res == NULL)
1092                 goto error;
1093             Py_DECREF(res);
1094         }
1095     }
1096 
1097     self->ok = 1;
1098     return 0;
1099 
1100   error:
1101     Py_XDECREF(codec_info);
1102     return -1;
1103 }
1104 
1105 static int
textiowrapper_clear(textio * self)1106 textiowrapper_clear(textio *self)
1107 {
1108     self->ok = 0;
1109     Py_CLEAR(self->buffer);
1110     Py_CLEAR(self->encoding);
1111     Py_CLEAR(self->encoder);
1112     Py_CLEAR(self->decoder);
1113     Py_CLEAR(self->readnl);
1114     Py_CLEAR(self->decoded_chars);
1115     Py_CLEAR(self->pending_bytes);
1116     Py_CLEAR(self->snapshot);
1117     Py_CLEAR(self->errors);
1118     Py_CLEAR(self->raw);
1119 
1120     Py_CLEAR(self->dict);
1121     return 0;
1122 }
1123 
1124 static void
textiowrapper_dealloc(textio * self)1125 textiowrapper_dealloc(textio *self)
1126 {
1127     self->finalizing = 1;
1128     if (_PyIOBase_finalize((PyObject *) self) < 0)
1129         return;
1130     self->ok = 0;
1131     _PyObject_GC_UNTRACK(self);
1132     if (self->weakreflist != NULL)
1133         PyObject_ClearWeakRefs((PyObject *)self);
1134     textiowrapper_clear(self);
1135     Py_TYPE(self)->tp_free((PyObject *)self);
1136 }
1137 
1138 static int
textiowrapper_traverse(textio * self,visitproc visit,void * arg)1139 textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1140 {
1141     Py_VISIT(self->buffer);
1142     Py_VISIT(self->encoding);
1143     Py_VISIT(self->encoder);
1144     Py_VISIT(self->decoder);
1145     Py_VISIT(self->readnl);
1146     Py_VISIT(self->decoded_chars);
1147     Py_VISIT(self->pending_bytes);
1148     Py_VISIT(self->snapshot);
1149     Py_VISIT(self->errors);
1150     Py_VISIT(self->raw);
1151 
1152     Py_VISIT(self->dict);
1153     return 0;
1154 }
1155 
1156 static PyObject *
1157 textiowrapper_closed_get(textio *self, void *context);
1158 
1159 /* This macro takes some shortcuts to make the common case faster. */
1160 #define CHECK_CLOSED(self) \
1161     do { \
1162         int r; \
1163         PyObject *_res; \
1164         if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1165             if (self->raw != NULL) \
1166                 r = _PyFileIO_closed(self->raw); \
1167             else { \
1168                 _res = textiowrapper_closed_get(self, NULL); \
1169                 if (_res == NULL) \
1170                     return NULL; \
1171                 r = PyObject_IsTrue(_res); \
1172                 Py_DECREF(_res); \
1173                 if (r < 0) \
1174                     return NULL; \
1175             } \
1176             if (r > 0) { \
1177                 PyErr_SetString(PyExc_ValueError, \
1178                                 "I/O operation on closed file."); \
1179                 return NULL; \
1180             } \
1181         } \
1182         else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1183             return NULL; \
1184     } while (0)
1185 
/* The three macros below make the *calling* function return early with a
   ValueError when the wrapper cannot be used.  Each expands to a single
   if / else-if statement, so it may be followed by an optional semicolon.
   The argument is expanded more than once and must be free of side
   effects. */

/* Return NULL unless __init__ completed successfully (self->ok > 0). */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }

/* Return NULL unless the wrapper is initialised and still attached to its
   buffer.  For functions returning a pointer. */
#define CHECK_ATTACHED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    } else if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }

/* Same checks as CHECK_ATTACHED, for functions returning int (returns -1). */
#define CHECK_ATTACHED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } else if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1211 
1212 
1213 /*[clinic input]
1214 _io.TextIOWrapper.detach
1215 [clinic start generated code]*/
1216 
1217 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1218 _io_TextIOWrapper_detach_impl(textio *self)
1219 /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1220 {
1221     PyObject *buffer, *res;
1222     CHECK_ATTACHED(self);
1223     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1224     if (res == NULL)
1225         return NULL;
1226     Py_DECREF(res);
1227     buffer = self->buffer;
1228     self->buffer = NULL;
1229     self->detached = 1;
1230     return buffer;
1231 }
1232 
1233 /* Flush the internal write buffer. This doesn't explicitly flush the
1234    underlying buffered object, though. */
1235 static int
_textiowrapper_writeflush(textio * self)1236 _textiowrapper_writeflush(textio *self)
1237 {
1238     PyObject *pending, *b, *ret;
1239 
1240     if (self->pending_bytes == NULL)
1241         return 0;
1242 
1243     pending = self->pending_bytes;
1244     Py_INCREF(pending);
1245     self->pending_bytes_count = 0;
1246     Py_CLEAR(self->pending_bytes);
1247 
1248     b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1249     Py_DECREF(pending);
1250     if (b == NULL)
1251         return -1;
1252     ret = NULL;
1253     do {
1254         ret = PyObject_CallMethodObjArgs(self->buffer,
1255                                          _PyIO_str_write, b, NULL);
1256     } while (ret == NULL && _PyIO_trap_eintr());
1257     Py_DECREF(b);
1258     if (ret == NULL)
1259         return -1;
1260     Py_DECREF(ret);
1261     return 0;
1262 }
1263 
1264 /*[clinic input]
1265 _io.TextIOWrapper.write
1266     text: unicode
1267     /
1268 [clinic start generated code]*/
1269 
1270 static PyObject *
_io_TextIOWrapper_write_impl(textio * self,PyObject * text)1271 _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1272 /*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
1273 {
1274     PyObject *ret;
1275     PyObject *b;
1276     Py_ssize_t textlen;
1277     int haslf = 0;
1278     int needflush = 0, text_needflush = 0;
1279 
1280     if (PyUnicode_READY(text) == -1)
1281         return NULL;
1282 
1283     CHECK_ATTACHED(self);
1284     CHECK_CLOSED(self);
1285 
1286     if (self->encoder == NULL)
1287         return _unsupported("not writable");
1288 
1289     Py_INCREF(text);
1290 
1291     textlen = PyUnicode_GET_LENGTH(text);
1292 
1293     if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1294         if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1295             haslf = 1;
1296 
1297     if (haslf && self->writetranslate && self->writenl != NULL) {
1298         PyObject *newtext = _PyObject_CallMethodId(
1299             text, &PyId_replace, "ss", "\n", self->writenl);
1300         Py_DECREF(text);
1301         if (newtext == NULL)
1302             return NULL;
1303         text = newtext;
1304     }
1305 
1306     if (self->write_through)
1307         text_needflush = 1;
1308     if (self->line_buffering &&
1309         (haslf ||
1310          PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1311         needflush = 1;
1312 
1313     /* XXX What if we were just reading? */
1314     if (self->encodefunc != NULL) {
1315         b = (*self->encodefunc)((PyObject *) self, text);
1316         self->encoding_start_of_stream = 0;
1317     }
1318     else
1319         b = PyObject_CallMethodObjArgs(self->encoder,
1320                                        _PyIO_str_encode, text, NULL);
1321     Py_DECREF(text);
1322     if (b == NULL)
1323         return NULL;
1324 
1325     if (self->pending_bytes == NULL) {
1326         self->pending_bytes = PyList_New(0);
1327         if (self->pending_bytes == NULL) {
1328             Py_DECREF(b);
1329             return NULL;
1330         }
1331         self->pending_bytes_count = 0;
1332     }
1333     if (PyList_Append(self->pending_bytes, b) < 0) {
1334         Py_DECREF(b);
1335         return NULL;
1336     }
1337     self->pending_bytes_count += PyBytes_GET_SIZE(b);
1338     Py_DECREF(b);
1339     if (self->pending_bytes_count > self->chunk_size || needflush ||
1340         text_needflush) {
1341         if (_textiowrapper_writeflush(self) < 0)
1342             return NULL;
1343     }
1344 
1345     if (needflush) {
1346         ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1347         if (ret == NULL)
1348             return NULL;
1349         Py_DECREF(ret);
1350     }
1351 
1352     Py_CLEAR(self->snapshot);
1353 
1354     if (self->decoder) {
1355         ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
1356         if (ret == NULL)
1357             return NULL;
1358         Py_DECREF(ret);
1359     }
1360 
1361     return PyLong_FromSsize_t(textlen);
1362 }
1363 
1364 /* Steal a reference to chars and store it in the decoded_char buffer;
1365  */
1366 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1367 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1368 {
1369     Py_XSETREF(self->decoded_chars, chars);
1370     self->decoded_chars_used = 0;
1371 }
1372 
1373 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1374 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1375 {
1376     PyObject *chars;
1377     Py_ssize_t avail;
1378 
1379     if (self->decoded_chars == NULL)
1380         return PyUnicode_FromStringAndSize(NULL, 0);
1381 
1382     /* decoded_chars is guaranteed to be "ready". */
1383     avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1384              - self->decoded_chars_used);
1385 
1386     assert(avail >= 0);
1387 
1388     if (n < 0 || n > avail)
1389         n = avail;
1390 
1391     if (self->decoded_chars_used > 0 || n < avail) {
1392         chars = PyUnicode_Substring(self->decoded_chars,
1393                                     self->decoded_chars_used,
1394                                     self->decoded_chars_used + n);
1395         if (chars == NULL)
1396             return NULL;
1397     }
1398     else {
1399         chars = self->decoded_chars;
1400         Py_INCREF(chars);
1401     }
1402 
1403     self->decoded_chars_used += n;
1404     return chars;
1405 }
1406 
1407 /* Read and decode the next chunk of data from the BufferedReader.
1408  */
1409 static int
textiowrapper_read_chunk(textio * self,Py_ssize_t size_hint)1410 textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1411 {
1412     PyObject *dec_buffer = NULL;
1413     PyObject *dec_flags = NULL;
1414     PyObject *input_chunk = NULL;
1415     Py_buffer input_chunk_buf;
1416     PyObject *decoded_chars, *chunk_size;
1417     Py_ssize_t nbytes, nchars;
1418     int eof;
1419 
1420     /* The return value is True unless EOF was reached.  The decoded string is
1421      * placed in self._decoded_chars (replacing its previous value).  The
1422      * entire input chunk is sent to the decoder, though some of it may remain
1423      * buffered in the decoder, yet to be converted.
1424      */
1425 
1426     if (self->decoder == NULL) {
1427         _unsupported("not readable");
1428         return -1;
1429     }
1430 
1431     if (self->telling) {
1432         /* To prepare for tell(), we need to snapshot a point in the file
1433          * where the decoder's input buffer is empty.
1434          */
1435 
1436         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1437                                                      _PyIO_str_getstate, NULL);
1438         if (state == NULL)
1439             return -1;
1440         /* Given this, we know there was a valid snapshot point
1441          * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1442          */
1443         if (PyArg_ParseTuple(state, "OO", &dec_buffer, &dec_flags) < 0) {
1444             Py_DECREF(state);
1445             return -1;
1446         }
1447 
1448         if (!PyBytes_Check(dec_buffer)) {
1449             PyErr_Format(PyExc_TypeError,
1450                          "decoder getstate() should have returned a bytes "
1451                          "object, not '%.200s'",
1452                          Py_TYPE(dec_buffer)->tp_name);
1453             Py_DECREF(state);
1454             return -1;
1455         }
1456         Py_INCREF(dec_buffer);
1457         Py_INCREF(dec_flags);
1458         Py_DECREF(state);
1459     }
1460 
1461     /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1462     if (size_hint > 0) {
1463         size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1464     }
1465     chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1466     if (chunk_size == NULL)
1467         goto fail;
1468 
1469     input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1470         (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1471         chunk_size, NULL);
1472     Py_DECREF(chunk_size);
1473     if (input_chunk == NULL)
1474         goto fail;
1475 
1476     if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1477         PyErr_Format(PyExc_TypeError,
1478                      "underlying %s() should have returned a bytes-like object, "
1479                      "not '%.200s'", (self->has_read1 ? "read1": "read"),
1480                      Py_TYPE(input_chunk)->tp_name);
1481         goto fail;
1482     }
1483 
1484     nbytes = input_chunk_buf.len;
1485     eof = (nbytes == 0);
1486     if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1487         decoded_chars = _PyIncrementalNewlineDecoder_decode(
1488             self->decoder, input_chunk, eof);
1489     }
1490     else {
1491         decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1492             _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1493     }
1494     PyBuffer_Release(&input_chunk_buf);
1495 
1496     if (check_decoded(decoded_chars) < 0)
1497         goto fail;
1498     textiowrapper_set_decoded_chars(self, decoded_chars);
1499     nchars = PyUnicode_GET_LENGTH(decoded_chars);
1500     if (nchars > 0)
1501         self->b2cratio = (double) nbytes / nchars;
1502     else
1503         self->b2cratio = 0.0;
1504     if (nchars > 0)
1505         eof = 0;
1506 
1507     if (self->telling) {
1508         /* At the snapshot point, len(dec_buffer) bytes before the read, the
1509          * next input to be decoded is dec_buffer + input_chunk.
1510          */
1511         PyObject *next_input = dec_buffer;
1512         PyBytes_Concat(&next_input, input_chunk);
1513         if (next_input == NULL) {
1514             dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1515             goto fail;
1516         }
1517         Py_XSETREF(self->snapshot, Py_BuildValue("NN", dec_flags, next_input));
1518     }
1519     Py_DECREF(input_chunk);
1520 
1521     return (eof == 0);
1522 
1523   fail:
1524     Py_XDECREF(dec_buffer);
1525     Py_XDECREF(dec_flags);
1526     Py_XDECREF(input_chunk);
1527     return -1;
1528 }
1529 
1530 /*[clinic input]
1531 _io.TextIOWrapper.read
1532     size as n: io_ssize_t = -1
1533     /
1534 [clinic start generated code]*/
1535 
1536 static PyObject *
_io_TextIOWrapper_read_impl(textio * self,Py_ssize_t n)1537 _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1538 /*[clinic end generated code: output=7e651ce6cc6a25a6 input=8c09398424085cca]*/
1539 {
1540     PyObject *result = NULL, *chunks = NULL;
1541 
1542     CHECK_ATTACHED(self);
1543     CHECK_CLOSED(self);
1544 
1545     if (self->decoder == NULL)
1546         return _unsupported("not readable");
1547 
1548     if (_textiowrapper_writeflush(self) < 0)
1549         return NULL;
1550 
1551     if (n < 0) {
1552         /* Read everything */
1553         PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
1554         PyObject *decoded;
1555         if (bytes == NULL)
1556             goto fail;
1557 
1558         if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1559             decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1560                                                           bytes, 1);
1561         else
1562             decoded = PyObject_CallMethodObjArgs(
1563                 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
1564         Py_DECREF(bytes);
1565         if (check_decoded(decoded) < 0)
1566             goto fail;
1567 
1568         result = textiowrapper_get_decoded_chars(self, -1);
1569 
1570         if (result == NULL) {
1571             Py_DECREF(decoded);
1572             return NULL;
1573         }
1574 
1575         PyUnicode_AppendAndDel(&result, decoded);
1576         if (result == NULL)
1577             goto fail;
1578 
1579         Py_CLEAR(self->snapshot);
1580         return result;
1581     }
1582     else {
1583         int res = 1;
1584         Py_ssize_t remaining = n;
1585 
1586         result = textiowrapper_get_decoded_chars(self, n);
1587         if (result == NULL)
1588             goto fail;
1589         if (PyUnicode_READY(result) == -1)
1590             goto fail;
1591         remaining -= PyUnicode_GET_LENGTH(result);
1592 
1593         /* Keep reading chunks until we have n characters to return */
1594         while (remaining > 0) {
1595             res = textiowrapper_read_chunk(self, remaining);
1596             if (res < 0) {
1597                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1598                    when EINTR occurs so we needn't do it ourselves. */
1599                 if (_PyIO_trap_eintr()) {
1600                     continue;
1601                 }
1602                 goto fail;
1603             }
1604             if (res == 0)  /* EOF */
1605                 break;
1606             if (chunks == NULL) {
1607                 chunks = PyList_New(0);
1608                 if (chunks == NULL)
1609                     goto fail;
1610             }
1611             if (PyUnicode_GET_LENGTH(result) > 0 &&
1612                 PyList_Append(chunks, result) < 0)
1613                 goto fail;
1614             Py_DECREF(result);
1615             result = textiowrapper_get_decoded_chars(self, remaining);
1616             if (result == NULL)
1617                 goto fail;
1618             remaining -= PyUnicode_GET_LENGTH(result);
1619         }
1620         if (chunks != NULL) {
1621             if (result != NULL && PyList_Append(chunks, result) < 0)
1622                 goto fail;
1623             Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
1624             if (result == NULL)
1625                 goto fail;
1626             Py_CLEAR(chunks);
1627         }
1628         return result;
1629     }
1630   fail:
1631     Py_XDECREF(result);
1632     Py_XDECREF(chunks);
1633     return NULL;
1634 }
1635 
1636 
1637 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
1638    that is to the NUL character. Otherwise the function will produce
1639    incorrect results. */
1640 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)1641 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
1642 {
1643     if (kind == PyUnicode_1BYTE_KIND) {
1644         assert(ch < 256);
1645         return (char *) memchr((void *) s, (char) ch, end - s);
1646     }
1647     for (;;) {
1648         while (PyUnicode_READ(kind, s, 0) > ch)
1649             s += kind;
1650         if (PyUnicode_READ(kind, s, 0) == ch)
1651             return s;
1652         if (s == end)
1653             return NULL;
1654         s += kind;
1655     }
1656 }
1657 
1658 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,int kind,const char * start,const char * end,Py_ssize_t * consumed)1659 _PyIO_find_line_ending(
1660     int translated, int universal, PyObject *readnl,
1661     int kind, const char *start, const char *end, Py_ssize_t *consumed)
1662 {
1663     Py_ssize_t len = ((char*)end - (char*)start)/kind;
1664 
1665     if (translated) {
1666         /* Newlines are already translated, only search for \n */
1667         const char *pos = find_control_char(kind, start, end, '\n');
1668         if (pos != NULL)
1669             return (pos - start)/kind + 1;
1670         else {
1671             *consumed = len;
1672             return -1;
1673         }
1674     }
1675     else if (universal) {
1676         /* Universal newline search. Find any of \r, \r\n, \n
1677          * The decoder ensures that \r\n are not split in two pieces
1678          */
1679         const char *s = start;
1680         for (;;) {
1681             Py_UCS4 ch;
1682             /* Fast path for non-control chars. The loop always ends
1683                since the Unicode string is NUL-terminated. */
1684             while (PyUnicode_READ(kind, s, 0) > '\r')
1685                 s += kind;
1686             if (s >= end) {
1687                 *consumed = len;
1688                 return -1;
1689             }
1690             ch = PyUnicode_READ(kind, s, 0);
1691             s += kind;
1692             if (ch == '\n')
1693                 return (s - start)/kind;
1694             if (ch == '\r') {
1695                 if (PyUnicode_READ(kind, s, 0) == '\n')
1696                     return (s - start)/kind + 1;
1697                 else
1698                     return (s - start)/kind;
1699             }
1700         }
1701     }
1702     else {
1703         /* Non-universal mode. */
1704         Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1705         Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
1706         /* Assume that readnl is an ASCII character. */
1707         assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
1708         if (readnl_len == 1) {
1709             const char *pos = find_control_char(kind, start, end, nl[0]);
1710             if (pos != NULL)
1711                 return (pos - start)/kind + 1;
1712             *consumed = len;
1713             return -1;
1714         }
1715         else {
1716             const char *s = start;
1717             const char *e = end - (readnl_len - 1)*kind;
1718             const char *pos;
1719             if (e < s)
1720                 e = s;
1721             while (s < e) {
1722                 Py_ssize_t i;
1723                 const char *pos = find_control_char(kind, s, end, nl[0]);
1724                 if (pos == NULL || pos >= e)
1725                     break;
1726                 for (i = 1; i < readnl_len; i++) {
1727                     if (PyUnicode_READ(kind, pos, i) != nl[i])
1728                         break;
1729                 }
1730                 if (i == readnl_len)
1731                     return (pos - start)/kind + readnl_len;
1732                 s = pos + kind;
1733             }
1734             pos = find_control_char(kind, e, end, nl[0]);
1735             if (pos == NULL)
1736                 *consumed = len;
1737             else
1738                 *consumed = (pos - start)/kind;
1739             return -1;
1740         }
1741     }
1742 }
1743 
/* Read and return one line of decoded text as a new str object.
 *
 * self:  the text wrapper to read from.
 * limit: maximum number of characters to return; a negative value means
 *        "no limit".
 *
 * Pending writes are flushed first.  Decoded chunks are then fetched with
 * textiowrapper_read_chunk() until _PyIO_find_line_ending() reports a line
 * ending, the limit is reached, or end of file is hit.  Text that cannot be
 * returned yet is kept in two places:
 *   - `chunks`:    list of pieces known to contain no line ending;
 *   - `remaining`: tail of the last buffer that must be prepended to the
 *                  next decoded chunk before searching again.
 *
 * Returns a new reference (the empty string when nothing could be read),
 * or NULL on error.
 */
1744 static PyObject *
_textiowrapper_readline(textio * self,Py_ssize_t limit)1745 _textiowrapper_readline(textio *self, Py_ssize_t limit)
1746 {
1747     PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1748     Py_ssize_t start, endpos, chunked, offset_to_buffer;
1749     int res;
1750 
1751     CHECK_CLOSED(self);
1752 
1753     if (_textiowrapper_writeflush(self) < 0)
1754         return NULL;
1755 
         /* Number of characters already moved into `chunks`. */
1756     chunked = 0;
1757 
1758     while (1) {
1759         char *ptr;
1760         Py_ssize_t line_len;
1761         int kind;
1762         Py_ssize_t consumed = 0;
1763 
1764         /* First, get some data if necessary */
1765         res = 1;
1766         while (!self->decoded_chars ||
1767                !PyUnicode_GET_LENGTH(self->decoded_chars)) {
1768             res = textiowrapper_read_chunk(self, 0);
1769             if (res < 0) {
1770                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1771                    when EINTR occurs so we needn't do it ourselves. */
1772                 if (_PyIO_trap_eintr()) {
1773                     continue;
1774                 }
1775                 goto error;
1776             }
1777             if (res == 0)
1778                 break;
1779         }
1780         if (res == 0) {
1781             /* end of file */
1782             textiowrapper_set_decoded_chars(self, NULL);
1783             Py_CLEAR(self->snapshot);
1784             start = endpos = offset_to_buffer = 0;
1785             break;
1786         }
1787 
             /* Build the string to search: either the decoded buffer itself
                (searching from the first unused character), or the leftover
                of the previous iteration followed by the new buffer. */
1788         if (remaining == NULL) {
1789             line = self->decoded_chars;
1790             start = self->decoded_chars_used;
1791             offset_to_buffer = 0;
1792             Py_INCREF(line);
1793         }
1794         else {
1795             assert(self->decoded_chars_used == 0);
1796             line = PyUnicode_Concat(remaining, self->decoded_chars);
1797             start = 0;
1798             offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
1799             Py_CLEAR(remaining);
1800             if (line == NULL)
1801                 goto error;
1802             if (PyUnicode_READY(line) == -1)
1803                 goto error;
1804         }
1805 
1806         ptr = PyUnicode_DATA(line);
1807         line_len = PyUnicode_GET_LENGTH(line);
1808         kind = PyUnicode_KIND(line);
1809 
1810         endpos = _PyIO_find_line_ending(
1811             self->readtranslate, self->readuniversal, self->readnl,
1812             kind,
1813             ptr + kind * start,
1814             ptr + kind * line_len,
1815             &consumed);
1816         if (endpos >= 0) {
                 /* Line ending found.  The result is relative to `start`;
                    turn it into an index into `line`, then clamp it so that
                    the total returned length does not exceed `limit`. */
1817             endpos += start;
1818             if (limit >= 0 && (endpos - start) + chunked >= limit)
1819                 endpos = start + limit - chunked;
1820             break;
1821         }
1822 
1823         /* We can put aside up to `endpos` */
1824         endpos = consumed + start;
1825         if (limit >= 0 && (endpos - start) + chunked >= limit) {
1826             /* Didn't find line ending, but reached length limit */
1827             endpos = start + limit - chunked;
1828             break;
1829         }
1830 
1831         if (endpos > start) {
1832             /* No line ending seen yet - put aside current data */
1833             PyObject *s;
1834             if (chunks == NULL) {
1835                 chunks = PyList_New(0);
1836                 if (chunks == NULL)
1837                     goto error;
1838             }
1839             s = PyUnicode_Substring(line, start, endpos);
1840             if (s == NULL)
1841                 goto error;
1842             if (PyList_Append(chunks, s) < 0) {
1843                 Py_DECREF(s);
1844                 goto error;
1845             }
1846             chunked += PyUnicode_GET_LENGTH(s);
1847             Py_DECREF(s);
1848         }
1849         /* There may be some remaining bytes we'll have to prepend to the
1850            next chunk of data */
1851         if (endpos < line_len) {
1852             remaining = PyUnicode_Substring(line, endpos, line_len);
1853             if (remaining == NULL)
1854                 goto error;
1855         }
1856         Py_CLEAR(line);
1857         /* We have consumed the buffer */
1858         textiowrapper_set_decoded_chars(self, NULL);
1859     }
1860 
1861     if (line != NULL) {
1862         /* Our line ends in the current buffer */
             /* `line` may begin with text carried over in `remaining`, which
                is not part of self->decoded_chars; offset_to_buffer is the
                length of that prefix, so subtracting it gives the index into
                the current decoded buffer. */
1863         self->decoded_chars_used = endpos - offset_to_buffer;
1864         if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1865             PyObject *s = PyUnicode_Substring(line, start, endpos);
1866             Py_CLEAR(line);
1867             if (s == NULL)
1868                 goto error;
1869             line = s;
1870         }
1871     }
         /* `remaining` is only still set here when the loop ended at end of
            file: it is then part of the result. */
1872     if (remaining != NULL) {
1873         if (chunks == NULL) {
1874             chunks = PyList_New(0);
1875             if (chunks == NULL)
1876                 goto error;
1877         }
1878         if (PyList_Append(chunks, remaining) < 0)
1879             goto error;
1880         Py_CLEAR(remaining);
1881     }
         /* Glue the pieces put aside earlier and the final piece together. */
1882     if (chunks != NULL) {
1883         if (line != NULL) {
1884             if (PyList_Append(chunks, line) < 0)
1885                 goto error;
1886             Py_DECREF(line);
1887         }
1888         line = PyUnicode_Join(_PyIO_empty_str, chunks);
1889         if (line == NULL)
1890             goto error;
1891         Py_CLEAR(chunks);
1892     }
1893     if (line == NULL) {
1894         Py_INCREF(_PyIO_empty_str);
1895         line = _PyIO_empty_str;
1896     }
1897 
1898     return line;
1899 
1900   error:
1901     Py_XDECREF(chunks);
1902     Py_XDECREF(remaining);
1903     Py_XDECREF(line);
1904     return NULL;
1905 }
1906 
1907 /*[clinic input]
1908 _io.TextIOWrapper.readline
1909     size: Py_ssize_t = -1
1910     /
1911 [clinic start generated code]*/
1912 
1913 static PyObject *
_io_TextIOWrapper_readline_impl(textio * self,Py_ssize_t size)1914 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
1915 /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
1916 {
1917     CHECK_ATTACHED(self);
1918     return _textiowrapper_readline(self, size);
1919 }
1920 
1921 /* Seek and Tell */
1922 
1923 typedef struct {
1924     Py_off_t start_pos;
1925     int dec_flags;
1926     int bytes_to_feed;
1927     int chars_to_skip;
1928     char need_eof;
1929 } cookie_type;
1930 
1931 /*
1932    To speed up cookie packing/unpacking, we store the fields in a temporary
1933    string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1934    The following macros define at which offsets in the intermediary byte
1935    string the various CookieStruct fields will be stored.
1936  */
1937 
1938 #define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1939 
1940 #if PY_BIG_ENDIAN
1941 /* We want the least significant byte of start_pos to also be the least
1942    significant byte of the cookie, which means that in big-endian mode we
1943    must copy the fields in reverse order. */
1944 
1945 # define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
1946 # define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
1947 # define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
1948 # define OFF_CHARS_TO_SKIP  (sizeof(char))
1949 # define OFF_NEED_EOF       0
1950 
1951 #else
1952 /* Little-endian mode: the least significant byte of start_pos will
1953    naturally end up the least significant byte of the cookie. */
1954 
1955 # define OFF_START_POS      0
1956 # define OFF_DEC_FLAGS      (sizeof(Py_off_t))
1957 # define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
1958 # define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
1959 # define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))
1960 
1961 #endif
1962 
1963 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)1964 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1965 {
1966     unsigned char buffer[COOKIE_BUF_LEN];
1967     PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1968     if (cookieLong == NULL)
1969         return -1;
1970 
1971     if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1972                             PY_LITTLE_ENDIAN, 0) < 0) {
1973         Py_DECREF(cookieLong);
1974         return -1;
1975     }
1976     Py_DECREF(cookieLong);
1977 
1978     memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1979     memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1980     memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1981     memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1982     memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1983 
1984     return 0;
1985 }
1986 
1987 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)1988 textiowrapper_build_cookie(cookie_type *cookie)
1989 {
1990     unsigned char buffer[COOKIE_BUF_LEN];
1991 
1992     memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1993     memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1994     memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1995     memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1996     memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1997 
1998     return _PyLong_FromByteArray(buffer, sizeof(buffer),
1999                                  PY_LITTLE_ENDIAN, 0);
2000 }
2001 
2002 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2003 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2004 {
2005     PyObject *res;
2006     /* When seeking to the start of the stream, we call decoder.reset()
2007        rather than decoder.getstate().
2008        This is for a few decoders such as utf-16 for which the state value
2009        at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2010        utf-16, that we are expecting a BOM).
2011     */
2012     if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2013         res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2014     else
2015         res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2016                                      "((yi))", "", cookie->dec_flags);
2017     if (res == NULL)
2018         return -1;
2019     Py_DECREF(res);
2020     return 0;
2021 }
2022 
2023 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2024 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2025 {
2026     PyObject *res;
2027     if (start_of_stream) {
2028         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2029         self->encoding_start_of_stream = 1;
2030     }
2031     else {
2032         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2033                                          _PyIO_zero, NULL);
2034         self->encoding_start_of_stream = 0;
2035     }
2036     if (res == NULL)
2037         return -1;
2038     Py_DECREF(res);
2039     return 0;
2040 }
2041 
2042 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2043 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2044 {
2045     /* Same as _textiowrapper_decoder_setstate() above. */
2046     return _textiowrapper_encoder_reset(
2047         self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2048 }
2049 
2050 /*[clinic input]
2051 _io.TextIOWrapper.seek
2052     cookie as cookieObj: object
2053     whence: int = 0
2054     /
2055 [clinic start generated code]*/
2056 
2057 static PyObject *
_io_TextIOWrapper_seek_impl(textio * self,PyObject * cookieObj,int whence)2058 _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2059 /*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
2060 {
2061     PyObject *posobj;
2062     cookie_type cookie;
2063     PyObject *res;
2064     int cmp;
2065 
2066     CHECK_ATTACHED(self);
2067     CHECK_CLOSED(self);
2068 
2069     Py_INCREF(cookieObj);
2070 
2071     if (!self->seekable) {
2072         _unsupported("underlying stream is not seekable");
2073         goto fail;
2074     }
2075 
2076     if (whence == 1) {
2077         /* seek relative to current position */
2078         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2079         if (cmp < 0)
2080             goto fail;
2081 
2082         if (cmp == 0) {
2083             _unsupported("can't do nonzero cur-relative seeks");
2084             goto fail;
2085         }
2086 
2087         /* Seeking to the current position should attempt to
2088          * sync the underlying buffer with the current position.
2089          */
2090         Py_DECREF(cookieObj);
2091         cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
2092         if (cookieObj == NULL)
2093             goto fail;
2094     }
2095     else if (whence == 2) {
2096         /* seek relative to end of file */
2097         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2098         if (cmp < 0)
2099             goto fail;
2100 
2101         if (cmp == 0) {
2102             _unsupported("can't do nonzero end-relative seeks");
2103             goto fail;
2104         }
2105 
2106         res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2107         if (res == NULL)
2108             goto fail;
2109         Py_DECREF(res);
2110 
2111         textiowrapper_set_decoded_chars(self, NULL);
2112         Py_CLEAR(self->snapshot);
2113         if (self->decoder) {
2114             res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
2115             if (res == NULL)
2116                 goto fail;
2117             Py_DECREF(res);
2118         }
2119 
2120         res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
2121         Py_CLEAR(cookieObj);
2122         if (res == NULL)
2123             goto fail;
2124         if (self->encoder) {
2125             /* If seek() == 0, we are at the start of stream, otherwise not */
2126             cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ);
2127             if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2128                 Py_DECREF(res);
2129                 goto fail;
2130             }
2131         }
2132         return res;
2133     }
2134     else if (whence != 0) {
2135         PyErr_Format(PyExc_ValueError,
2136                      "invalid whence (%d, should be 0, 1 or 2)", whence);
2137         goto fail;
2138     }
2139 
2140     cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2141     if (cmp < 0)
2142         goto fail;
2143 
2144     if (cmp == 1) {
2145         PyErr_Format(PyExc_ValueError,
2146                      "negative seek position %R", cookieObj);
2147         goto fail;
2148     }
2149 
2150     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2151     if (res == NULL)
2152         goto fail;
2153     Py_DECREF(res);
2154 
2155     /* The strategy of seek() is to go back to the safe start point
2156      * and replay the effect of read(chars_to_skip) from there.
2157      */
2158     if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2159         goto fail;
2160 
2161     /* Seek back to the safe start point. */
2162     posobj = PyLong_FromOff_t(cookie.start_pos);
2163     if (posobj == NULL)
2164         goto fail;
2165     res = PyObject_CallMethodObjArgs(self->buffer,
2166                                      _PyIO_str_seek, posobj, NULL);
2167     Py_DECREF(posobj);
2168     if (res == NULL)
2169         goto fail;
2170     Py_DECREF(res);
2171 
2172     textiowrapper_set_decoded_chars(self, NULL);
2173     Py_CLEAR(self->snapshot);
2174 
2175     /* Restore the decoder to its state from the safe start point. */
2176     if (self->decoder) {
2177         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2178             goto fail;
2179     }
2180 
2181     if (cookie.chars_to_skip) {
2182         /* Just like _read_chunk, feed the decoder and save a snapshot. */
2183         PyObject *input_chunk = _PyObject_CallMethodId(
2184             self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
2185         PyObject *decoded;
2186 
2187         if (input_chunk == NULL)
2188             goto fail;
2189 
2190         if (!PyBytes_Check(input_chunk)) {
2191             PyErr_Format(PyExc_TypeError,
2192                          "underlying read() should have returned a bytes "
2193                          "object, not '%.200s'",
2194                          Py_TYPE(input_chunk)->tp_name);
2195             Py_DECREF(input_chunk);
2196             goto fail;
2197         }
2198 
2199         self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2200         if (self->snapshot == NULL) {
2201             Py_DECREF(input_chunk);
2202             goto fail;
2203         }
2204 
2205         decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2206             "Oi", input_chunk, (int)cookie.need_eof);
2207 
2208         if (check_decoded(decoded) < 0)
2209             goto fail;
2210 
2211         textiowrapper_set_decoded_chars(self, decoded);
2212 
2213         /* Skip chars_to_skip of the decoded characters. */
2214         if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2215             PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2216             goto fail;
2217         }
2218         self->decoded_chars_used = cookie.chars_to_skip;
2219     }
2220     else {
2221         self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2222         if (self->snapshot == NULL)
2223             goto fail;
2224     }
2225 
2226     /* Finally, reset the encoder (merely useful for proper BOM handling) */
2227     if (self->encoder) {
2228         if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2229             goto fail;
2230     }
2231     return cookieObj;
2232   fail:
2233     Py_XDECREF(cookieObj);
2234     return NULL;
2235 
2236 }
2237 
2238 /*[clinic input]
2239 _io.TextIOWrapper.tell
2240 [clinic start generated code]*/
2241 
2242 static PyObject *
_io_TextIOWrapper_tell_impl(textio * self)2243 _io_TextIOWrapper_tell_impl(textio *self)
2244 /*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
2245 {
         /* Return an integer cookie describing the current logical position.
          *
          * When there is no decoder or no snapshot, the cookie is simply
          * the result of buffer.tell().  Otherwise the cookie packs (see
          * cookie_type) a "safe start point" - a byte position at which the
          * decoder has nothing buffered - together with what is needed to
          * replay decoding from there up to the current position: the
          * decoder flags, the number of bytes to feed, the number of decoded
          * characters to skip, and whether EOF must be signalled.
          *
          * The decoder itself is used as a scratch object during the
          * search; its state is saved in `saved_state` beforehand and
          * restored on both the success (`finally`) and failure (`fail`)
          * paths.
          *
          * Returns a new reference, or NULL on error.
          */
2246     PyObject *res;
2247     PyObject *posobj = NULL;
2248     cookie_type cookie = {0,0,0,0,0};
2249     PyObject *next_input;
2250     Py_ssize_t chars_to_skip, chars_decoded;
2251     Py_ssize_t skip_bytes, skip_back;
2252     PyObject *saved_state = NULL;
2253     char *input, *input_end;
2254     Py_ssize_t dec_buffer_len;
2255     int dec_flags;
2256 
2257     CHECK_ATTACHED(self);
2258     CHECK_CLOSED(self);
2259 
2260     if (!self->seekable) {
2261         _unsupported("underlying stream is not seekable");
2262         goto fail;
2263     }
2264     if (!self->telling) {
2265         PyErr_SetString(PyExc_IOError,
2266                         "telling position disabled by next() call");
2267         goto fail;
2268     }
2269 
2270     if (_textiowrapper_writeflush(self) < 0)
2271         return NULL;
2272     res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2273     if (res == NULL)
2274         goto fail;
2275     Py_DECREF(res);
2276 
2277     posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
2278     if (posobj == NULL)
2279         goto fail;
2280 
2281     if (self->decoder == NULL || self->snapshot == NULL) {
2282         assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2283         return posobj;
2284     }
2285 
2286 #if defined(HAVE_LARGEFILE_SUPPORT)
2287     cookie.start_pos = PyLong_AsLongLong(posobj);
2288 #else
2289     cookie.start_pos = PyLong_AsLong(posobj);
2290 #endif
2291     Py_DECREF(posobj);
2292     if (PyErr_Occurred())
2293         goto fail;
2294 
2295     /* Skip backward to the snapshot point (see _read_chunk). */
2296     if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2297         goto fail;
2298 
2299     assert (PyBytes_Check(next_input));
2300 
2301     cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2302 
2303     /* How many decoded characters have been used up since the snapshot? */
2304     if (self->decoded_chars_used == 0)  {
2305         /* We haven't moved from the snapshot point. */
2306         return textiowrapper_build_cookie(&cookie);
2307     }
2308 
2309     chars_to_skip = self->decoded_chars_used;
2310 
2311     /* Decoder state will be restored at the end */
2312     saved_state = PyObject_CallMethodObjArgs(self->decoder,
2313                                              _PyIO_str_getstate, NULL);
2314     if (saved_state == NULL)
2315         goto fail;
2316 
     /* Call decoder.getstate() and store the length of its buffered bytes in
        dec_buffer_len and its flags in dec_flags.  Jumps to `fail` on error. */
2317 #define DECODER_GETSTATE() do { \
2318         PyObject *dec_buffer; \
2319         PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2320             _PyIO_str_getstate, NULL); \
2321         if (_state == NULL) \
2322             goto fail; \
2323         if (!PyArg_ParseTuple(_state, "Oi", &dec_buffer, &dec_flags)) { \
2324             Py_DECREF(_state); \
2325             goto fail; \
2326         } \
2327         if (!PyBytes_Check(dec_buffer)) { \
2328             PyErr_Format(PyExc_TypeError, \
2329                          "decoder getstate() should have returned a bytes " \
2330                          "object, not '%.200s'", \
2331                          Py_TYPE(dec_buffer)->tp_name); \
2332             Py_DECREF(_state); \
2333             goto fail; \
2334         } \
2335         dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2336         Py_DECREF(_state); \
2337     } while (0)
2338 
     /* Decode `len` bytes starting at `start` with self->decoder and store
        the number of characters produced in `res`.  Jumps to `fail` on
        error. */
2339 #define DECODER_DECODE(start, len, res) do { \
2340         PyObject *_decoded = _PyObject_CallMethodId( \
2341             self->decoder, &PyId_decode, "y#", start, len); \
2342         if (check_decoded(_decoded) < 0) \
2343             goto fail; \
2344         res = PyUnicode_GET_LENGTH(_decoded); \
2345         Py_DECREF(_decoded); \
2346     } while (0)
2347 
2348     /* Fast search for an acceptable start point, close to our
2349        current pos */
         /* First guess: scale the number of consumed characters by
            self->b2cratio (presumably a bytes-per-character ratio maintained
            when chunks are decoded - confirm where the field is set). */
2350     skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2351     skip_back = 1;
2352     assert(skip_back <= PyBytes_GET_SIZE(next_input));
2353     input = PyBytes_AS_STRING(next_input);
2354     while (skip_bytes > 0) {
2355         /* Decode up to tentative start point */
2356         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2357             goto fail;
2358         DECODER_DECODE(input, skip_bytes, chars_decoded);
2359         if (chars_decoded <= chars_to_skip) {
2360             DECODER_GETSTATE();
2361             if (dec_buffer_len == 0) {
2362                 /* Before pos and no bytes buffered in decoder => OK */
2363                 cookie.dec_flags = dec_flags;
2364                 chars_to_skip -= chars_decoded;
2365                 break;
2366             }
2367             /* Skip back by buffered amount and reset heuristic */
2368             skip_bytes -= dec_buffer_len;
2369             skip_back = 1;
2370         }
2371         else {
2372             /* We're too far ahead, skip back a bit */
                 /* The step doubles on every overshoot. */
2373             skip_bytes -= skip_back;
2374             skip_back *= 2;
2375         }
2376     }
         /* No usable start point was found ahead of the snapshot: start
            from the snapshot itself with the decoder put back in the
            cookie's state. */
2377     if (skip_bytes <= 0) {
2378         skip_bytes = 0;
2379         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2380             goto fail;
2381     }
2382 
2383     /* Note our initial start point. */
2384     cookie.start_pos += skip_bytes;
2385     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2386     if (chars_to_skip == 0)
2387         goto finally;
2388 
2389     /* We should be close to the desired position.  Now feed the decoder one
2390      * byte at a time until we reach the `chars_to_skip` target.
2391      * As we go, note the nearest "safe start point" before the current
2392      * location (a point where the decoder has nothing buffered, so seek()
2393      * can safely start from there and advance to this location).
2394      */
2395     chars_decoded = 0;
2396     input = PyBytes_AS_STRING(next_input);
2397     input_end = input + PyBytes_GET_SIZE(next_input);
2398     input += skip_bytes;
2399     while (input < input_end) {
2400         Py_ssize_t n;
2401 
2402         DECODER_DECODE(input, (Py_ssize_t)1, n);
2403         /* We got n chars for 1 byte */
2404         chars_decoded += n;
2405         cookie.bytes_to_feed += 1;
2406         DECODER_GETSTATE();
2407 
2408         if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2409             /* Decoder buffer is empty, so this is a safe start point. */
2410             cookie.start_pos += cookie.bytes_to_feed;
2411             chars_to_skip -= chars_decoded;
2412             cookie.dec_flags = dec_flags;
2413             cookie.bytes_to_feed = 0;
2414             chars_decoded = 0;
2415         }
2416         if (chars_decoded >= chars_to_skip)
2417             break;
2418         input++;
2419     }
2420     if (input == input_end) {
2421         /* We didn't get enough decoded data; signal EOF to get more. */
2422         PyObject *decoded = _PyObject_CallMethodId(
2423             self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
2424         if (check_decoded(decoded) < 0)
2425             goto fail;
2426         chars_decoded += PyUnicode_GET_LENGTH(decoded);
2427         Py_DECREF(decoded);
2428         cookie.need_eof = 1;
2429 
2430         if (chars_decoded < chars_to_skip) {
2431             PyErr_SetString(PyExc_IOError,
2432                             "can't reconstruct logical file position");
2433             goto fail;
2434         }
2435     }
2436 
2437 finally:
         /* Success path: put the decoder back in the state it had before
            the search. */
2438     res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
2439     Py_DECREF(saved_state);
2440     if (res == NULL)
2441         return NULL;
2442     Py_DECREF(res);
2443 
2444     /* The returned cookie corresponds to the last safe start point. */
2445     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2446     return textiowrapper_build_cookie(&cookie);
2447 
2448 fail:
         /* Failure path: the decoder state is restored as well.  The pending
            exception is fetched first, setstate() is called, and the fetched
            exception is then handed to _PyErr_ChainExceptions(). */
2449     if (saved_state) {
2450         PyObject *type, *value, *traceback;
2451         PyErr_Fetch(&type, &value, &traceback);
2452         res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
2453         _PyErr_ChainExceptions(type, value, traceback);
2454         Py_DECREF(saved_state);
2455         Py_XDECREF(res);
2456     }
2457     return NULL;
2458 }
2459 
2460 /*[clinic input]
2461 _io.TextIOWrapper.truncate
2462     pos: object = None
2463     /
2464 [clinic start generated code]*/
2465 
2466 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2467 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2468 /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2469 {
2470     PyObject *res;
2471 
2472     CHECK_ATTACHED(self)
2473 
2474     res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2475     if (res == NULL)
2476         return NULL;
2477     Py_DECREF(res);
2478 
2479     return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2480 }
2481 
2482 static PyObject *
textiowrapper_repr(textio * self)2483 textiowrapper_repr(textio *self)
2484 {
2485     PyObject *nameobj, *modeobj, *res, *s;
2486 
2487     CHECK_INITIALIZED(self);
2488 
2489     res = PyUnicode_FromString("<_io.TextIOWrapper");
2490     if (res == NULL)
2491         return NULL;
2492 
2493     nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
2494     if (nameobj == NULL) {
2495         if (PyErr_ExceptionMatches(PyExc_Exception))
2496             PyErr_Clear();
2497         else
2498             goto error;
2499     }
2500     else {
2501         s = PyUnicode_FromFormat(" name=%R", nameobj);
2502         Py_DECREF(nameobj);
2503         if (s == NULL)
2504             goto error;
2505         PyUnicode_AppendAndDel(&res, s);
2506         if (res == NULL)
2507             return NULL;
2508     }
2509     modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
2510     if (modeobj == NULL) {
2511         if (PyErr_ExceptionMatches(PyExc_Exception))
2512             PyErr_Clear();
2513         else
2514             goto error;
2515     }
2516     else {
2517         s = PyUnicode_FromFormat(" mode=%R", modeobj);
2518         Py_DECREF(modeobj);
2519         if (s == NULL)
2520             goto error;
2521         PyUnicode_AppendAndDel(&res, s);
2522         if (res == NULL)
2523             return NULL;
2524     }
2525     s = PyUnicode_FromFormat("%U encoding=%R>",
2526                              res, self->encoding);
2527     Py_DECREF(res);
2528     return s;
2529 error:
2530     Py_XDECREF(res);
2531     return NULL;
2532 }
2533 
2534 
2535 /* Inquiries */
2536 
2537 /*[clinic input]
2538 _io.TextIOWrapper.fileno
2539 [clinic start generated code]*/
2540 
2541 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)2542 _io_TextIOWrapper_fileno_impl(textio *self)
2543 /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2544 {
2545     CHECK_ATTACHED(self);
2546     return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
2547 }
2548 
2549 /*[clinic input]
2550 _io.TextIOWrapper.seekable
2551 [clinic start generated code]*/
2552 
2553 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)2554 _io_TextIOWrapper_seekable_impl(textio *self)
2555 /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2556 {
2557     CHECK_ATTACHED(self);
2558     return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
2559 }
2560 
2561 /*[clinic input]
2562 _io.TextIOWrapper.readable
2563 [clinic start generated code]*/
2564 
2565 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)2566 _io_TextIOWrapper_readable_impl(textio *self)
2567 /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2568 {
2569     CHECK_ATTACHED(self);
2570     return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
2571 }
2572 
2573 /*[clinic input]
2574 _io.TextIOWrapper.writable
2575 [clinic start generated code]*/
2576 
2577 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)2578 _io_TextIOWrapper_writable_impl(textio *self)
2579 /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
2580 {
2581     CHECK_ATTACHED(self);
2582     return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
2583 }
2584 
2585 /*[clinic input]
2586 _io.TextIOWrapper.isatty
2587 [clinic start generated code]*/
2588 
2589 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)2590 _io_TextIOWrapper_isatty_impl(textio *self)
2591 /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
2592 {
2593     CHECK_ATTACHED(self);
2594     return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
2595 }
2596 
2597 static PyObject *
textiowrapper_getstate(textio * self,PyObject * args)2598 textiowrapper_getstate(textio *self, PyObject *args)
2599 {
2600     PyErr_Format(PyExc_TypeError,
2601                  "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2602     return NULL;
2603 }
2604 
2605 /*[clinic input]
2606 _io.TextIOWrapper.flush
2607 [clinic start generated code]*/
2608 
2609 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)2610 _io_TextIOWrapper_flush_impl(textio *self)
2611 /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
2612 {
2613     CHECK_ATTACHED(self);
2614     CHECK_CLOSED(self);
2615     self->telling = self->seekable;
2616     if (_textiowrapper_writeflush(self) < 0)
2617         return NULL;
2618     return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
2619 }
2620 
2621 /*[clinic input]
2622 _io.TextIOWrapper.close
2623 [clinic start generated code]*/
2624 
2625 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)2626 _io_TextIOWrapper_close_impl(textio *self)
2627 /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
2628 {
2629     PyObject *res;
2630     int r;
2631     CHECK_ATTACHED(self);
2632 
2633     res = textiowrapper_closed_get(self, NULL);
2634     if (res == NULL)
2635         return NULL;
2636     r = PyObject_IsTrue(res);
2637     Py_DECREF(res);
2638     if (r < 0)
2639         return NULL;
2640 
2641     if (r > 0) {
2642         Py_RETURN_NONE; /* stream already closed */
2643     }
2644     else {
2645         PyObject *exc = NULL, *val, *tb;
2646         if (self->finalizing) {
2647             res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
2648             if (res)
2649                 Py_DECREF(res);
2650             else
2651                 PyErr_Clear();
2652         }
2653         res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2654         if (res == NULL)
2655             PyErr_Fetch(&exc, &val, &tb);
2656         else
2657             Py_DECREF(res);
2658 
2659         res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2660         if (exc != NULL) {
2661             _PyErr_ChainExceptions(exc, val, tb);
2662             Py_CLEAR(res);
2663         }
2664         return res;
2665     }
2666 }
2667 
2668 static PyObject *
textiowrapper_iternext(textio * self)2669 textiowrapper_iternext(textio *self)
2670 {
2671     PyObject *line;
2672 
2673     CHECK_ATTACHED(self);
2674 
2675     self->telling = 0;
2676     if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2677         /* Skip method call overhead for speed */
2678         line = _textiowrapper_readline(self, -1);
2679     }
2680     else {
2681         line = PyObject_CallMethodObjArgs((PyObject *)self,
2682                                            _PyIO_str_readline, NULL);
2683         if (line && !PyUnicode_Check(line)) {
2684             PyErr_Format(PyExc_IOError,
2685                          "readline() should have returned a str object, "
2686                          "not '%.200s'", Py_TYPE(line)->tp_name);
2687             Py_DECREF(line);
2688             return NULL;
2689         }
2690     }
2691 
2692     if (line == NULL || PyUnicode_READY(line) == -1)
2693         return NULL;
2694 
2695     if (PyUnicode_GET_LENGTH(line) == 0) {
2696         /* Reached EOF or would have blocked */
2697         Py_DECREF(line);
2698         Py_CLEAR(self->snapshot);
2699         self->telling = self->seekable;
2700         return NULL;
2701     }
2702 
2703     return line;
2704 }
2705 
2706 static PyObject *
textiowrapper_name_get(textio * self,void * context)2707 textiowrapper_name_get(textio *self, void *context)
2708 {
2709     CHECK_ATTACHED(self);
2710     return _PyObject_GetAttrId(self->buffer, &PyId_name);
2711 }
2712 
2713 static PyObject *
textiowrapper_closed_get(textio * self,void * context)2714 textiowrapper_closed_get(textio *self, void *context)
2715 {
2716     CHECK_ATTACHED(self);
2717     return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2718 }
2719 
2720 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)2721 textiowrapper_newlines_get(textio *self, void *context)
2722 {
2723     PyObject *res;
2724     CHECK_ATTACHED(self);
2725     if (self->decoder == NULL)
2726         Py_RETURN_NONE;
2727     res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2728     if (res == NULL) {
2729         if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2730             PyErr_Clear();
2731             Py_RETURN_NONE;
2732         }
2733         else {
2734             return NULL;
2735         }
2736     }
2737     return res;
2738 }
2739 
2740 static PyObject *
textiowrapper_errors_get(textio * self,void * context)2741 textiowrapper_errors_get(textio *self, void *context)
2742 {
2743     CHECK_INITIALIZED(self);
2744     return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2745 }
2746 
2747 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)2748 textiowrapper_chunk_size_get(textio *self, void *context)
2749 {
2750     CHECK_ATTACHED(self);
2751     return PyLong_FromSsize_t(self->chunk_size);
2752 }
2753 
2754 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)2755 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2756 {
2757     Py_ssize_t n;
2758     CHECK_ATTACHED_INT(self);
2759     n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
2760     if (n == -1 && PyErr_Occurred())
2761         return -1;
2762     if (n <= 0) {
2763         PyErr_SetString(PyExc_ValueError,
2764                         "a strictly positive integer is required");
2765         return -1;
2766     }
2767     self->chunk_size = n;
2768     return 0;
2769 }
2770 
2771 #include "clinic/textio.c.h"
2772 
/* Method table for _io.IncrementalNewlineDecoder (installed as tp_methods
   of PyIncrementalNewlineDecoder_Type).  The *_METHODDEF macros are
   presumably supplied by the clinic/textio.c.h include above; each is
   written without a trailing comma, so it must expand to a complete
   entry including its comma.  The {NULL} entry terminates the table. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}
};
2780 
/* Computed attributes of _io.IncrementalNewlineDecoder: `newlines` has a
   getter only (setter slot is NULL).  The {NULL} entry terminates the
   table. */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}
};
2785 
/* Static type object for _io.IncrementalNewlineDecoder.

   The slots are filled positionally, so the order of the entries below
   must not change.  The type is subclassable (Py_TPFLAGS_BASETYPE), does
   not set the GC flag and provides no traverse/clear slots.  Instances are
   created with PyType_GenericNew and set up by the clinic-generated
   __init__. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    _io_IncrementalNewlineDecoder___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
2826 
2827 
/* Method table for _io.TextIOWrapper (installed as tp_methods of
   PyTextIOWrapper_Type).  The *_METHODDEF macros are presumably supplied
   by the clinic/textio.c.h include above; each is written without a
   trailing comma, so it must expand to a complete entry including its
   comma.  __getstate__ is the only hand-written entry.  The {NULL, NULL}
   entry terminates the table. */
static PyMethodDef textiowrapper_methods[] = {
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
    /* Always raises TypeError: these objects cannot be serialized. */
    {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},

    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}
};
2848 
/* Attributes of _io.TextIOWrapper that map directly onto fields of the
   textio struct.  `encoding`, `buffer` and `line_buffering` are read-only;
   `_finalizing` has flags 0 and is therefore writable from Python.  The
   {NULL} entry terminates the table. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
    {NULL}
};
2856 
/* Computed attributes of _io.TextIOWrapper.  All are getter-only except
   `_CHUNK_SIZE`, which also has a setter.  The `mode` entry is commented
   out, so no `mode` attribute is provided by this table.  The {NULL}
   entry terminates the table. */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}
};
2868 
/* Static type object for _io.TextIOWrapper.

   The slots are filled positionally, so the order of the entries below
   must not change.  The type is subclassable, takes part in cyclic GC
   (traverse/clear slots are set) and supports weak references and a
   per-instance __dict__ through the offsets into the textio struct.
   NOTE(review): tp_base, tp_iter and tp_finalize are 0 here even though
   Py_TPFLAGS_HAVE_FINALIZE is set; presumably they are filled in or
   inherited when the module initializes the type -- confirm against the
   module init code. */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
        | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
    _io_TextIOWrapper___init____doc__, /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    _io_TextIOWrapper___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};
2920