1 /*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8
9 #include "Python.h"
10 #include "pycore_call.h" // _PyObject_CallMethod()
11 #include "pycore_codecs.h" // _PyCodecInfo_GetIncrementalDecoder()
12 #include "pycore_fileutils.h" // _Py_GetLocaleEncoding()
13 #include "pycore_interp.h" // PyInterpreterState.fs_codec
14 #include "pycore_long.h" // _PyLong_GetZero()
15 #include "pycore_object.h" // _PyObject_GC_UNTRACK()
16 #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1()
17 #include "pycore_pystate.h" // _PyInterpreterState_GET()
18
19 #include "_iomodule.h"
20
21 /*[clinic input]
22 module _io
23 class _io.IncrementalNewlineDecoder "nldecoder_object *" "clinic_state()->PyIncrementalNewlineDecoder_Type"
24 class _io.TextIOWrapper "textio *" "clinic_state()->TextIOWrapper_Type"
25 class _io._TextIOBase "PyObject *" "&PyTextIOBase_Type"
26 [clinic start generated code]*/
27 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=8b7f24fa13bfdd7f]*/
28
29 typedef struct nldecoder_object nldecoder_object;
30 typedef struct textio textio;
31
32 #define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
33 #include "clinic/textio.c.h"
34 #undef clinic_state
35
36 /* TextIOBase */
37
/* Docstring for the _io._TextIOBase type (installed via Py_tp_doc). */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable.\n"
    );
45
46 static PyObject *
_unsupported(_PyIO_State * state,const char * message)47 _unsupported(_PyIO_State *state, const char *message)
48 {
49 PyErr_SetString(state->unsupported_operation, message);
50 return NULL;
51 }
52
53 /*[clinic input]
54 _io._TextIOBase.detach
55 cls: defining_class
56 /
57
58 Separate the underlying buffer from the TextIOBase and return it.
59
60 After the underlying buffer has been detached, the TextIO is in an unusable state.
61 [clinic start generated code]*/
62
63 static PyObject *
_io__TextIOBase_detach_impl(PyObject * self,PyTypeObject * cls)64 _io__TextIOBase_detach_impl(PyObject *self, PyTypeObject *cls)
65 /*[clinic end generated code: output=50915f40c609eaa4 input=987ca3640d0a3776]*/
66 {
67 _PyIO_State *state = get_io_state_by_cls(cls);
68 return _unsupported(state, "detach");
69 }
70
71 /*[clinic input]
72 _io._TextIOBase.read
73 cls: defining_class
74 size: int(unused=True) = -1
75 /
76
77 Read at most size characters from stream.
78
79 Read from underlying buffer until we have size characters or we hit EOF.
80 If size is negative or omitted, read until EOF.
81 [clinic start generated code]*/
82
83 static PyObject *
_io__TextIOBase_read_impl(PyObject * self,PyTypeObject * cls,int Py_UNUSED (size))84 _io__TextIOBase_read_impl(PyObject *self, PyTypeObject *cls,
85 int Py_UNUSED(size))
86 /*[clinic end generated code: output=51a5178a309ce647 input=f5e37720f9fc563f]*/
87 {
88 _PyIO_State *state = get_io_state_by_cls(cls);
89 return _unsupported(state, "read");
90 }
91
92 /*[clinic input]
93 _io._TextIOBase.readline
94 cls: defining_class
95 size: int(unused=True) = -1
96 /
97
98 Read until newline or EOF.
99
100 Return an empty string if EOF is hit immediately.
101 If size is specified, at most size characters will be read.
102 [clinic start generated code]*/
103
104 static PyObject *
_io__TextIOBase_readline_impl(PyObject * self,PyTypeObject * cls,int Py_UNUSED (size))105 _io__TextIOBase_readline_impl(PyObject *self, PyTypeObject *cls,
106 int Py_UNUSED(size))
107 /*[clinic end generated code: output=3f47d7966d6d074e input=42eafec94107fa27]*/
108 {
109 _PyIO_State *state = get_io_state_by_cls(cls);
110 return _unsupported(state, "readline");
111 }
112
113 /*[clinic input]
114 _io._TextIOBase.write
115 cls: defining_class
116 s: str(unused=True)
117 /
118
119 Write string s to stream.
120
121 Return the number of characters written
122 (which is always equal to the length of the string).
123 [clinic start generated code]*/
124
125 static PyObject *
_io__TextIOBase_write_impl(PyObject * self,PyTypeObject * cls,const char * Py_UNUSED (s))126 _io__TextIOBase_write_impl(PyObject *self, PyTypeObject *cls,
127 const char *Py_UNUSED(s))
128 /*[clinic end generated code: output=18b28231460275de input=e9cabaa5f6732b07]*/
129 {
130 _PyIO_State *state = get_io_state_by_cls(cls);
131 return _unsupported(state, "write");
132 }
133
134 /*[clinic input]
135 @getter
136 _io._TextIOBase.encoding
137
138 Encoding of the text stream.
139
140 Subclasses should override.
141 [clinic start generated code]*/
142
143 static PyObject *
_io__TextIOBase_encoding_get_impl(PyObject * self)144 _io__TextIOBase_encoding_get_impl(PyObject *self)
145 /*[clinic end generated code: output=e0f5d8f548b92432 input=4736d7621dd38f43]*/
146 {
147 Py_RETURN_NONE;
148 }
149
150 /*[clinic input]
151 @getter
152 _io._TextIOBase.newlines
153
154 Line endings translated so far.
155
156 Only line endings translated during reading are considered.
157
158 Subclasses should override.
159 [clinic start generated code]*/
160
161 static PyObject *
_io__TextIOBase_newlines_get_impl(PyObject * self)162 _io__TextIOBase_newlines_get_impl(PyObject *self)
163 /*[clinic end generated code: output=46ec147fb9f00c2a input=a5b196d076af1164]*/
164 {
165 Py_RETURN_NONE;
166 }
167
168 /*[clinic input]
169 @getter
170 _io._TextIOBase.errors
171
172 The error setting of the decoder or encoder.
173
174 Subclasses should override.
175 [clinic start generated code]*/
176
177 static PyObject *
_io__TextIOBase_errors_get_impl(PyObject * self)178 _io__TextIOBase_errors_get_impl(PyObject *self)
179 /*[clinic end generated code: output=c6623d6addcd087d input=974aa52d1db93a82]*/
180 {
181 Py_RETURN_NONE;
182 }
183
184
/* Method table for _io._TextIOBase.  The *_METHODDEF macros come from the
   Argument Clinic generated header and expand to complete PyMethodDef
   entries (including the trailing comma). */
static PyMethodDef textiobase_methods[] = {
    _IO__TEXTIOBASE_DETACH_METHODDEF
    _IO__TEXTIOBASE_READ_METHODDEF
    _IO__TEXTIOBASE_READLINE_METHODDEF
    _IO__TEXTIOBASE_WRITE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
192
/* Property table for _io._TextIOBase (encoding, newlines, errors).  The
   *_GETSETDEF macros come from the Argument Clinic generated header. */
static PyGetSetDef textiobase_getset[] = {
    _IO__TEXTIOBASE_ENCODING_GETSETDEF
    _IO__TEXTIOBASE_NEWLINES_GETSETDEF
    _IO__TEXTIOBASE_ERRORS_GETSETDEF
    {NULL}  /* sentinel */
};
199
/* Type slots for _io._TextIOBase: docstring, methods and properties. */
static PyType_Slot textiobase_slots[] = {
    {Py_tp_doc, (void *)textiobase_doc},
    {Py_tp_methods, textiobase_methods},
    {Py_tp_getset, textiobase_getset},
    {0, NULL},  /* sentinel */
};
206
/* Type specification for _io._TextIOBase.
   Do not set Py_TPFLAGS_HAVE_GC so that tp_traverse and tp_clear are
   inherited. */
PyType_Spec textiobase_spec = {
    .name = "_io._TextIOBase",
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = textiobase_slots,
};
214
215 /* IncrementalNewlineDecoder */
216
/* Instance layout of _io.IncrementalNewlineDecoder. */
struct nldecoder_object {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder; Py_None means the input
                                   is used as-is (see decode()) */
    PyObject *errors;           /* error handler name; NULL until __init__()
                                   has run (see CHECK_INITIALIZED_DECODER) */
    unsigned int pendingcr: 1;  /* a trailing '\r' was held back by the
                                   previous non-final decode() call */
    unsigned int translate: 1;  /* translate "\r" and "\r\n" to "\n" */
    unsigned int seennl: 3;     /* bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF */
};
225
226 /*[clinic input]
227 _io.IncrementalNewlineDecoder.__init__
228 decoder: object
229 translate: bool
230 errors: object(c_default="NULL") = "strict"
231
232 Codec used when reading a file in universal newlines mode.
233
234 It wraps another incremental decoder, translating \r\n and \r into \n.
235 It also records the types of newlines encountered. When used with
236 translate=False, it ensures that the newline sequence is returned in
237 one piece. When used with decoder=None, it expects unicode strings as
238 decode input and translates newlines without first invoking an external
239 decoder.
240 [clinic start generated code]*/
241
242 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)243 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
244 PyObject *decoder, int translate,
245 PyObject *errors)
246 /*[clinic end generated code: output=fbd04d443e764ec2 input=ed547aa257616b0e]*/
247 {
248
249 if (errors == NULL) {
250 errors = &_Py_ID(strict);
251 }
252 else {
253 errors = Py_NewRef(errors);
254 }
255
256 Py_XSETREF(self->errors, errors);
257 Py_XSETREF(self->decoder, Py_NewRef(decoder));
258 self->translate = translate ? 1 : 0;
259 self->seennl = 0;
260 self->pendingcr = 0;
261
262 return 0;
263 }
264
/* tp_traverse: report the type object and the two owned references to the
   cyclic garbage collector. */
static int
incrementalnewlinedecoder_traverse(nldecoder_object *self, visitproc visit,
                                   void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->decoder);
    Py_VISIT(self->errors);
    return 0;
}
274
/* tp_clear: drop the owned references.  Afterwards self->errors is NULL, so
   the object looks uninitialized to CHECK_INITIALIZED_DECODER. */
static int
incrementalnewlinedecoder_clear(nldecoder_object *self)
{
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->errors);
    return 0;
}
282
/* tp_dealloc: untrack from the GC, release owned references, free the
   instance, then drop the reference held on its type. */
static void
incrementalnewlinedecoder_dealloc(nldecoder_object *self)
{
    /* Fetch the type first: `self` is invalid after tp_free(). */
    PyTypeObject *tp = Py_TYPE(self);
    _PyObject_GC_UNTRACK(self);
    (void)incrementalnewlinedecoder_clear(self);
    tp->tp_free((PyObject *)self);
    Py_DECREF(tp);
}
292
293 static int
check_decoded(PyObject * decoded)294 check_decoded(PyObject *decoded)
295 {
296 if (decoded == NULL)
297 return -1;
298 if (!PyUnicode_Check(decoded)) {
299 PyErr_Format(PyExc_TypeError,
300 "decoder should return a string result, not '%.200s'",
301 Py_TYPE(decoded)->tp_name);
302 Py_DECREF(decoded);
303 return -1;
304 }
305 return 0;
306 }
307
/* Guard for IncrementalNewlineDecoder methods: if __init__() has not run
   (self->errors is still NULL), set ValueError and return NULL from the
   *calling* function.

   Wrapped in do { ... } while (0) so that `CHECK_INITIALIZED_DECODER(x);`
   is exactly one statement (no stray empty statement, no dangling-else
   hazard), and the argument is parenthesized. */
#define CHECK_INITIALIZED_DECODER(self) \
    do { \
        if ((self)->errors == NULL) { \
            PyErr_SetString(PyExc_ValueError, \
                            "IncrementalNewlineDecoder.__init__() not called"); \
            return NULL; \
        } \
    } while (0)
314
/* Bit flags stored in nldecoder_object.seennl, recording which kinds of
   line ending have been encountered so far. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
319
/* Core of IncrementalNewlineDecoder.decode().

   `myself` must be an nldecoder_object.  `input` is passed to the wrapped
   decoder's decode() method, or used directly when the decoder is None.
   `final` is forwarded to the wrapped decoder and also disables the
   "hold back a trailing \r" logic.

   Returns a new reference to the resulting str, or NULL with an exception
   set.  Updates self->pendingcr and self->seennl as a side effect. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    CHECK_INITIALIZED_DECODER(self);

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = Py_NewRef(input);
    }

    /* check_decoded() releases `output` itself when it is not a str. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        /* Same max char as `output`, so both strings share the same kind
           and the raw memcpy below copies whole code units. */
        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, out, 0, '\r');
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_SETREF(output, modified); /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_SETREF(output, modified);
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        const void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* For 1-byte strings a '\n' byte is a '\n' character; for
                   wider kinds the byte may belong to another code point,
                   so the characters are scanned one by one. */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        /* NOTE(review): no bounds check here; presumably
                           relies on the string data being NUL-terminated
                           -- confirm. */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* Not translating: only record which newline kinds occur. */
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            /* Translating: copy into a scratch buffer, rewriting "\r" and
               "\r\n" as "\n".  The result is never longer than the input.
               Note that `kind` and `in_str` below shadow the outer
               variables with the same values. */
            void *translated;
            int kind = PyUnicode_KIND(output);
            const void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that character was beyond the last
                   real one. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* `output` was already released above: do not go to `error`. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
513
514 /*[clinic input]
515 _io.IncrementalNewlineDecoder.decode
516 input: object
517 final: bool = False
518 [clinic start generated code]*/
519
520 static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object * self,PyObject * input,int final)521 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
522 PyObject *input, int final)
523 /*[clinic end generated code: output=0d486755bb37a66e input=90e223c70322c5cd]*/
524 {
525 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
526 }
527
528 /*[clinic input]
529 _io.IncrementalNewlineDecoder.getstate
530 [clinic start generated code]*/
531
532 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)533 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
534 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
535 {
536 PyObject *buffer;
537 unsigned long long flag;
538
539 CHECK_INITIALIZED_DECODER(self);
540
541 if (self->decoder != Py_None) {
542 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
543 &_Py_ID(getstate));
544 if (state == NULL)
545 return NULL;
546 if (!PyTuple_Check(state)) {
547 PyErr_SetString(PyExc_TypeError,
548 "illegal decoder state");
549 Py_DECREF(state);
550 return NULL;
551 }
552 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
553 &buffer, &flag))
554 {
555 Py_DECREF(state);
556 return NULL;
557 }
558 Py_INCREF(buffer);
559 Py_DECREF(state);
560 }
561 else {
562 buffer = PyBytes_FromString("");
563 flag = 0;
564 }
565 flag <<= 1;
566 if (self->pendingcr)
567 flag |= 1;
568 return Py_BuildValue("NK", buffer, flag);
569 }
570
571 /*[clinic input]
572 _io.IncrementalNewlineDecoder.setstate
573 state: object
574 /
575 [clinic start generated code]*/
576
577 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)578 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
579 PyObject *state)
580 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
581 {
582 PyObject *buffer;
583 unsigned long long flag;
584
585 CHECK_INITIALIZED_DECODER(self);
586
587 if (!PyTuple_Check(state)) {
588 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
589 return NULL;
590 }
591 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
592 &buffer, &flag))
593 {
594 return NULL;
595 }
596
597 self->pendingcr = (int) (flag & 1);
598 flag >>= 1;
599
600 if (self->decoder != Py_None) {
601 return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
602 "((OK))", buffer, flag);
603 }
604 else {
605 Py_RETURN_NONE;
606 }
607 }
608
609 /*[clinic input]
610 _io.IncrementalNewlineDecoder.reset
611 [clinic start generated code]*/
612
613 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)614 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
615 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
616 {
617 CHECK_INITIALIZED_DECODER(self);
618
619 self->seennl = 0;
620 self->pendingcr = 0;
621 if (self->decoder != Py_None)
622 return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
623 else
624 Py_RETURN_NONE;
625 }
626
627 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)628 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
629 {
630 CHECK_INITIALIZED_DECODER(self);
631
632 switch (self->seennl) {
633 case SEEN_CR:
634 return PyUnicode_FromString("\r");
635 case SEEN_LF:
636 return PyUnicode_FromString("\n");
637 case SEEN_CRLF:
638 return PyUnicode_FromString("\r\n");
639 case SEEN_CR | SEEN_LF:
640 return Py_BuildValue("ss", "\r", "\n");
641 case SEEN_CR | SEEN_CRLF:
642 return Py_BuildValue("ss", "\r", "\r\n");
643 case SEEN_LF | SEEN_CRLF:
644 return Py_BuildValue("ss", "\n", "\r\n");
645 case SEEN_CR | SEEN_LF | SEEN_CRLF:
646 return Py_BuildValue("sss", "\r", "\n", "\r\n");
647 default:
648 Py_RETURN_NONE;
649 }
650
651 }
652
653 /* TextIOWrapper */
654
/* Signature of the specialized encoders below.  The helpers are declared
   with a `textio *` first parameter and cast to this type when stored. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
657
/* Instance layout of _io.TextIOWrapper. */
struct textio
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;

    /* Module state; used e.g. to reach PyIncrementalNewlineDecoder_Type */
    _PyIO_State *state;
};
718
/* Forward declaration of a static helper (its definition is not part of
   this section of the file). */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
721
722 /* A couple of specialized cases in order to bypass the slow incremental
723 encoding methods for the most popular encodings. */
724
725 static PyObject *
ascii_encode(textio * self,PyObject * text)726 ascii_encode(textio *self, PyObject *text)
727 {
728 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
729 }
730
731 static PyObject *
utf16be_encode(textio * self,PyObject * text)732 utf16be_encode(textio *self, PyObject *text)
733 {
734 return _PyUnicode_EncodeUTF16(text,
735 PyUnicode_AsUTF8(self->errors), 1);
736 }
737
738 static PyObject *
utf16le_encode(textio * self,PyObject * text)739 utf16le_encode(textio *self, PyObject *text)
740 {
741 return _PyUnicode_EncodeUTF16(text,
742 PyUnicode_AsUTF8(self->errors), -1);
743 }
744
745 static PyObject *
utf16_encode(textio * self,PyObject * text)746 utf16_encode(textio *self, PyObject *text)
747 {
748 if (!self->encoding_start_of_stream) {
749 /* Skip the BOM and use native byte ordering */
750 #if PY_BIG_ENDIAN
751 return utf16be_encode(self, text);
752 #else
753 return utf16le_encode(self, text);
754 #endif
755 }
756 return _PyUnicode_EncodeUTF16(text,
757 PyUnicode_AsUTF8(self->errors), 0);
758 }
759
760 static PyObject *
utf32be_encode(textio * self,PyObject * text)761 utf32be_encode(textio *self, PyObject *text)
762 {
763 return _PyUnicode_EncodeUTF32(text,
764 PyUnicode_AsUTF8(self->errors), 1);
765 }
766
767 static PyObject *
utf32le_encode(textio * self,PyObject * text)768 utf32le_encode(textio *self, PyObject *text)
769 {
770 return _PyUnicode_EncodeUTF32(text,
771 PyUnicode_AsUTF8(self->errors), -1);
772 }
773
774 static PyObject *
utf32_encode(textio * self,PyObject * text)775 utf32_encode(textio *self, PyObject *text)
776 {
777 if (!self->encoding_start_of_stream) {
778 /* Skip the BOM and use native byte ordering */
779 #if PY_BIG_ENDIAN
780 return utf32be_encode(self, text);
781 #else
782 return utf32le_encode(self, text);
783 #endif
784 }
785 return _PyUnicode_EncodeUTF32(text,
786 PyUnicode_AsUTF8(self->errors), 0);
787 }
788
789 static PyObject *
utf8_encode(textio * self,PyObject * text)790 utf8_encode(textio *self, PyObject *text)
791 {
792 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
793 }
794
795 static PyObject *
latin1_encode(textio * self,PyObject * text)796 latin1_encode(textio *self, PyObject *text)
797 {
798 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
799 }
800
801 // Return true when encoding can be skipped when text is ascii.
802 static inline int
is_asciicompat_encoding(encodefunc_t f)803 is_asciicompat_encoding(encodefunc_t f)
804 {
805 return f == (encodefunc_t) ascii_encode
806 || f == (encodefunc_t) latin1_encode
807 || f == (encodefunc_t) utf8_encode;
808 }
809
810 /* Map normalized encoding names onto the specialized encoding funcs */
811
/* One row of the codec-name -> specialized-encoder lookup table. */
typedef struct {
    const char *name;         /* codec name as compared against codec_info.name */
    encodefunc_t encodefunc;  /* matching specialized encoder */
} encodefuncentry;
816
/* Lookup table scanned linearly by _textiowrapper_set_encoder(); the scan
   stops at the first match or at the {NULL, NULL} sentinel. */
static const encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
829
830 static int
validate_newline(const char * newline)831 validate_newline(const char *newline)
832 {
833 if (newline && newline[0] != '\0'
834 && !(newline[0] == '\n' && newline[1] == '\0')
835 && !(newline[0] == '\r' && newline[1] == '\0')
836 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
837 PyErr_Format(PyExc_ValueError,
838 "illegal newline value: %s", newline);
839 return -1;
840 }
841 return 0;
842 }
843
844 static int
set_newline(textio * self,const char * newline)845 set_newline(textio *self, const char *newline)
846 {
847 PyObject *old = self->readnl;
848 if (newline == NULL) {
849 self->readnl = NULL;
850 }
851 else {
852 self->readnl = PyUnicode_FromString(newline);
853 if (self->readnl == NULL) {
854 self->readnl = old;
855 return -1;
856 }
857 }
858 self->readuniversal = (newline == NULL || newline[0] == '\0');
859 self->readtranslate = (newline == NULL);
860 self->writetranslate = (newline == NULL || newline[0] != '\0');
861 if (!self->readuniversal && self->readnl != NULL) {
862 // validate_newline() accepts only ASCII newlines.
863 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
864 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
865 if (strcmp(self->writenl, "\n") == 0) {
866 self->writenl = NULL;
867 }
868 }
869 else {
870 #ifdef MS_WINDOWS
871 self->writenl = "\r\n";
872 #else
873 self->writenl = NULL;
874 #endif
875 }
876 Py_XDECREF(old);
877 return 0;
878 }
879
880 static int
_textiowrapper_set_decoder(textio * self,PyObject * codec_info,const char * errors)881 _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
882 const char *errors)
883 {
884 PyObject *res;
885 int r;
886
887 res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
888 if (res == NULL)
889 return -1;
890
891 r = PyObject_IsTrue(res);
892 Py_DECREF(res);
893 if (r == -1)
894 return -1;
895
896 if (r != 1)
897 return 0;
898
899 Py_CLEAR(self->decoder);
900 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
901 if (self->decoder == NULL)
902 return -1;
903
904 if (self->readuniversal) {
905 _PyIO_State *state = self->state;
906 PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
907 (PyObject *)state->PyIncrementalNewlineDecoder_Type,
908 self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
909 if (incrementalDecoder == NULL)
910 return -1;
911 Py_XSETREF(self->decoder, incrementalDecoder);
912 }
913
914 return 0;
915 }
916
917 static PyObject*
_textiowrapper_decode(_PyIO_State * state,PyObject * decoder,PyObject * bytes,int eof)918 _textiowrapper_decode(_PyIO_State *state, PyObject *decoder, PyObject *bytes,
919 int eof)
920 {
921 PyObject *chars;
922
923 if (Py_IS_TYPE(decoder, state->PyIncrementalNewlineDecoder_Type))
924 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
925 else
926 chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes,
927 eof ? Py_True : Py_False, NULL);
928
929 if (check_decoded(chars) < 0)
930 // check_decoded already decreases refcount
931 return NULL;
932
933 return chars;
934 }
935
936 static int
_textiowrapper_set_encoder(textio * self,PyObject * codec_info,const char * errors)937 _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
938 const char *errors)
939 {
940 PyObject *res;
941 int r;
942
943 res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
944 if (res == NULL)
945 return -1;
946
947 r = PyObject_IsTrue(res);
948 Py_DECREF(res);
949 if (r == -1)
950 return -1;
951
952 if (r != 1)
953 return 0;
954
955 Py_CLEAR(self->encoder);
956 self->encodefunc = NULL;
957 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
958 if (self->encoder == NULL)
959 return -1;
960
961 /* Get the normalized named of the codec */
962 if (PyObject_GetOptionalAttr(codec_info, &_Py_ID(name), &res) < 0) {
963 return -1;
964 }
965 if (res != NULL && PyUnicode_Check(res)) {
966 const encodefuncentry *e = encodefuncs;
967 while (e->name != NULL) {
968 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
969 self->encodefunc = e->encodefunc;
970 break;
971 }
972 e++;
973 }
974 }
975 Py_XDECREF(res);
976
977 return 0;
978 }
979
980 static int
_textiowrapper_fix_encoder_state(textio * self)981 _textiowrapper_fix_encoder_state(textio *self)
982 {
983 if (!self->seekable || !self->encoder) {
984 return 0;
985 }
986
987 self->encoding_start_of_stream = 1;
988
989 PyObject *cookieObj = PyObject_CallMethodNoArgs(
990 self->buffer, &_Py_ID(tell));
991 if (cookieObj == NULL) {
992 return -1;
993 }
994
995 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
996 Py_DECREF(cookieObj);
997 if (cmp < 0) {
998 return -1;
999 }
1000
1001 if (cmp == 0) {
1002 self->encoding_start_of_stream = 0;
1003 PyObject *res = PyObject_CallMethodOneArg(
1004 self->encoder, &_Py_ID(setstate), _PyLong_GetZero());
1005 if (res == NULL) {
1006 return -1;
1007 }
1008 Py_DECREF(res);
1009 }
1010
1011 return 0;
1012 }
1013
1014 static int
io_check_errors(PyObject * errors)1015 io_check_errors(PyObject *errors)
1016 {
1017 assert(errors != NULL && errors != Py_None);
1018
1019 PyInterpreterState *interp = _PyInterpreterState_GET();
1020 #ifndef Py_DEBUG
1021 /* In release mode, only check in development mode (-X dev) */
1022 if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
1023 return 0;
1024 }
1025 #else
1026 /* Always check in debug mode */
1027 #endif
1028
1029 /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1030 before_PyUnicode_InitEncodings() is called. */
1031 if (!interp->unicode.fs_codec.encoding) {
1032 return 0;
1033 }
1034
1035 const char *name = _PyUnicode_AsUTF8NoNUL(errors);
1036 if (name == NULL) {
1037 return -1;
1038 }
1039 PyObject *handler = PyCodec_LookupError(name);
1040 if (handler != NULL) {
1041 Py_DECREF(handler);
1042 return 0;
1043 }
1044 return -1;
1045 }
1046
1047
1048
1049 /*[clinic input]
1050 _io.TextIOWrapper.__init__
1051 buffer: object
1052 encoding: str(accept={str, NoneType}) = None
1053 errors: object = None
1054 newline: str(accept={str, NoneType}) = None
1055 line_buffering: bool = False
1056 write_through: bool = False
1057
1058 Character and line based layer over a BufferedIOBase object, buffer.
1059
1060 encoding gives the name of the encoding that the stream will be
1061 decoded or encoded with. It defaults to locale.getencoding().
1062
1063 errors determines the strictness of encoding and decoding (see
1064 help(codecs.Codec) or the documentation for codecs.register) and
1065 defaults to "strict".
1066
1067 newline controls how line endings are handled. It can be None, '',
1068 '\n', '\r', and '\r\n'. It works as follows:
1069
1070 * On input, if newline is None, universal newlines mode is
1071 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1072 these are translated into '\n' before being returned to the
1073 caller. If it is '', universal newline mode is enabled, but line
1074 endings are returned to the caller untranslated. If it has any of
1075 the other legal values, input lines are only terminated by the given
1076 string, and the line ending is returned to the caller untranslated.
1077
1078 * On output, if newline is None, any '\n' characters written are
1079 translated to the system default line separator, os.linesep. If
1080 newline is '' or '\n', no translation takes place. If newline is any
1081 of the other legal values, any '\n' characters written are translated
1082 to the given string.
1083
1084 If line_buffering is True, a call to flush is implied when a call to
1085 write contains a newline character.
1086 [clinic start generated code]*/
1087
static int
_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
                                const char *encoding, PyObject *errors,
                                const char *newline, int line_buffering,
                                int write_through)
/*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/
{
    /* Returns 0 on success and -1 on failure.  self->ok is cleared on entry
     * and is only set back to 1 once every step below has succeeded. */
    PyObject *raw, *codec_info = NULL;
    PyObject *res;
    int r;

    self->ok = 0;
    self->detached = 0;

    /* No explicit encoding was given: emit EncodingWarning when the
     * interpreter configuration has warn_default_encoding set. */
    if (encoding == NULL) {
        PyInterpreterState *interp = _PyInterpreterState_GET();
        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
            if (PyErr_WarnEx(PyExc_EncodingWarning,
                             "'encoding' argument not specified", 1)) {
                return -1;
            }
        }
    }

    /* Normalise and validate 'errors': None selects the "strict" handler,
     * anything else must be a str accepted by io_check_errors(). */
    if (errors == Py_None) {
        errors = &_Py_ID(strict);
    }
    else if (!PyUnicode_Check(errors)) {
        // Check 'errors' argument here because Argument Clinic doesn't support
        // 'str(accept={str, NoneType})' converter.
        PyErr_Format(
            PyExc_TypeError,
            "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
            Py_TYPE(errors)->tp_name);
        return -1;
    }
    else if (io_check_errors(errors)) {
        return -1;
    }
    /* C string form of 'errors', passed to the decoder/encoder setup below. */
    const char *errors_str = _PyUnicode_AsUTF8NoNUL(errors);
    if (errors_str == NULL) {
        return -1;
    }

    if (validate_newline(newline) < 0) {
        return -1;
    }

    /* Release whatever these fields currently hold and reset the counters
     * before building the new state. */
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);
    self->decoded_chars_used = 0;
    self->pending_bytes_count = 0;
    self->encodefunc = NULL;
    self->b2cratio = 0.0;

    /* Resolve an implicit encoding: UTF-8 mode wins when no encoding was
     * given; otherwise a missing encoding or the literal "locale" maps to
     * the locale encoding. */
    if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
        _Py_DECLARE_STR(utf_8, "utf-8");
        self->encoding = &_Py_STR(utf_8);
    }
    else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
        self->encoding = _Py_GetLocaleEncodingObject();
        if (self->encoding == NULL) {
            goto error;
        }
        assert(PyUnicode_Check(self->encoding));
    }

    /* Keep the str object (self->encoding) and the C string (encoding) in
     * sync: derive whichever one is still missing from the other. */
    if (self->encoding != NULL) {
        encoding = PyUnicode_AsUTF8(self->encoding);
        if (encoding == NULL)
            goto error;
    }
    else if (encoding != NULL) {
        self->encoding = PyUnicode_FromString(encoding);
        if (self->encoding == NULL)
            goto error;
    }
    else {
        PyErr_SetString(PyExc_OSError,
                        "could not determine default encoding");
        goto error;
    }

    /* Check we have been asked for a real text encoding */
    codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
    if (codec_info == NULL) {
        Py_CLEAR(self->encoding);
        goto error;
    }

    /* XXX: Failures beyond this point have the potential to leak elements
     * of the partially constructed object (like self->encoding)
     */

    self->errors = Py_NewRef(errors);
    self->chunk_size = 8192;
    self->line_buffering = line_buffering;
    self->write_through = write_through;
    if (set_newline(self, newline) < 0) {
        goto error;
    }

    self->buffer = Py_NewRef(buffer);

    /* Build the decoder object */
    _PyIO_State *state = find_io_state_by_def(Py_TYPE(self));
    self->state = state;
    if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0)
        goto error;

    /* Build the encoder object */
    if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0)
        goto error;

    /* Finished sorting out the codec details */
    Py_CLEAR(codec_info);

    /* Only for the exact buffered types: look up the optional 'raw'
     * attribute and keep it (taking over the reference) when it is exactly
     * a FileIO object. */
    if (Py_IS_TYPE(buffer, state->PyBufferedReader_Type) ||
        Py_IS_TYPE(buffer, state->PyBufferedWriter_Type) ||
        Py_IS_TYPE(buffer, state->PyBufferedRandom_Type))
    {
        if (PyObject_GetOptionalAttr(buffer, &_Py_ID(raw), &raw) < 0)
            goto error;
        /* Cache the raw FileIO object to speed up 'closed' checks */
        if (raw != NULL) {
            if (Py_IS_TYPE(raw, state->PyFileIO_Type))
                self->raw = raw;
            else
                Py_DECREF(raw);
        }
    }

    /* Both 'seekable' and 'telling' start out as the truth value of
     * buffer.seekable(). */
    res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
    if (res == NULL)
        goto error;
    r = PyObject_IsTrue(res);
    Py_DECREF(res);
    if (r < 0)
        goto error;
    self->seekable = self->telling = r;

    /* Remember whether the buffer has read1(); textiowrapper_read_chunk()
     * picks read1() over read() based on this flag. */
    r = PyObject_HasAttrWithError(buffer, &_Py_ID(read1));
    if (r < 0) {
        goto error;
    }
    self->has_read1 = r;

    self->encoding_start_of_stream = 0;
    if (_textiowrapper_fix_encoder_state(self) < 0) {
        goto error;
    }

    self->ok = 1;
    return 0;

  error:
    /* Only codec_info is local; the fields already stored on self are left
     * in place (see the XXX note above). */
    Py_XDECREF(codec_info);
    return -1;
}
1255
1256 /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1257 * -1 on error.
1258 */
1259 static int
convert_optional_bool(PyObject * obj,int default_value)1260 convert_optional_bool(PyObject *obj, int default_value)
1261 {
1262 long v;
1263 if (obj == Py_None) {
1264 v = default_value;
1265 }
1266 else {
1267 v = PyLong_AsLong(obj);
1268 if (v == -1 && PyErr_Occurred())
1269 return -1;
1270 }
1271 return v != 0;
1272 }
1273
1274 static int
textiowrapper_change_encoding(textio * self,PyObject * encoding,PyObject * errors,int newline_changed)1275 textiowrapper_change_encoding(textio *self, PyObject *encoding,
1276 PyObject *errors, int newline_changed)
1277 {
1278 /* Use existing settings where new settings are not specified */
1279 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1280 return 0; // no change
1281 }
1282
1283 if (encoding == Py_None) {
1284 encoding = self->encoding;
1285 if (errors == Py_None) {
1286 errors = self->errors;
1287 }
1288 Py_INCREF(encoding);
1289 }
1290 else {
1291 if (_PyUnicode_EqualToASCIIString(encoding, "locale")) {
1292 encoding = _Py_GetLocaleEncodingObject();
1293 if (encoding == NULL) {
1294 return -1;
1295 }
1296 } else {
1297 Py_INCREF(encoding);
1298 }
1299 if (errors == Py_None) {
1300 errors = &_Py_ID(strict);
1301 }
1302 }
1303 Py_INCREF(errors);
1304
1305 const char *c_encoding = PyUnicode_AsUTF8(encoding);
1306 if (c_encoding == NULL) {
1307 Py_DECREF(encoding);
1308 Py_DECREF(errors);
1309 return -1;
1310 }
1311 const char *c_errors = PyUnicode_AsUTF8(errors);
1312 if (c_errors == NULL) {
1313 Py_DECREF(encoding);
1314 Py_DECREF(errors);
1315 return -1;
1316 }
1317
1318 // Create new encoder & decoder
1319 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1320 c_encoding, "codecs.open()");
1321 if (codec_info == NULL) {
1322 Py_DECREF(encoding);
1323 Py_DECREF(errors);
1324 return -1;
1325 }
1326 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1327 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1328 Py_DECREF(codec_info);
1329 Py_DECREF(encoding);
1330 Py_DECREF(errors);
1331 return -1;
1332 }
1333 Py_DECREF(codec_info);
1334
1335 Py_SETREF(self->encoding, encoding);
1336 Py_SETREF(self->errors, errors);
1337
1338 return _textiowrapper_fix_encoder_state(self);
1339 }
1340
1341 /*[clinic input]
1342 @critical_section
1343 _io.TextIOWrapper.reconfigure
1344 *
1345 encoding: object = None
1346 errors: object = None
1347 newline as newline_obj: object(c_default="NULL") = None
1348 line_buffering as line_buffering_obj: object = None
1349 write_through as write_through_obj: object = None
1350
1351 Reconfigure the text stream with new parameters.
1352
1353 This also does an implicit stream flush.
1354
1355 [clinic start generated code]*/
1356
1357 static PyObject *
_io_TextIOWrapper_reconfigure_impl(textio * self,PyObject * encoding,PyObject * errors,PyObject * newline_obj,PyObject * line_buffering_obj,PyObject * write_through_obj)1358 _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1359 PyObject *errors, PyObject *newline_obj,
1360 PyObject *line_buffering_obj,
1361 PyObject *write_through_obj)
1362 /*[clinic end generated code: output=52b812ff4b3d4b0f input=dc3bd35ebda702a7]*/
1363 {
1364 int line_buffering;
1365 int write_through;
1366 const char *newline = NULL;
1367
1368 if (encoding != Py_None && !PyUnicode_Check(encoding)) {
1369 PyErr_Format(PyExc_TypeError,
1370 "reconfigure() argument 'encoding' must be str or None, not %s",
1371 Py_TYPE(encoding)->tp_name);
1372 return NULL;
1373 }
1374 if (errors != Py_None && !PyUnicode_Check(errors)) {
1375 PyErr_Format(PyExc_TypeError,
1376 "reconfigure() argument 'errors' must be str or None, not %s",
1377 Py_TYPE(errors)->tp_name);
1378 return NULL;
1379 }
1380 if (newline_obj != NULL && newline_obj != Py_None &&
1381 !PyUnicode_Check(newline_obj))
1382 {
1383 PyErr_Format(PyExc_TypeError,
1384 "reconfigure() argument 'newline' must be str or None, not %s",
1385 Py_TYPE(newline_obj)->tp_name);
1386 return NULL;
1387 }
1388 /* Check if something is in the read buffer */
1389 if (self->decoded_chars != NULL) {
1390 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1391 _unsupported(self->state,
1392 "It is not possible to set the encoding or newline "
1393 "of stream after the first read");
1394 return NULL;
1395 }
1396 }
1397
1398 if (newline_obj != NULL && newline_obj != Py_None) {
1399 newline = PyUnicode_AsUTF8(newline_obj);
1400 if (newline == NULL || validate_newline(newline) < 0) {
1401 return NULL;
1402 }
1403 }
1404
1405 line_buffering = convert_optional_bool(line_buffering_obj,
1406 self->line_buffering);
1407 if (line_buffering < 0) {
1408 return NULL;
1409 }
1410 write_through = convert_optional_bool(write_through_obj,
1411 self->write_through);
1412 if (write_through < 0) {
1413 return NULL;
1414 }
1415
1416 if (_PyFile_Flush((PyObject *)self) < 0) {
1417 return NULL;
1418 }
1419 self->b2cratio = 0;
1420
1421 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1422 return NULL;
1423 }
1424
1425 if (textiowrapper_change_encoding(
1426 self, encoding, errors, newline_obj != NULL) < 0) {
1427 return NULL;
1428 }
1429
1430 self->line_buffering = line_buffering;
1431 self->write_through = write_through;
1432 Py_RETURN_NONE;
1433 }
1434
/* Release every object reference held by the wrapper, including the
 * instance dict, and mark the object as not initialised (ok = 0).
 * Always returns 0.  Also called from textiowrapper_dealloc().
 */
static int
textiowrapper_clear(textio *self)
{
    /* Cleared before any reference is dropped. */
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);

    Py_CLEAR(self->dict);
    return 0;
}
1453
/* Deallocator: runs the IOBase finalizer, then releases all references and
 * frees the object.
 */
static void
textiowrapper_dealloc(textio *self)
{
    /* Saved up front because the type is still needed after tp_free(). */
    PyTypeObject *tp = Py_TYPE(self);
    self->finalizing = 1;
    /* A negative result aborts deallocation and leaves the object intact.
     * NOTE(review): presumably this means the object was resurrected by
     * its finalizer -- confirm against _PyIOBase_finalize(). */
    if (_PyIOBase_finalize((PyObject *) self) < 0)
        return;
    self->ok = 0;
    _PyObject_GC_UNTRACK(self);
    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *)self);
    (void)textiowrapper_clear(self);
    tp->tp_free((PyObject *)self);
    /* NOTE(review): presumably the instance owns a reference to its
     * (heap) type, which is released here -- confirm. */
    Py_DECREF(tp);
}
1469
/* GC traversal: visit the type and every object reference stored on the
 * wrapper.  The set of fields mirrors the one released by
 * textiowrapper_clear().  Returns 0 unless a Py_VISIT() call returns early.
 */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1488
/* Forward declaration: CHECK_CLOSED() below calls the 'closed' getter. */
static PyObject *
_io_TextIOWrapper_closed_get_impl(textio *self);

/* This macro takes some shortcuts to make the common case faster.
 *
 * It makes the enclosing function return NULL (so it can only be used in
 * functions returning a pointer) when the stream is closed or when the
 * check itself fails:
 *
 *   - If self is exactly a TextIOWrapper and a raw FileIO object was cached
 *     in self->raw, the raw object is asked directly.
 *   - If self is exactly a TextIOWrapper without a cached raw object, the
 *     'closed' getter is called and its truth value is used.
 *   - In both of those cases a closed stream raises
 *     ValueError("I/O operation on closed file.").
 *   - For any other type (subclasses), _PyIOBase_check_closed() does the
 *     check.
 */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = _io_TextIOWrapper_closed_get_impl(self); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1518
/* Make the enclosing function return NULL with a ValueError if the object
 * has not been successfully initialised (self->ok <= 0).
 *
 * NOTE(review): this expands to a bare `if` statement rather than a
 * do { } while (0) block, so avoid using it as the body of an unbraced
 * if/else.
 */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return NULL; \
    }

/* CHECK_INITIALIZED() plus a check that the underlying buffer has not been
 * detached; makes the enclosing function return NULL on failure.
 * Expands to two statements.
 */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return NULL; \
    }

/* Same checks as CHECK_ATTACHED(), for functions that report errors by
 * returning -1.
 */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return -1; \
    }
1544
1545
1546 /*[clinic input]
1547 @critical_section
1548 _io.TextIOWrapper.detach
1549 [clinic start generated code]*/
1550
1551 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1552 _io_TextIOWrapper_detach_impl(textio *self)
1553 /*[clinic end generated code: output=7ba3715cd032d5f2 input=c908a3b4ef203b0f]*/
1554 {
1555 PyObject *buffer;
1556 CHECK_ATTACHED(self);
1557 if (_PyFile_Flush((PyObject *)self) < 0) {
1558 return NULL;
1559 }
1560 buffer = self->buffer;
1561 self->buffer = NULL;
1562 self->detached = 1;
1563 return buffer;
1564 }
1565
1566 /* Flush the internal write buffer. This doesn't explicitly flush the
1567 underlying buffered object, though. */
1568 static int
_textiowrapper_writeflush(textio * self)1569 _textiowrapper_writeflush(textio *self)
1570 {
1571 if (self->pending_bytes == NULL)
1572 return 0;
1573
1574 PyObject *pending = self->pending_bytes;
1575 PyObject *b;
1576
1577 if (PyBytes_Check(pending)) {
1578 b = Py_NewRef(pending);
1579 }
1580 else if (PyUnicode_Check(pending)) {
1581 assert(PyUnicode_IS_ASCII(pending));
1582 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1583 b = PyBytes_FromStringAndSize(
1584 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1585 if (b == NULL) {
1586 return -1;
1587 }
1588 }
1589 else {
1590 assert(PyList_Check(pending));
1591 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1592 if (b == NULL) {
1593 return -1;
1594 }
1595
1596 char *buf = PyBytes_AsString(b);
1597 Py_ssize_t pos = 0;
1598
1599 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1600 PyObject *obj = PyList_GET_ITEM(pending, i);
1601 char *src;
1602 Py_ssize_t len;
1603 if (PyUnicode_Check(obj)) {
1604 assert(PyUnicode_IS_ASCII(obj));
1605 src = PyUnicode_DATA(obj);
1606 len = PyUnicode_GET_LENGTH(obj);
1607 }
1608 else {
1609 assert(PyBytes_Check(obj));
1610 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1611 Py_DECREF(b);
1612 return -1;
1613 }
1614 }
1615 memcpy(buf + pos, src, len);
1616 pos += len;
1617 }
1618 assert(pos == self->pending_bytes_count);
1619 }
1620
1621 self->pending_bytes_count = 0;
1622 self->pending_bytes = NULL;
1623 Py_DECREF(pending);
1624
1625 PyObject *ret;
1626 do {
1627 ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
1628 } while (ret == NULL && _PyIO_trap_eintr());
1629 Py_DECREF(b);
1630 // NOTE: We cleared buffer but we don't know how many bytes are actually written
1631 // when an error occurred.
1632 if (ret == NULL)
1633 return -1;
1634 Py_DECREF(ret);
1635 return 0;
1636 }
1637
1638 /*[clinic input]
1639 @critical_section
1640 _io.TextIOWrapper.write
1641 text: unicode
1642 /
1643 [clinic start generated code]*/
1644
static PyObject *
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
/*[clinic end generated code: output=d2deb0d50771fcec input=73ec95c5c4a3489c]*/
{
    /* Encodes *text*, queues the result in self->pending_bytes and flushes
     * when required.  Returns the length of the original text (before any
     * newline translation) as an int, or NULL with an exception set. */
    PyObject *ret;
    PyObject *b;
    Py_ssize_t textlen;
    int haslf = 0;
    int needflush = 0, text_needflush = 0;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (self->encoder == NULL) {
        return _unsupported(self->state, "not writable");
    }

    /* Own a reference to text: it may be replaced by the translated copy
     * below and is released after encoding. */
    Py_INCREF(text);

    textlen = PyUnicode_GET_LENGTH(text);

    /* Only scan for '\n' when the answer is needed: for newline
     * translation or for line buffering. */
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
            haslf = 1;

    /* Newline translation: replace every "\n" with self->writenl. */
    if (haslf && self->writetranslate && self->writenl != NULL) {
        PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
                                                 "ss", "\n", self->writenl);
        Py_DECREF(text);
        if (newtext == NULL)
            return NULL;
        text = newtext;
    }

    /* text_needflush: push our pending bytes down to the buffer.
     * needflush: additionally flush the buffer itself (line buffering and
     * the text contains '\n' or '\r'). */
    if (self->write_through)
        text_needflush = 1;
    if (self->line_buffering &&
        (haslf ||
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
        needflush = 1;

    /* XXX What if we were just reading? */
    if (self->encodefunc != NULL) {
        /* Fast path: short ASCII text with an ASCII-compatible encoding is
         * queued as the str object itself (b == text) instead of being
         * encoded; _textiowrapper_writeflush() converts it later. */
        if (PyUnicode_IS_ASCII(text) &&
                // See bpo-43260
                PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
                is_asciicompat_encoding(self->encodefunc)) {
            b = Py_NewRef(text);
        }
        else {
            b = (*self->encodefunc)((PyObject *) self, text);
        }
        self->encoding_start_of_stream = 0;
    }
    else {
        b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
    }

    Py_DECREF(text);
    if (b == NULL)
        return NULL;
    /* When b == text, b holds its own reference, so text is still alive
     * here; in every other case the encoder must have produced bytes. */
    if (b != text && !PyBytes_Check(b)) {
        PyErr_Format(PyExc_TypeError,
                     "encoder should return a bytes object, not '%.200s'",
                     Py_TYPE(b)->tp_name);
        Py_DECREF(b);
        return NULL;
    }

    /* For the ASCII fast path the character count is the byte count. */
    Py_ssize_t bytes_len;
    if (b == text) {
        bytes_len = PyUnicode_GET_LENGTH(b);
    }
    else {
        bytes_len = PyBytes_GET_SIZE(b);
    }

    // We should avoid concatenating huge data.
    // Flush the buffer before adding b to the buffer if b is not small.
    // https://github.com/python/cpython/issues/87426
    if (bytes_len >= self->chunk_size) {
        // _textiowrapper_writeflush() calls buffer.write().
        // self->pending_bytes can be appended during buffer->write()
        // or other thread.
        // We need to loop until buffer becomes empty.
        // https://github.com/python/cpython/issues/118138
        // https://github.com/python/cpython/issues/119506
        while (self->pending_bytes != NULL) {
            if (_textiowrapper_writeflush(self) < 0) {
                Py_DECREF(b);
                return NULL;
            }
        }
    }

    /* Queue b.  pending_bytes is NULL (empty), a single bytes/str object,
     * or a list of them; it is promoted to a list on the second item. */
    if (self->pending_bytes == NULL) {
        assert(self->pending_bytes_count == 0);
        self->pending_bytes = b;
    }
    else if (!PyList_CheckExact(self->pending_bytes)) {
        PyObject *list = PyList_New(2);
        if (list == NULL) {
            Py_DECREF(b);
            return NULL;
        }
        // Since Python 3.12, allocating GC object won't trigger GC and release
        // GIL. See https://github.com/python/cpython/issues/97922
        assert(!PyList_CheckExact(self->pending_bytes));
        /* Both references are moved into the new list. */
        PyList_SET_ITEM(list, 0, self->pending_bytes);
        PyList_SET_ITEM(list, 1, b);
        self->pending_bytes = list;
    }
    else {
        if (PyList_Append(self->pending_bytes, b) < 0) {
            Py_DECREF(b);
            return NULL;
        }
        /* PyList_Append() took its own reference. */
        Py_DECREF(b);
    }

    self->pending_bytes_count += bytes_len;
    if (self->pending_bytes_count >= self->chunk_size || needflush ||
        text_needflush) {
        if (_textiowrapper_writeflush(self) < 0)
            return NULL;
    }

    if (needflush) {
        if (_PyFile_Flush(self->buffer) < 0) {
            return NULL;
        }
    }

    /* A write invalidates any read-side state: drop the decoded characters
     * and snapshot, and reset the decoder. */
    if (self->snapshot != NULL) {
        textiowrapper_set_decoded_chars(self, NULL);
        Py_CLEAR(self->snapshot);
    }

    if (self->decoder) {
        ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    return PyLong_FromSsize_t(textlen);
}
1792
/* Steal a reference to chars and store it in the decoded_chars buffer,
 * releasing the previous contents and resetting the read position
 * (decoded_chars_used) to 0.  chars may be NULL to empty the buffer.
 */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
{
    Py_XSETREF(self->decoded_chars, chars);
    self->decoded_chars_used = 0;
}
1801
1802 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1803 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1804 {
1805 PyObject *chars;
1806 Py_ssize_t avail;
1807
1808 if (self->decoded_chars == NULL)
1809 return PyUnicode_FromStringAndSize(NULL, 0);
1810
1811 /* decoded_chars is guaranteed to be "ready". */
1812 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1813 - self->decoded_chars_used);
1814
1815 assert(avail >= 0);
1816
1817 if (n < 0 || n > avail)
1818 n = avail;
1819
1820 if (self->decoded_chars_used > 0 || n < avail) {
1821 chars = PyUnicode_Substring(self->decoded_chars,
1822 self->decoded_chars_used,
1823 self->decoded_chars_used + n);
1824 if (chars == NULL)
1825 return NULL;
1826 }
1827 else {
1828 chars = Py_NewRef(self->decoded_chars);
1829 }
1830
1831 self->decoded_chars_used += n;
1832 return chars;
1833 }
1834
/* Read and decode the next chunk of data from the underlying buffer.
 *
 * size_hint is a number of characters; when positive it is scaled by the
 * last observed bytes-per-character ratio to size the read.
 * Returns 1 if the end of the stream was not reached, 0 on EOF, and -1
 * with an exception set on error.
 */
static int
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
{
    PyObject *dec_buffer = NULL;
    PyObject *dec_flags = NULL;
    PyObject *input_chunk = NULL;
    Py_buffer input_chunk_buf;
    PyObject *decoded_chars, *chunk_size;
    Py_ssize_t nbytes, nchars;
    int eof;

    /* The return value is True unless EOF was reached.  The decoded string is
     * placed in self._decoded_chars (replacing its previous value).  The
     * entire input chunk is sent to the decoder, though some of it may remain
     * buffered in the decoder, yet to be converted.
     */

    if (self->decoder == NULL) {
        _unsupported(self->state, "not readable");
        return -1;
    }

    if (self->telling) {
        /* To prepare for tell(), we need to snapshot a point in the file
         * where the decoder's input buffer is empty.
         */
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
                                                     &_Py_ID(getstate));
        if (state == NULL)
            return -1;
        /* Given this, we know there was a valid snapshot point
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
         */
        if (!PyTuple_Check(state)) {
            PyErr_SetString(PyExc_TypeError,
                            "illegal decoder state");
            Py_DECREF(state);
            return -1;
        }
        /* dec_buffer and dec_flags are borrowed from the tuple here. */
        if (!PyArg_ParseTuple(state,
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
        {
            Py_DECREF(state);
            return -1;
        }

        if (!PyBytes_Check(dec_buffer)) {
            PyErr_Format(PyExc_TypeError,
                         "illegal decoder state: the first item should be a "
                         "bytes object, not '%.200s'",
                         Py_TYPE(dec_buffer)->tp_name);
            Py_DECREF(state);
            return -1;
        }
        /* Take strong references before releasing the tuple; from here on
         * they must be released on every exit path (see 'fail'). */
        Py_INCREF(dec_buffer);
        Py_INCREF(dec_flags);
        Py_DECREF(state);
    }

    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
    if (size_hint > 0) {
        /* Convert the character count into an estimated byte count. */
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
    }
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
    if (chunk_size == NULL)
        goto fail;

    /* read1() is used when the buffer provides it, read() otherwise. */
    input_chunk = PyObject_CallMethodOneArg(self->buffer,
        (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
        chunk_size);
    Py_DECREF(chunk_size);
    if (input_chunk == NULL)
        goto fail;

    /* The buffer view is used to check that the result is bytes-like and
     * to obtain its length. */
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
        PyErr_Format(PyExc_TypeError,
                     "underlying %s() should have returned a bytes-like object, "
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
                     Py_TYPE(input_chunk)->tp_name);
        goto fail;
    }

    nbytes = input_chunk_buf.len;
    /* An empty read means EOF; the decoder is told so as its final flag. */
    eof = (nbytes == 0);

    decoded_chars = _textiowrapper_decode(self->state, self->decoder,
                                          input_chunk, eof);
    PyBuffer_Release(&input_chunk_buf);
    if (decoded_chars == NULL)
        goto fail;

    /* Ownership of decoded_chars moves to self; it stays valid below
     * because self->decoded_chars keeps it alive. */
    textiowrapper_set_decoded_chars(self, decoded_chars);
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
    if (nchars > 0)
        self->b2cratio = (double) nbytes / nchars;
    else
        self->b2cratio = 0.0;
    /* If the decoder still produced characters at EOF, report "not EOF"
     * for this call. */
    if (nchars > 0)
        eof = 0;

    if (self->telling) {
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
         * next input to be decoded is dec_buffer + input_chunk.
         */
        PyObject *next_input = dec_buffer;
        PyBytes_Concat(&next_input, input_chunk);
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
        if (next_input == NULL) {
            goto fail;
        }
        /* "N" hands both references to Py_BuildValue(), so dec_flags must
         * not be released again if it fails. */
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
        if (snapshot == NULL) {
            dec_flags = NULL;
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);
    }
    Py_DECREF(input_chunk);

    return (eof == 0);

  fail:
    Py_XDECREF(dec_buffer);
    Py_XDECREF(dec_flags);
    Py_XDECREF(input_chunk);
    return -1;
}
1964
1965 /*[clinic input]
1966 @critical_section
1967 _io.TextIOWrapper.read
1968 size as n: Py_ssize_t(accept={int, NoneType}) = -1
1969 /
1970 [clinic start generated code]*/
1971
static PyObject *
_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
/*[clinic end generated code: output=7e651ce6cc6a25a6 input=67d14c5661121377]*/
{
    /* n < 0 reads and decodes everything that is left; otherwise up to n
     * characters are returned.  Returns a new str, or NULL with an
     * exception set. */
    PyObject *result = NULL, *chunks = NULL;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (self->decoder == NULL) {
        return _unsupported(self->state, "not readable");
    }

    /* Push out pending written data before reading. */
    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    if (n < 0) {
        /* Read everything */
        PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
        PyObject *decoded;
        if (bytes == NULL)
            goto fail;

        /* Decode with the final flag set; an exact
         * IncrementalNewlineDecoder is called directly instead of through
         * a method lookup. */
        _PyIO_State *state = self->state;
        if (Py_IS_TYPE(self->decoder, state->PyIncrementalNewlineDecoder_Type))
            decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
                                                          bytes, 1);
        else
            decoded = PyObject_CallMethodObjArgs(
                self->decoder, &_Py_ID(decode), bytes, Py_True, NULL);
        Py_DECREF(bytes);
        /* NOTE(review): check_decoded() presumably releases 'decoded' on
         * failure, since it is not released here -- confirm. */
        if (check_decoded(decoded) < 0)
            goto fail;

        /* Characters that were already decoded come first. */
        result = textiowrapper_get_decoded_chars(self, -1);

        if (result == NULL) {
            Py_DECREF(decoded);
            return NULL;
        }

        /* Consumes 'decoded'; result becomes NULL on failure. */
        PyUnicode_AppendAndDel(&result, decoded);
        if (result == NULL)
            goto fail;

        if (self->snapshot != NULL) {
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
        }
        return result;
    }
    else {
        int res = 1;
        Py_ssize_t remaining = n;

        /* Start with whatever is already decoded. */
        result = textiowrapper_get_decoded_chars(self, n);
        if (result == NULL)
            goto fail;
        remaining -= PyUnicode_GET_LENGTH(result);

        /* Keep reading chunks until we have n characters to return */
        while (remaining > 0) {
            res = textiowrapper_read_chunk(self, remaining);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto fail;
            }
            if (res == 0)  /* EOF */
                break;
            /* The list of pieces is only created once a second piece is
             * actually needed. */
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto fail;
            }
            /* Empty pieces are not stored. */
            if (PyUnicode_GET_LENGTH(result) > 0 &&
                PyList_Append(chunks, result) < 0)
                goto fail;
            Py_DECREF(result);
            result = textiowrapper_get_decoded_chars(self, remaining);
            if (result == NULL)
                goto fail;
            remaining -= PyUnicode_GET_LENGTH(result);
        }
        /* Join the collected pieces plus the last one into the result. */
        if (chunks != NULL) {
            if (result != NULL && PyList_Append(chunks, result) < 0)
                goto fail;
            _Py_DECLARE_STR(empty, "");
            Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
            if (result == NULL)
                goto fail;
            Py_CLEAR(chunks);
        }
        return result;
    }
  fail:
    Py_XDECREF(result);
    Py_XDECREF(chunks);
    return NULL;
}
2075
2076
2077 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2078 that is to the NUL character. Otherwise the function will produce
2079 incorrect results. */
2080 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)2081 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2082 {
2083 if (kind == PyUnicode_1BYTE_KIND) {
2084 assert(ch < 256);
2085 return (char *) memchr((const void *) s, (char) ch, end - s);
2086 }
2087 for (;;) {
2088 while (PyUnicode_READ(kind, s, 0) > ch)
2089 s += kind;
2090 if (PyUnicode_READ(kind, s, 0) == ch)
2091 return s;
2092 if (s == end)
2093 return NULL;
2094 s += kind;
2095 }
2096 }
2097
2098 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,int kind,const char * start,const char * end,Py_ssize_t * consumed)2099 _PyIO_find_line_ending(
2100 int translated, int universal, PyObject *readnl,
2101 int kind, const char *start, const char *end, Py_ssize_t *consumed)
2102 {
2103 Py_ssize_t len = (end - start)/kind;
2104
2105 if (translated) {
2106 /* Newlines are already translated, only search for \n */
2107 const char *pos = find_control_char(kind, start, end, '\n');
2108 if (pos != NULL)
2109 return (pos - start)/kind + 1;
2110 else {
2111 *consumed = len;
2112 return -1;
2113 }
2114 }
2115 else if (universal) {
2116 /* Universal newline search. Find any of \r, \r\n, \n
2117 * The decoder ensures that \r\n are not split in two pieces
2118 */
2119 const char *s = start;
2120 for (;;) {
2121 Py_UCS4 ch;
2122 /* Fast path for non-control chars. The loop always ends
2123 since the Unicode string is NUL-terminated. */
2124 while (PyUnicode_READ(kind, s, 0) > '\r')
2125 s += kind;
2126 if (s >= end) {
2127 *consumed = len;
2128 return -1;
2129 }
2130 ch = PyUnicode_READ(kind, s, 0);
2131 s += kind;
2132 if (ch == '\n')
2133 return (s - start)/kind;
2134 if (ch == '\r') {
2135 if (PyUnicode_READ(kind, s, 0) == '\n')
2136 return (s - start)/kind + 1;
2137 else
2138 return (s - start)/kind;
2139 }
2140 }
2141 }
2142 else {
2143 /* Non-universal mode. */
2144 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2145 const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2146 /* Assume that readnl is an ASCII character. */
2147 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2148 if (readnl_len == 1) {
2149 const char *pos = find_control_char(kind, start, end, nl[0]);
2150 if (pos != NULL)
2151 return (pos - start)/kind + 1;
2152 *consumed = len;
2153 return -1;
2154 }
2155 else {
2156 const char *s = start;
2157 const char *e = end - (readnl_len - 1)*kind;
2158 const char *pos;
2159 if (e < s)
2160 e = s;
2161 while (s < e) {
2162 Py_ssize_t i;
2163 const char *pos = find_control_char(kind, s, end, nl[0]);
2164 if (pos == NULL || pos >= e)
2165 break;
2166 for (i = 1; i < readnl_len; i++) {
2167 if (PyUnicode_READ(kind, pos, i) != nl[i])
2168 break;
2169 }
2170 if (i == readnl_len)
2171 return (pos - start)/kind + readnl_len;
2172 s = pos + kind;
2173 }
2174 pos = find_control_char(kind, e, end, nl[0]);
2175 if (pos == NULL)
2176 *consumed = len;
2177 else
2178 *consumed = (pos - start)/kind;
2179 return -1;
2180 }
2181 }
2182 }
2183
2184 static PyObject *
_textiowrapper_readline(textio * self,Py_ssize_t limit)2185 _textiowrapper_readline(textio *self, Py_ssize_t limit)
2186 {
2187 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2188 Py_ssize_t start, endpos, chunked, offset_to_buffer;
2189 int res;
2190
2191 CHECK_CLOSED(self);
2192
2193 if (_textiowrapper_writeflush(self) < 0)
2194 return NULL;
2195
2196 chunked = 0;
2197
2198 while (1) {
2199 const char *ptr;
2200 Py_ssize_t line_len;
2201 int kind;
2202 Py_ssize_t consumed = 0;
2203
2204 /* First, get some data if necessary */
2205 res = 1;
2206 while (!self->decoded_chars ||
2207 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2208 res = textiowrapper_read_chunk(self, 0);
2209 if (res < 0) {
2210 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2211 when EINTR occurs so we needn't do it ourselves. */
2212 if (_PyIO_trap_eintr()) {
2213 continue;
2214 }
2215 goto error;
2216 }
2217 if (res == 0)
2218 break;
2219 }
2220 if (res == 0) {
2221 /* end of file */
2222 textiowrapper_set_decoded_chars(self, NULL);
2223 Py_CLEAR(self->snapshot);
2224 start = endpos = offset_to_buffer = 0;
2225 break;
2226 }
2227
2228 if (remaining == NULL) {
2229 line = Py_NewRef(self->decoded_chars);
2230 start = self->decoded_chars_used;
2231 offset_to_buffer = 0;
2232 }
2233 else {
2234 assert(self->decoded_chars_used == 0);
2235 line = PyUnicode_Concat(remaining, self->decoded_chars);
2236 start = 0;
2237 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2238 Py_CLEAR(remaining);
2239 if (line == NULL)
2240 goto error;
2241 }
2242
2243 ptr = PyUnicode_DATA(line);
2244 line_len = PyUnicode_GET_LENGTH(line);
2245 kind = PyUnicode_KIND(line);
2246
2247 endpos = _PyIO_find_line_ending(
2248 self->readtranslate, self->readuniversal, self->readnl,
2249 kind,
2250 ptr + kind * start,
2251 ptr + kind * line_len,
2252 &consumed);
2253 if (endpos >= 0) {
2254 endpos += start;
2255 if (limit >= 0 && (endpos - start) + chunked >= limit)
2256 endpos = start + limit - chunked;
2257 break;
2258 }
2259
2260 /* We can put aside up to `endpos` */
2261 endpos = consumed + start;
2262 if (limit >= 0 && (endpos - start) + chunked >= limit) {
2263 /* Didn't find line ending, but reached length limit */
2264 endpos = start + limit - chunked;
2265 break;
2266 }
2267
2268 if (endpos > start) {
2269 /* No line ending seen yet - put aside current data */
2270 PyObject *s;
2271 if (chunks == NULL) {
2272 chunks = PyList_New(0);
2273 if (chunks == NULL)
2274 goto error;
2275 }
2276 s = PyUnicode_Substring(line, start, endpos);
2277 if (s == NULL)
2278 goto error;
2279 if (PyList_Append(chunks, s) < 0) {
2280 Py_DECREF(s);
2281 goto error;
2282 }
2283 chunked += PyUnicode_GET_LENGTH(s);
2284 Py_DECREF(s);
2285 }
2286 /* There may be some remaining bytes we'll have to prepend to the
2287 next chunk of data */
2288 if (endpos < line_len) {
2289 remaining = PyUnicode_Substring(line, endpos, line_len);
2290 if (remaining == NULL)
2291 goto error;
2292 }
2293 Py_CLEAR(line);
2294 /* We have consumed the buffer */
2295 textiowrapper_set_decoded_chars(self, NULL);
2296 }
2297
2298 if (line != NULL) {
2299 /* Our line ends in the current buffer */
2300 self->decoded_chars_used = endpos - offset_to_buffer;
2301 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2302 PyObject *s = PyUnicode_Substring(line, start, endpos);
2303 Py_CLEAR(line);
2304 if (s == NULL)
2305 goto error;
2306 line = s;
2307 }
2308 }
2309 if (remaining != NULL) {
2310 if (chunks == NULL) {
2311 chunks = PyList_New(0);
2312 if (chunks == NULL)
2313 goto error;
2314 }
2315 if (PyList_Append(chunks, remaining) < 0)
2316 goto error;
2317 Py_CLEAR(remaining);
2318 }
2319 if (chunks != NULL) {
2320 if (line != NULL) {
2321 if (PyList_Append(chunks, line) < 0)
2322 goto error;
2323 Py_DECREF(line);
2324 }
2325 line = PyUnicode_Join(&_Py_STR(empty), chunks);
2326 if (line == NULL)
2327 goto error;
2328 Py_CLEAR(chunks);
2329 }
2330 if (line == NULL) {
2331 line = &_Py_STR(empty);
2332 }
2333
2334 return line;
2335
2336 error:
2337 Py_XDECREF(chunks);
2338 Py_XDECREF(remaining);
2339 Py_XDECREF(line);
2340 return NULL;
2341 }
2342
2343 /*[clinic input]
2344 @critical_section
2345 _io.TextIOWrapper.readline
2346 size: Py_ssize_t = -1
2347 /
2348 [clinic start generated code]*/
2349
2350 static PyObject *
_io_TextIOWrapper_readline_impl(textio * self,Py_ssize_t size)2351 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2352 /*[clinic end generated code: output=344afa98804e8b25 input=b65bab871dc3ddba]*/
2353 {
2354 CHECK_ATTACHED(self);
2355 return _textiowrapper_readline(self, size);
2356 }
2357
2358 /* Seek and Tell */
2359
2360 typedef struct {
2361 Py_off_t start_pos;
2362 int dec_flags;
2363 int bytes_to_feed;
2364 int chars_to_skip;
2365 char need_eof;
2366 } cookie_type;
2367
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in the intermediary
   byte string the various cookie_type fields will be stored.
*/
2374
/* Total size in bytes of the packed cookie: one Py_off_t, three ints and
   one char, stored back to back without padding. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

/* Each offset below is expressed relative to the field stored just before
   it in the byte string. */
#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
2399
2400 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)2401 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2402 {
2403 unsigned char buffer[COOKIE_BUF_LEN];
2404 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2405 if (cookieLong == NULL)
2406 return -1;
2407
2408 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2409 PY_LITTLE_ENDIAN, 0, 1) < 0) {
2410 Py_DECREF(cookieLong);
2411 return -1;
2412 }
2413 Py_DECREF(cookieLong);
2414
2415 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2416 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2417 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2418 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2419 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2420
2421 return 0;
2422 }
2423
2424 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)2425 textiowrapper_build_cookie(cookie_type *cookie)
2426 {
2427 unsigned char buffer[COOKIE_BUF_LEN];
2428
2429 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2430 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2431 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2432 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2433 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2434
2435 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2436 PY_LITTLE_ENDIAN, 0);
2437 }
2438
2439 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2440 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2441 {
2442 PyObject *res;
2443 /* When seeking to the start of the stream, we call decoder.reset()
2444 rather than decoder.getstate().
2445 This is for a few decoders such as utf-16 for which the state value
2446 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2447 utf-16, that we are expecting a BOM).
2448 */
2449 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2450 res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2451 }
2452 else {
2453 res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
2454 "((yi))", "", cookie->dec_flags);
2455 }
2456 if (res == NULL) {
2457 return -1;
2458 }
2459 Py_DECREF(res);
2460 return 0;
2461 }
2462
2463 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2464 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2465 {
2466 PyObject *res;
2467 if (start_of_stream) {
2468 res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset));
2469 self->encoding_start_of_stream = 1;
2470 }
2471 else {
2472 res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate),
2473 _PyLong_GetZero());
2474 self->encoding_start_of_stream = 0;
2475 }
2476 if (res == NULL)
2477 return -1;
2478 Py_DECREF(res);
2479 return 0;
2480 }
2481
2482 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2483 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2484 {
2485 /* Same as _textiowrapper_decoder_setstate() above. */
2486 return _textiowrapper_encoder_reset(
2487 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2488 }
2489
2490 /*[clinic input]
2491 @critical_section
2492 _io.TextIOWrapper.seek
2493 cookie as cookieObj: object
2494 Zero or an opaque number returned by tell().
2495 whence: int(c_default='0') = os.SEEK_SET
2496 The relative position to seek from.
2497 /
2498
2499 Set the stream position, and return the new stream position.
2500
2501 Four operations are supported, given by the following argument
2502 combinations:
2503
2504 - seek(0, SEEK_SET): Rewind to the start of the stream.
2505 - seek(cookie, SEEK_SET): Restore a previous position;
2506 'cookie' must be a number returned by tell().
2507 - seek(0, SEEK_END): Fast-forward to the end of the stream.
2508 - seek(0, SEEK_CUR): Leave the current stream position unchanged.
2509
2510 Any other argument combinations are invalid,
2511 and may raise exceptions.
2512 [clinic start generated code]*/
2513
2514 static PyObject *
_io_TextIOWrapper_seek_impl(textio * self,PyObject * cookieObj,int whence)2515 _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2516 /*[clinic end generated code: output=0a15679764e2d04d input=4bea78698be23d7e]*/
2517 {
2518 PyObject *posobj;
2519 cookie_type cookie;
2520 PyObject *res;
2521 int cmp;
2522 PyObject *snapshot;
2523
2524 CHECK_ATTACHED(self);
2525 CHECK_CLOSED(self);
2526
2527 Py_INCREF(cookieObj);
2528
2529 if (!self->seekable) {
2530 _unsupported(self->state, "underlying stream is not seekable");
2531 goto fail;
2532 }
2533
2534 PyObject *zero = _PyLong_GetZero(); // borrowed reference
2535
2536 switch (whence) {
2537 case SEEK_CUR:
2538 /* seek relative to current position */
2539 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2540 if (cmp < 0)
2541 goto fail;
2542
2543 if (cmp == 0) {
2544 _unsupported(self->state, "can't do nonzero cur-relative seeks");
2545 goto fail;
2546 }
2547
2548 /* Seeking to the current position should attempt to
2549 * sync the underlying buffer with the current position.
2550 */
2551 Py_DECREF(cookieObj);
2552 cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
2553 if (cookieObj == NULL)
2554 goto fail;
2555 break;
2556
2557 case SEEK_END:
2558 /* seek relative to end of file */
2559 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2560 if (cmp < 0)
2561 goto fail;
2562
2563 if (cmp == 0) {
2564 _unsupported(self->state, "can't do nonzero end-relative seeks");
2565 goto fail;
2566 }
2567
2568 if (_PyFile_Flush((PyObject *)self) < 0) {
2569 goto fail;
2570 }
2571
2572 textiowrapper_set_decoded_chars(self, NULL);
2573 Py_CLEAR(self->snapshot);
2574 if (self->decoder) {
2575 res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2576 if (res == NULL)
2577 goto fail;
2578 Py_DECREF(res);
2579 }
2580
2581 res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
2582 Py_CLEAR(cookieObj);
2583 if (res == NULL)
2584 goto fail;
2585 if (self->encoder) {
2586 /* If seek() == 0, we are at the start of stream, otherwise not */
2587 cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
2588 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2589 Py_DECREF(res);
2590 goto fail;
2591 }
2592 }
2593 return res;
2594
2595 case SEEK_SET:
2596 break;
2597
2598 default:
2599 PyErr_Format(PyExc_ValueError,
2600 "invalid whence (%d, should be %d, %d or %d)", whence,
2601 SEEK_SET, SEEK_CUR, SEEK_END);
2602 goto fail;
2603 }
2604
2605 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
2606 if (cmp < 0)
2607 goto fail;
2608
2609 if (cmp == 1) {
2610 PyErr_Format(PyExc_ValueError,
2611 "negative seek position %R", cookieObj);
2612 goto fail;
2613 }
2614
2615 if (_PyFile_Flush((PyObject *)self) < 0) {
2616 goto fail;
2617 }
2618
2619 /* The strategy of seek() is to go back to the safe start point
2620 * and replay the effect of read(chars_to_skip) from there.
2621 */
2622 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2623 goto fail;
2624
2625 /* Seek back to the safe start point. */
2626 posobj = PyLong_FromOff_t(cookie.start_pos);
2627 if (posobj == NULL)
2628 goto fail;
2629 res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
2630 Py_DECREF(posobj);
2631 if (res == NULL)
2632 goto fail;
2633 Py_DECREF(res);
2634
2635 textiowrapper_set_decoded_chars(self, NULL);
2636 Py_CLEAR(self->snapshot);
2637
2638 /* Restore the decoder to its state from the safe start point. */
2639 if (self->decoder) {
2640 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2641 goto fail;
2642 }
2643
2644 if (cookie.chars_to_skip) {
2645 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2646 PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
2647 "i", cookie.bytes_to_feed);
2648 PyObject *decoded;
2649
2650 if (input_chunk == NULL)
2651 goto fail;
2652
2653 if (!PyBytes_Check(input_chunk)) {
2654 PyErr_Format(PyExc_TypeError,
2655 "underlying read() should have returned a bytes "
2656 "object, not '%.200s'",
2657 Py_TYPE(input_chunk)->tp_name);
2658 Py_DECREF(input_chunk);
2659 goto fail;
2660 }
2661
2662 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2663 if (snapshot == NULL) {
2664 goto fail;
2665 }
2666 Py_XSETREF(self->snapshot, snapshot);
2667
2668 decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
2669 input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
2670
2671 if (check_decoded(decoded) < 0)
2672 goto fail;
2673
2674 textiowrapper_set_decoded_chars(self, decoded);
2675
2676 /* Skip chars_to_skip of the decoded characters. */
2677 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2678 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2679 goto fail;
2680 }
2681 self->decoded_chars_used = cookie.chars_to_skip;
2682 }
2683 else {
2684 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2685 if (snapshot == NULL)
2686 goto fail;
2687 Py_XSETREF(self->snapshot, snapshot);
2688 }
2689
2690 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2691 if (self->encoder) {
2692 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2693 goto fail;
2694 }
2695 return cookieObj;
2696 fail:
2697 Py_XDECREF(cookieObj);
2698 return NULL;
2699
2700 }
2701
2702 /*[clinic input]
2703 @critical_section
2704 _io.TextIOWrapper.tell
2705
2706 Return the stream position as an opaque number.
2707
2708 The return value of tell() can be given as input to seek(), to restore a
2709 previous stream position.
2710 [clinic start generated code]*/
2711
2712 static PyObject *
_io_TextIOWrapper_tell_impl(textio * self)2713 _io_TextIOWrapper_tell_impl(textio *self)
2714 /*[clinic end generated code: output=4f168c08bf34ad5f input=415d6b4e4f8e6e8c]*/
2715 {
2716 PyObject *res;
2717 PyObject *posobj = NULL;
2718 cookie_type cookie = {0,0,0,0,0};
2719 PyObject *next_input;
2720 Py_ssize_t chars_to_skip, chars_decoded;
2721 Py_ssize_t skip_bytes, skip_back;
2722 PyObject *saved_state = NULL;
2723 const char *input, *input_end;
2724 Py_ssize_t dec_buffer_len;
2725 int dec_flags;
2726
2727 CHECK_ATTACHED(self);
2728 CHECK_CLOSED(self);
2729
2730 if (!self->seekable) {
2731 _unsupported(self->state, "underlying stream is not seekable");
2732 goto fail;
2733 }
2734 if (!self->telling) {
2735 PyErr_SetString(PyExc_OSError,
2736 "telling position disabled by next() call");
2737 goto fail;
2738 }
2739
2740 if (_textiowrapper_writeflush(self) < 0)
2741 return NULL;
2742 if (_PyFile_Flush((PyObject *)self) < 0) {
2743 goto fail;
2744 }
2745
2746 posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
2747 if (posobj == NULL)
2748 goto fail;
2749
2750 if (self->decoder == NULL || self->snapshot == NULL) {
2751 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2752 return posobj;
2753 }
2754
2755 #if defined(HAVE_LARGEFILE_SUPPORT)
2756 cookie.start_pos = PyLong_AsLongLong(posobj);
2757 #else
2758 cookie.start_pos = PyLong_AsLong(posobj);
2759 #endif
2760 Py_DECREF(posobj);
2761 if (PyErr_Occurred())
2762 goto fail;
2763
2764 /* Skip backward to the snapshot point (see _read_chunk). */
2765 assert(PyTuple_Check(self->snapshot));
2766 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2767 goto fail;
2768
2769 assert (PyBytes_Check(next_input));
2770
2771 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2772
2773 /* How many decoded characters have been used up since the snapshot? */
2774 if (self->decoded_chars_used == 0) {
2775 /* We haven't moved from the snapshot point. */
2776 return textiowrapper_build_cookie(&cookie);
2777 }
2778
2779 chars_to_skip = self->decoded_chars_used;
2780
2781 /* Decoder state will be restored at the end */
2782 saved_state = PyObject_CallMethodNoArgs(self->decoder,
2783 &_Py_ID(getstate));
2784 if (saved_state == NULL)
2785 goto fail;
2786
2787 #define DECODER_GETSTATE() do { \
2788 PyObject *dec_buffer; \
2789 PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
2790 &_Py_ID(getstate)); \
2791 if (_state == NULL) \
2792 goto fail; \
2793 if (!PyTuple_Check(_state)) { \
2794 PyErr_SetString(PyExc_TypeError, \
2795 "illegal decoder state"); \
2796 Py_DECREF(_state); \
2797 goto fail; \
2798 } \
2799 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2800 &dec_buffer, &dec_flags)) \
2801 { \
2802 Py_DECREF(_state); \
2803 goto fail; \
2804 } \
2805 if (!PyBytes_Check(dec_buffer)) { \
2806 PyErr_Format(PyExc_TypeError, \
2807 "illegal decoder state: the first item should be a " \
2808 "bytes object, not '%.200s'", \
2809 Py_TYPE(dec_buffer)->tp_name); \
2810 Py_DECREF(_state); \
2811 goto fail; \
2812 } \
2813 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2814 Py_DECREF(_state); \
2815 } while (0)
2816
2817 #define DECODER_DECODE(start, len, res) do { \
2818 PyObject *_decoded = _PyObject_CallMethod( \
2819 self->decoder, &_Py_ID(decode), "y#", start, len); \
2820 if (check_decoded(_decoded) < 0) \
2821 goto fail; \
2822 res = PyUnicode_GET_LENGTH(_decoded); \
2823 Py_DECREF(_decoded); \
2824 } while (0)
2825
2826 /* Fast search for an acceptable start point, close to our
2827 current pos */
2828 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2829 skip_back = 1;
2830 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2831 input = PyBytes_AS_STRING(next_input);
2832 while (skip_bytes > 0) {
2833 /* Decode up to temptative start point */
2834 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2835 goto fail;
2836 DECODER_DECODE(input, skip_bytes, chars_decoded);
2837 if (chars_decoded <= chars_to_skip) {
2838 DECODER_GETSTATE();
2839 if (dec_buffer_len == 0) {
2840 /* Before pos and no bytes buffered in decoder => OK */
2841 cookie.dec_flags = dec_flags;
2842 chars_to_skip -= chars_decoded;
2843 break;
2844 }
2845 /* Skip back by buffered amount and reset heuristic */
2846 skip_bytes -= dec_buffer_len;
2847 skip_back = 1;
2848 }
2849 else {
2850 /* We're too far ahead, skip back a bit */
2851 skip_bytes -= skip_back;
2852 skip_back *= 2;
2853 }
2854 }
2855 if (skip_bytes <= 0) {
2856 skip_bytes = 0;
2857 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2858 goto fail;
2859 }
2860
2861 /* Note our initial start point. */
2862 cookie.start_pos += skip_bytes;
2863 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2864 if (chars_to_skip == 0)
2865 goto finally;
2866
2867 /* We should be close to the desired position. Now feed the decoder one
2868 * byte at a time until we reach the `chars_to_skip` target.
2869 * As we go, note the nearest "safe start point" before the current
2870 * location (a point where the decoder has nothing buffered, so seek()
2871 * can safely start from there and advance to this location).
2872 */
2873 chars_decoded = 0;
2874 input = PyBytes_AS_STRING(next_input);
2875 input_end = input + PyBytes_GET_SIZE(next_input);
2876 input += skip_bytes;
2877 while (input < input_end) {
2878 Py_ssize_t n;
2879
2880 DECODER_DECODE(input, (Py_ssize_t)1, n);
2881 /* We got n chars for 1 byte */
2882 chars_decoded += n;
2883 cookie.bytes_to_feed += 1;
2884 DECODER_GETSTATE();
2885
2886 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2887 /* Decoder buffer is empty, so this is a safe start point. */
2888 cookie.start_pos += cookie.bytes_to_feed;
2889 chars_to_skip -= chars_decoded;
2890 cookie.dec_flags = dec_flags;
2891 cookie.bytes_to_feed = 0;
2892 chars_decoded = 0;
2893 }
2894 if (chars_decoded >= chars_to_skip)
2895 break;
2896 input++;
2897 }
2898 if (input == input_end) {
2899 /* We didn't get enough decoded data; signal EOF to get more. */
2900 PyObject *decoded = _PyObject_CallMethod(
2901 self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
2902 if (check_decoded(decoded) < 0)
2903 goto fail;
2904 chars_decoded += PyUnicode_GET_LENGTH(decoded);
2905 Py_DECREF(decoded);
2906 cookie.need_eof = 1;
2907
2908 if (chars_decoded < chars_to_skip) {
2909 PyErr_SetString(PyExc_OSError,
2910 "can't reconstruct logical file position");
2911 goto fail;
2912 }
2913 }
2914
2915 finally:
2916 res = PyObject_CallMethodOneArg(
2917 self->decoder, &_Py_ID(setstate), saved_state);
2918 Py_DECREF(saved_state);
2919 if (res == NULL)
2920 return NULL;
2921 Py_DECREF(res);
2922
2923 /* The returned cookie corresponds to the last safe start point. */
2924 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2925 return textiowrapper_build_cookie(&cookie);
2926
2927 fail:
2928 if (saved_state) {
2929 PyObject *exc = PyErr_GetRaisedException();
2930 res = PyObject_CallMethodOneArg(
2931 self->decoder, &_Py_ID(setstate), saved_state);
2932 _PyErr_ChainExceptions1(exc);
2933 Py_DECREF(saved_state);
2934 Py_XDECREF(res);
2935 }
2936 return NULL;
2937 }
2938
2939 /*[clinic input]
2940 @critical_section
2941 _io.TextIOWrapper.truncate
2942 pos: object = None
2943 /
2944 [clinic start generated code]*/
2945
2946 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2947 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2948 /*[clinic end generated code: output=90ec2afb9bb7745f input=8bddb320834c93ee]*/
2949 {
2950 CHECK_ATTACHED(self)
2951
2952 if (_PyFile_Flush((PyObject *)self) < 0) {
2953 return NULL;
2954 }
2955
2956 return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
2957 }
2958
2959 static PyObject *
textiowrapper_repr(textio * self)2960 textiowrapper_repr(textio *self)
2961 {
2962 PyObject *nameobj, *modeobj, *res, *s;
2963 int status;
2964 const char *type_name = Py_TYPE(self)->tp_name;
2965
2966 CHECK_INITIALIZED(self);
2967
2968 res = PyUnicode_FromFormat("<%.100s", type_name);
2969 if (res == NULL)
2970 return NULL;
2971
2972 status = Py_ReprEnter((PyObject *)self);
2973 if (status != 0) {
2974 if (status > 0) {
2975 PyErr_Format(PyExc_RuntimeError,
2976 "reentrant call inside %.100s.__repr__",
2977 type_name);
2978 }
2979 goto error;
2980 }
2981 if (PyObject_GetOptionalAttr((PyObject *) self, &_Py_ID(name), &nameobj) < 0) {
2982 if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
2983 goto error;
2984 }
2985 /* Ignore ValueError raised if the underlying stream was detached */
2986 PyErr_Clear();
2987 }
2988 if (nameobj != NULL) {
2989 s = PyUnicode_FromFormat(" name=%R", nameobj);
2990 Py_DECREF(nameobj);
2991 if (s == NULL)
2992 goto error;
2993 PyUnicode_AppendAndDel(&res, s);
2994 if (res == NULL)
2995 goto error;
2996 }
2997 if (PyObject_GetOptionalAttr((PyObject *) self, &_Py_ID(mode), &modeobj) < 0) {
2998 goto error;
2999 }
3000 if (modeobj != NULL) {
3001 s = PyUnicode_FromFormat(" mode=%R", modeobj);
3002 Py_DECREF(modeobj);
3003 if (s == NULL)
3004 goto error;
3005 PyUnicode_AppendAndDel(&res, s);
3006 if (res == NULL)
3007 goto error;
3008 }
3009 s = PyUnicode_FromFormat("%U encoding=%R>",
3010 res, self->encoding);
3011 Py_DECREF(res);
3012 if (status == 0) {
3013 Py_ReprLeave((PyObject *)self);
3014 }
3015 return s;
3016
3017 error:
3018 Py_XDECREF(res);
3019 if (status == 0) {
3020 Py_ReprLeave((PyObject *)self);
3021 }
3022 return NULL;
3023 }
3024
3025
3026 /* Inquiries */
3027
3028 /*[clinic input]
3029 @critical_section
3030 _io.TextIOWrapper.fileno
3031 [clinic start generated code]*/
3032
3033 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)3034 _io_TextIOWrapper_fileno_impl(textio *self)
3035 /*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/
3036 {
3037 CHECK_ATTACHED(self);
3038 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
3039 }
3040
3041 /*[clinic input]
3042 @critical_section
3043 _io.TextIOWrapper.seekable
3044 [clinic start generated code]*/
3045
3046 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)3047 _io_TextIOWrapper_seekable_impl(textio *self)
3048 /*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/
3049 {
3050 CHECK_ATTACHED(self);
3051 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
3052 }
3053
3054 /*[clinic input]
3055 @critical_section
3056 _io.TextIOWrapper.readable
3057 [clinic start generated code]*/
3058
3059 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)3060 _io_TextIOWrapper_readable_impl(textio *self)
3061 /*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/
3062 {
3063 CHECK_ATTACHED(self);
3064 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
3065 }
3066
3067 /*[clinic input]
3068 @critical_section
3069 _io.TextIOWrapper.writable
3070 [clinic start generated code]*/
3071
3072 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)3073 _io_TextIOWrapper_writable_impl(textio *self)
3074 /*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/
3075 {
3076 CHECK_ATTACHED(self);
3077 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
3078 }
3079
3080 /*[clinic input]
3081 @critical_section
3082 _io.TextIOWrapper.isatty
3083 [clinic start generated code]*/
3084
3085 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)3086 _io_TextIOWrapper_isatty_impl(textio *self)
3087 /*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/
3088 {
3089 CHECK_ATTACHED(self);
3090 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
3091 }
3092
3093 /*[clinic input]
3094 @critical_section
3095 _io.TextIOWrapper.flush
3096 [clinic start generated code]*/
3097
3098 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)3099 _io_TextIOWrapper_flush_impl(textio *self)
3100 /*[clinic end generated code: output=59de9165f9c2e4d2 input=3ac3bf521bfed59d]*/
3101 {
3102 CHECK_ATTACHED(self);
3103 CHECK_CLOSED(self);
3104 self->telling = self->seekable;
3105 if (_textiowrapper_writeflush(self) < 0)
3106 return NULL;
3107 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
3108 }
3109
3110 /*[clinic input]
3111 @critical_section
3112 _io.TextIOWrapper.close
3113 [clinic start generated code]*/
3114
3115 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)3116 _io_TextIOWrapper_close_impl(textio *self)
3117 /*[clinic end generated code: output=056ccf8b4876e4f4 input=8e12d7079d5ac5c1]*/
3118 {
3119 PyObject *res;
3120 int r;
3121 CHECK_ATTACHED(self);
3122
3123 res = _io_TextIOWrapper_closed_get_impl(self);
3124 if (res == NULL)
3125 return NULL;
3126 r = PyObject_IsTrue(res);
3127 Py_DECREF(res);
3128 if (r < 0)
3129 return NULL;
3130
3131 if (r > 0) {
3132 Py_RETURN_NONE; /* stream already closed */
3133 }
3134 else {
3135 PyObject *exc = NULL;
3136 if (self->finalizing) {
3137 res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
3138 (PyObject *)self);
3139 if (res) {
3140 Py_DECREF(res);
3141 }
3142 else {
3143 PyErr_Clear();
3144 }
3145 }
3146 if (_PyFile_Flush((PyObject *)self) < 0) {
3147 exc = PyErr_GetRaisedException();
3148 }
3149
3150 res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
3151 if (exc != NULL) {
3152 _PyErr_ChainExceptions1(exc);
3153 Py_CLEAR(res);
3154 }
3155 return res;
3156 }
3157 }
3158
3159 static PyObject *
textiowrapper_iternext(textio * self)3160 textiowrapper_iternext(textio *self)
3161 {
3162 PyObject *line;
3163
3164 CHECK_ATTACHED(self);
3165
3166 self->telling = 0;
3167 if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) {
3168 /* Skip method call overhead for speed */
3169 line = _textiowrapper_readline(self, -1);
3170 }
3171 else {
3172 line = PyObject_CallMethodNoArgs((PyObject *)self,
3173 &_Py_ID(readline));
3174 if (line && !PyUnicode_Check(line)) {
3175 PyErr_Format(PyExc_OSError,
3176 "readline() should have returned a str object, "
3177 "not '%.200s'", Py_TYPE(line)->tp_name);
3178 Py_DECREF(line);
3179 return NULL;
3180 }
3181 }
3182
3183 if (line == NULL)
3184 return NULL;
3185
3186 if (PyUnicode_GET_LENGTH(line) == 0) {
3187 /* Reached EOF or would have blocked */
3188 Py_DECREF(line);
3189 Py_CLEAR(self->snapshot);
3190 self->telling = self->seekable;
3191 return NULL;
3192 }
3193
3194 return line;
3195 }
3196
3197 /*[clinic input]
3198 @critical_section
3199 @getter
3200 _io.TextIOWrapper.name
3201 [clinic start generated code]*/
3202
3203 static PyObject *
_io_TextIOWrapper_name_get_impl(textio * self)3204 _io_TextIOWrapper_name_get_impl(textio *self)
3205 /*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/
3206 {
3207 CHECK_ATTACHED(self);
3208 return PyObject_GetAttr(self->buffer, &_Py_ID(name));
3209 }
3210
3211 /*[clinic input]
3212 @critical_section
3213 @getter
3214 _io.TextIOWrapper.closed
3215 [clinic start generated code]*/
3216
3217 static PyObject *
_io_TextIOWrapper_closed_get_impl(textio * self)3218 _io_TextIOWrapper_closed_get_impl(textio *self)
3219 /*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/
3220 {
3221 CHECK_ATTACHED(self);
3222 return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
3223 }
3224
3225 /*[clinic input]
3226 @critical_section
3227 @getter
3228 _io.TextIOWrapper.newlines
3229 [clinic start generated code]*/
3230
3231 static PyObject *
_io_TextIOWrapper_newlines_get_impl(textio * self)3232 _io_TextIOWrapper_newlines_get_impl(textio *self)
3233 /*[clinic end generated code: output=53aa03ac35573180 input=610df647e514b3e8]*/
3234 {
3235 PyObject *res;
3236 CHECK_ATTACHED(self);
3237 if (self->decoder == NULL ||
3238 PyObject_GetOptionalAttr(self->decoder, &_Py_ID(newlines), &res) == 0)
3239 {
3240 Py_RETURN_NONE;
3241 }
3242 return res;
3243 }
3244
3245 /*[clinic input]
3246 @critical_section
3247 @getter
3248 _io.TextIOWrapper.errors
3249 [clinic start generated code]*/
3250
3251 static PyObject *
_io_TextIOWrapper_errors_get_impl(textio * self)3252 _io_TextIOWrapper_errors_get_impl(textio *self)
3253 /*[clinic end generated code: output=dca3a3ef21b09484 input=b45f983e6d43c4d8]*/
3254 {
3255 CHECK_INITIALIZED(self);
3256 return Py_NewRef(self->errors);
3257 }
3258
3259 /*[clinic input]
3260 @critical_section
3261 @getter
3262 _io.TextIOWrapper._CHUNK_SIZE
3263 [clinic start generated code]*/
3264
3265 static PyObject *
_io_TextIOWrapper__CHUNK_SIZE_get_impl(textio * self)3266 _io_TextIOWrapper__CHUNK_SIZE_get_impl(textio *self)
3267 /*[clinic end generated code: output=039925cd2df375bc input=e9715b0e06ff0fa6]*/
3268 {
3269 CHECK_ATTACHED(self);
3270 return PyLong_FromSsize_t(self->chunk_size);
3271 }
3272
3273 /*[clinic input]
3274 @critical_section
3275 @setter
3276 _io.TextIOWrapper._CHUNK_SIZE
3277 [clinic start generated code]*/
3278
3279 static int
_io_TextIOWrapper__CHUNK_SIZE_set_impl(textio * self,PyObject * value)3280 _io_TextIOWrapper__CHUNK_SIZE_set_impl(textio *self, PyObject *value)
3281 /*[clinic end generated code: output=edb86d2db660a5ab input=32fc99861db02a0a]*/
3282 {
3283 Py_ssize_t n;
3284 CHECK_ATTACHED_INT(self);
3285 if (value == NULL) {
3286 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3287 return -1;
3288 }
3289 n = PyNumber_AsSsize_t(value, PyExc_ValueError);
3290 if (n == -1 && PyErr_Occurred())
3291 return -1;
3292 if (n <= 0) {
3293 PyErr_SetString(PyExc_ValueError,
3294 "a strictly positive integer is required");
3295 return -1;
3296 }
3297 self->chunk_size = n;
3298 return 0;
3299 }
3300
/* Method table for IncrementalNewlineDecoder; it is installed through the
   Py_tp_methods entry of nldecoder_slots.  Every entry is a *_METHODDEF
   macro -- presumably expanded from the Argument Clinic output pulled in via
   clinic/textio.c.h (confirm there).  Registered, in order: decode,
   getstate, setstate, reset (as named by the macros). */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}  /* sentinel: end of table */
};
3308
/* Property table for IncrementalNewlineDecoder; it is installed through the
   Py_tp_getset entry of nldecoder_slots.  "newlines" is registered with a
   getter only: the setter and doc fields are NULL. */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}  /* sentinel: end of table */
};
3313
/* Slot table for the IncrementalNewlineDecoder heap type; referenced by
   nldecoder_spec.slots.  Each entry pairs a slot id with the function or
   table implementing it. */
static PyType_Slot nldecoder_slots[] = {
    {Py_tp_dealloc, incrementalnewlinedecoder_dealloc},
    /* The type's docstring is the clinic-generated docstring of __init__. */
    {Py_tp_doc, (void *)_io_IncrementalNewlineDecoder___init____doc__},
    {Py_tp_methods, incrementalnewlinedecoder_methods},
    {Py_tp_getset, incrementalnewlinedecoder_getset},
    /* GC support: pairs with Py_TPFLAGS_HAVE_GC in nldecoder_spec. */
    {Py_tp_traverse, incrementalnewlinedecoder_traverse},
    {Py_tp_clear, incrementalnewlinedecoder_clear},
    {Py_tp_init, _io_IncrementalNewlineDecoder___init__},
    {0, NULL},  /* sentinel: end of table */
};
3324
3325 PyType_Spec nldecoder_spec = {
3326 .name = "_io.IncrementalNewlineDecoder",
3327 .basicsize = sizeof(nldecoder_object),
3328 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3329 Py_TPFLAGS_IMMUTABLETYPE),
3330 .slots = nldecoder_slots,
3331 };
3332
3333
/* Method table for TextIOWrapper; it is installed through the Py_tp_methods
   entry of textiowrapper_slots.  The *_METHODDEF entries are macros --
   presumably expanded from the Argument Clinic output pulled in via
   clinic/textio.c.h (confirm there).  They are grouped below as: core
   text I/O operations, stream capability queries, and positioning. */
static PyMethodDef textiowrapper_methods[] = {
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF

    /* Both pickling hooks are routed to the same handler, which is defined
       outside this chunk; going by its name it presumably refuses to pickle
       the object -- confirm at its definition. */
    {"__reduce__", _PyIOBase_cannot_pickle, METH_NOARGS},
    {"__reduce_ex__", _PyIOBase_cannot_pickle, METH_O},
    {NULL, NULL}  /* sentinel: end of table */
};
3357
/* Struct members of `textio` exposed directly as Python attributes; the
   table is installed through the Py_tp_members entry of textiowrapper_slots.
   Every entry is read-only except "_finalizing", whose flags field is 0. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", _Py_T_OBJECT, offsetof(textio, encoding), Py_READONLY},
    {"buffer", _Py_T_OBJECT, offsetof(textio, buffer), Py_READONLY},
    {"line_buffering", Py_T_BOOL, offsetof(textio, line_buffering), Py_READONLY},
    {"write_through", Py_T_BOOL, offsetof(textio, write_through), Py_READONLY},
    {"_finalizing", Py_T_BOOL, offsetof(textio, finalizing), 0},
    /* Special member names understood by the type-from-spec machinery: they
       give the offsets of the weak-reference list and of the instance
       dictionary inside `textio`. */
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(textio, weakreflist), Py_READONLY},
    {"__dictoffset__", Py_T_PYSSIZET, offsetof(textio, dict), Py_READONLY},
    {NULL}  /* sentinel: end of table */
};
3368
/* Property table for TextIOWrapper; it is installed through the
   Py_tp_getset entry of textiowrapper_slots.  The *_GETSETDEF entries are
   macros -- presumably generated by Argument Clinic for the @getter/@setter
   implementations in this file (name, closed, newlines, errors and
   _CHUNK_SIZE). */
static PyGetSetDef textiowrapper_getset[] = {
    _IO_TEXTIOWRAPPER_NAME_GETSETDEF
    _IO_TEXTIOWRAPPER_CLOSED_GETSETDEF
    /* NOTE(review): the "mode" entry below is commented out, so this table
       registers no `mode` getter.
    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    _IO_TEXTIOWRAPPER_NEWLINES_GETSETDEF
    _IO_TEXTIOWRAPPER_ERRORS_GETSETDEF
    _IO_TEXTIOWRAPPER__CHUNK_SIZE_GETSETDEF
    {NULL}  /* sentinel: end of table */
};
3379
/* Slot table for the TextIOWrapper heap type; referenced by
   textiowrapper_spec.slots.  Unlike nldecoder_slots, this array is not
   declared static.  Each entry pairs a slot id with the function or table
   implementing it. */
PyType_Slot textiowrapper_slots[] = {
    {Py_tp_dealloc, textiowrapper_dealloc},
    {Py_tp_repr, textiowrapper_repr},
    /* The type's docstring is the clinic-generated docstring of __init__. */
    {Py_tp_doc, (void *)_io_TextIOWrapper___init____doc__},
    /* GC support: pairs with Py_TPFLAGS_HAVE_GC in textiowrapper_spec. */
    {Py_tp_traverse, textiowrapper_traverse},
    {Py_tp_clear, textiowrapper_clear},
    /* Only tp_iternext is set here; no tp_iter slot appears in this table. */
    {Py_tp_iternext, textiowrapper_iternext},
    {Py_tp_methods, textiowrapper_methods},
    {Py_tp_members, textiowrapper_members},
    {Py_tp_getset, textiowrapper_getset},
    {Py_tp_init, _io_TextIOWrapper___init__},
    {0, NULL},  /* sentinel: end of table */
};
3393
3394 PyType_Spec textiowrapper_spec = {
3395 .name = "_io.TextIOWrapper",
3396 .basicsize = sizeof(textio),
3397 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3398 Py_TPFLAGS_IMMUTABLETYPE),
3399 .slots = textiowrapper_slots,
3400 };
3401