1 /*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "pycore_object.h"
12 #include "structmember.h"
13 #include "_iomodule.h"
14
15 /*[clinic input]
16 module _io
17 class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
18 class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
21
/* Attribute and method names this file looks up by name.  Each declaration
   is referred to in code as &PyId_<name> (for example &PyId_strict or
   &PyId_readable). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
42
43 /* TextIOBase */
44
/* Class docstring; installed as tp_doc of PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
52
53 static PyObject *
_unsupported(const char * message)54 _unsupported(const char *message)
55 {
56 _PyIO_State *state = IO_STATE();
57 if (state != NULL)
58 PyErr_SetString(state->unsupported_operation, message);
59 return NULL;
60 }
61
PyDoc_STRVAR(textiobase_detach_doc,
    "Separate the underlying buffer from the TextIOBase and return it.\n"
    "\n"
    "After the underlying buffer has been detached, the TextIO is in an\n"
    "unusable state.\n"
    );

/* Base-class detach(): not implemented here, so it always fails through
   _unsupported("detach") and returns NULL.  Registered as METH_NOARGS, hence
   the unused second parameter. */
static PyObject *
textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    return _unsupported("detach");
}
74
PyDoc_STRVAR(textiobase_read_doc,
    "Read at most n characters from stream.\n"
    "\n"
    "Read from underlying buffer until we have n characters or we hit EOF.\n"
    "If n is negative or omitted, read until EOF.\n"
    );

/* Base-class read(): `args` is never inspected; the call always fails
   through _unsupported("read") and returns NULL. */
static PyObject *
textiobase_read(PyObject *self, PyObject *args)
{
    return _unsupported("read");
}
87
PyDoc_STRVAR(textiobase_readline_doc,
    "Read until newline or EOF.\n"
    "\n"
    "Returns an empty string if EOF is hit immediately.\n"
    );

/* Base-class readline(): `args` is never inspected; the call always fails
   through _unsupported("readline") and returns NULL. */
static PyObject *
textiobase_readline(PyObject *self, PyObject *args)
{
    return _unsupported("readline");
}
99
PyDoc_STRVAR(textiobase_write_doc,
    "Write string to stream.\n"
    "Returns the number of characters written (which is always equal to\n"
    "the length of the string).\n"
    );

/* Base-class write(): `args` is never inspected; the call always fails
   through _unsupported("write") and returns NULL. */
static PyObject *
textiobase_write(PyObject *self, PyObject *args)
{
    return _unsupported("write");
}
111
112 PyDoc_STRVAR(textiobase_encoding_doc,
113 "Encoding of the text stream.\n"
114 "\n"
115 "Subclasses should override.\n"
116 );
117
118 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)119 textiobase_encoding_get(PyObject *self, void *context)
120 {
121 Py_RETURN_NONE;
122 }
123
124 PyDoc_STRVAR(textiobase_newlines_doc,
125 "Line endings translated so far.\n"
126 "\n"
127 "Only line endings translated during reading are considered.\n"
128 "\n"
129 "Subclasses should override.\n"
130 );
131
132 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)133 textiobase_newlines_get(PyObject *self, void *context)
134 {
135 Py_RETURN_NONE;
136 }
137
138 PyDoc_STRVAR(textiobase_errors_doc,
139 "The error setting of the decoder or encoder.\n"
140 "\n"
141 "Subclasses should override.\n"
142 );
143
144 static PyObject *
textiobase_errors_get(PyObject * self,void * context)145 textiobase_errors_get(PyObject *self, void *context)
146 {
147 Py_RETURN_NONE;
148 }
149
150
/* Methods exposed by _io._TextIOBase (installed as tp_methods).  All four
   entries point at placeholders that fail through _unsupported(). */
static PyMethodDef textiobase_methods[] = {
    {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
    {NULL, NULL}  /* end-of-table marker */
};
158
/* Read-only attributes of _io._TextIOBase (installed as tp_getset).  No
   setter is supplied for any of them; every getter returns None. */
static PyGetSetDef textiobase_getset[] = {
    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
    {NULL}  /* end-of-table marker */
};
165
/* Type object for _io._TextIOBase.

   The initializer is positional, so the order of the entries must follow the
   PyTypeObject field order; each line is tagged with the slot it fills.
   Only the name, flags, docstring, method table, getset table and base type
   are populated.  tp_basicsize is left 0 here (presumably filled in from
   the base type when the type is readied -- confirm). */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_vectorcall_offset*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_as_async*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};
216
217
218 /* IncrementalNewlineDecoder */
219
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder, or Py_None to take str
                                   input as-is; decode() treats NULL as
                                   "__init__ not called" */
    PyObject *errors;           /* error-handler object stored by __init__ */
    unsigned int pendingcr: 1;  /* a trailing '\r' was held back from the
                                   previous non-final decode() result */
    unsigned int translate: 1;  /* rewrite "\r\n" and "\r" as "\n" */
    unsigned int seennl: 3;     /* OR of the SEEN_* bits observed so far */
} nldecoder_object;
228
229 /*[clinic input]
230 _io.IncrementalNewlineDecoder.__init__
231 decoder: object
232 translate: int
233 errors: object(c_default="NULL") = "strict"
234
235 Codec used when reading a file in universal newlines mode.
236
237 It wraps another incremental decoder, translating \r\n and \r into \n.
238 It also records the types of newlines encountered. When used with
239 translate=False, it ensures that the newline sequence is returned in
240 one piece. When used with decoder=None, it expects unicode strings as
241 decode input and translates newlines without first invoking an external
242 decoder.
243 [clinic start generated code]*/
244
245 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)246 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
247 PyObject *decoder, int translate,
248 PyObject *errors)
249 /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
250 {
251 self->decoder = decoder;
252 Py_INCREF(decoder);
253
254 if (errors == NULL) {
255 self->errors = _PyUnicode_FromId(&PyId_strict);
256 if (self->errors == NULL)
257 return -1;
258 }
259 else {
260 self->errors = errors;
261 }
262 Py_INCREF(self->errors);
263
264 self->translate = translate ? 1 : 0;
265 self->seennl = 0;
266 self->pendingcr = 0;
267
268 return 0;
269 }
270
271 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)272 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
273 {
274 Py_CLEAR(self->decoder);
275 Py_CLEAR(self->errors);
276 Py_TYPE(self)->tp_free((PyObject *)self);
277 }
278
279 static int
check_decoded(PyObject * decoded)280 check_decoded(PyObject *decoded)
281 {
282 if (decoded == NULL)
283 return -1;
284 if (!PyUnicode_Check(decoded)) {
285 PyErr_Format(PyExc_TypeError,
286 "decoder should return a string result, not '%.200s'",
287 Py_TYPE(decoded)->tp_name);
288 Py_DECREF(decoded);
289 return -1;
290 }
291 if (PyUnicode_READY(decoded) < 0) {
292 Py_DECREF(decoded);
293 return -1;
294 }
295 return 0;
296 }
297
/* Bit flags kept in nldecoder_object.seennl, one per newline style that has
   been encountered while decoding. */
#define SEEN_CR 1
#define SEEN_LF 2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
302
/* Decode one chunk of input and apply newline bookkeeping.

   myself -- an nldecoder_object (cast internally, not type-checked here).
   input  -- passed to the wrapped decoder's decode(); when the decoder is
             None it is used directly and must already be a str.
   final  -- non-zero when no more input will follow.

   Steps, in order:
     1. obtain the decoded text;
     2. if a '\r' was held back by the previous call, put it back in front;
     3. unless `final`, hold back a trailing '\r' so that a "\r\n" split
        across two chunks is seen in one piece by the next call;
     4. record which newline styles occur (self->seennl) and, when
        self->translate is set, rewrite "\r\n" and "\r" as "\n".

   Returns a new reference to a str, or NULL with an exception set. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    /* A NULL decoder means __init__ never stored one. */
    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (a '\r' held back by a previous pass is re-attached
       below) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* check_decoded() releases `output` itself on failure. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        /* Same maximum character as `output`, one extra slot for the CR. */
        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
        /* `kind` is used as the per-character byte width here. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           The search is over raw bytes, so for wider character kinds a hit
           does not prove a '\r' character exists; it only means the slower
           paths below must look.  A miss does prove there is none.
        */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    /* Byte hit in a multi-byte kind: confirm that a real
                       '\n' character exists. */
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters.
                           There is no bound check on `i` here; presumably
                           the scan relies on the string's terminating NUL
                           to stop -- confirm. */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* Bookkeeping only: the text is returned unchanged. */
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters (unbounded, see the
                   note in the branch above) */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* Peek at the following character to tell "\r\n" from
                       a lone "\r". */
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
endscan:
            ;
        }
        else {
            /* Translation: copy into a scratch buffer while rewriting. */
            void *translated;
            /* These two redeclare the enclosing block's `kind` and `in_str`
               with the same values. */
            int kind = PyUnicode_KIND(output);
            void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* The result is never longer than the input, so kind * len
               bytes suffice. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` has already been advanced past the character just
                   read, so in > len means that character was the one at
                   index len, i.e. past the last real character. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* The old `output` was released above, so return directly
               rather than through the `error` label. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
503
504 /*[clinic input]
505 _io.IncrementalNewlineDecoder.decode
506 input: object
507 final: bool(accept={int}) = False
508 [clinic start generated code]*/
509
static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
                                          PyObject *input, int final)
/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
{
    /* Python-level decode(): forwards its arguments unchanged to the
       C-callable entry point and returns whatever that returns. */
    return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
}
517
518 /*[clinic input]
519 _io.IncrementalNewlineDecoder.getstate
520 [clinic start generated code]*/
521
522 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)523 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
524 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
525 {
526 PyObject *buffer;
527 unsigned long long flag;
528
529 if (self->decoder != Py_None) {
530 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
531 _PyIO_str_getstate, NULL);
532 if (state == NULL)
533 return NULL;
534 if (!PyTuple_Check(state)) {
535 PyErr_SetString(PyExc_TypeError,
536 "illegal decoder state");
537 Py_DECREF(state);
538 return NULL;
539 }
540 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
541 &buffer, &flag))
542 {
543 Py_DECREF(state);
544 return NULL;
545 }
546 Py_INCREF(buffer);
547 Py_DECREF(state);
548 }
549 else {
550 buffer = PyBytes_FromString("");
551 flag = 0;
552 }
553 flag <<= 1;
554 if (self->pendingcr)
555 flag |= 1;
556 return Py_BuildValue("NK", buffer, flag);
557 }
558
559 /*[clinic input]
560 _io.IncrementalNewlineDecoder.setstate
561 state: object
562 /
563 [clinic start generated code]*/
564
565 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)566 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
567 PyObject *state)
568 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
569 {
570 PyObject *buffer;
571 unsigned long long flag;
572
573 if (!PyTuple_Check(state)) {
574 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
575 return NULL;
576 }
577 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
578 &buffer, &flag))
579 {
580 return NULL;
581 }
582
583 self->pendingcr = (int) (flag & 1);
584 flag >>= 1;
585
586 if (self->decoder != Py_None)
587 return _PyObject_CallMethodId(self->decoder,
588 &PyId_setstate, "((OK))", buffer, flag);
589 else
590 Py_RETURN_NONE;
591 }
592
593 /*[clinic input]
594 _io.IncrementalNewlineDecoder.reset
595 [clinic start generated code]*/
596
597 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)598 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
599 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
600 {
601 self->seennl = 0;
602 self->pendingcr = 0;
603 if (self->decoder != Py_None)
604 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
605 else
606 Py_RETURN_NONE;
607 }
608
609 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)610 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
611 {
612 switch (self->seennl) {
613 case SEEN_CR:
614 return PyUnicode_FromString("\r");
615 case SEEN_LF:
616 return PyUnicode_FromString("\n");
617 case SEEN_CRLF:
618 return PyUnicode_FromString("\r\n");
619 case SEEN_CR | SEEN_LF:
620 return Py_BuildValue("ss", "\r", "\n");
621 case SEEN_CR | SEEN_CRLF:
622 return Py_BuildValue("ss", "\r", "\r\n");
623 case SEEN_LF | SEEN_CRLF:
624 return Py_BuildValue("ss", "\n", "\r\n");
625 case SEEN_CR | SEEN_LF | SEEN_CRLF:
626 return Py_BuildValue("sss", "\r", "\n", "\r\n");
627 default:
628 Py_RETURN_NONE;
629 }
630
631 }
632
633 /* TextIOWrapper */
634
/* Pointer type under which the specialized encoders are stored.  The
   encoders themselves are declared with a `textio *` first parameter and
   are cast to this type where they are stored or compared. */
typedef PyObject *
    (*encodefunc_t)(PyObject *, PyObject *);
637
/* Instance layout of _io.TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;       /* the wrapped stream object */
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;       /* str built from the newline argument, or NULL
                               when that argument was NULL (set_newline) */
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n.
                            May point into readnl's character data (see
                            set_newline), so it is only valid while readnl
                            is alive. */
    char line_buffering;
    char write_through;
    char readuniversal;     /* newline argument was NULL or "" */
    char readtranslate;     /* newline argument was NULL */
    char writetranslate;    /* newline argument was NULL or non-empty */
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
696
/* Forward declaration only; the definition lives elsewhere in this file. */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
699
700 /* A couple of specialized cases in order to bypass the slow incremental
701 encoding methods for the most popular encodings. */
702
703 static PyObject *
ascii_encode(textio * self,PyObject * text)704 ascii_encode(textio *self, PyObject *text)
705 {
706 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
707 }
708
709 static PyObject *
utf16be_encode(textio * self,PyObject * text)710 utf16be_encode(textio *self, PyObject *text)
711 {
712 return _PyUnicode_EncodeUTF16(text,
713 PyUnicode_AsUTF8(self->errors), 1);
714 }
715
716 static PyObject *
utf16le_encode(textio * self,PyObject * text)717 utf16le_encode(textio *self, PyObject *text)
718 {
719 return _PyUnicode_EncodeUTF16(text,
720 PyUnicode_AsUTF8(self->errors), -1);
721 }
722
723 static PyObject *
utf16_encode(textio * self,PyObject * text)724 utf16_encode(textio *self, PyObject *text)
725 {
726 if (!self->encoding_start_of_stream) {
727 /* Skip the BOM and use native byte ordering */
728 #if PY_BIG_ENDIAN
729 return utf16be_encode(self, text);
730 #else
731 return utf16le_encode(self, text);
732 #endif
733 }
734 return _PyUnicode_EncodeUTF16(text,
735 PyUnicode_AsUTF8(self->errors), 0);
736 }
737
738 static PyObject *
utf32be_encode(textio * self,PyObject * text)739 utf32be_encode(textio *self, PyObject *text)
740 {
741 return _PyUnicode_EncodeUTF32(text,
742 PyUnicode_AsUTF8(self->errors), 1);
743 }
744
745 static PyObject *
utf32le_encode(textio * self,PyObject * text)746 utf32le_encode(textio *self, PyObject *text)
747 {
748 return _PyUnicode_EncodeUTF32(text,
749 PyUnicode_AsUTF8(self->errors), -1);
750 }
751
752 static PyObject *
utf32_encode(textio * self,PyObject * text)753 utf32_encode(textio *self, PyObject *text)
754 {
755 if (!self->encoding_start_of_stream) {
756 /* Skip the BOM and use native byte ordering */
757 #if PY_BIG_ENDIAN
758 return utf32be_encode(self, text);
759 #else
760 return utf32le_encode(self, text);
761 #endif
762 }
763 return _PyUnicode_EncodeUTF32(text,
764 PyUnicode_AsUTF8(self->errors), 0);
765 }
766
767 static PyObject *
utf8_encode(textio * self,PyObject * text)768 utf8_encode(textio *self, PyObject *text)
769 {
770 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
771 }
772
773 static PyObject *
latin1_encode(textio * self,PyObject * text)774 latin1_encode(textio *self, PyObject *text)
775 {
776 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
777 }
778
779 // Return true when encoding can be skipped when text is ascii.
780 static inline int
is_asciicompat_encoding(encodefunc_t f)781 is_asciicompat_encoding(encodefunc_t f)
782 {
783 return f == (encodefunc_t) ascii_encode
784 || f == (encodefunc_t) latin1_encode
785 || f == (encodefunc_t) utf8_encode;
786 }
787
788 /* Map normalized encoding names onto the specialized encoding funcs */
789
/* One row of the name -> specialized-encoder table. */
typedef struct {
    const char *name;           /* codec name to match against */
    encodefunc_t encodefunc;    /* fast path to use for that codec */
} encodefuncentry;

/* Lookup table scanned linearly by _textiowrapper_set_encoder(); the row
   with a NULL name terminates the scan. */
static const encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
807
808 static int
validate_newline(const char * newline)809 validate_newline(const char *newline)
810 {
811 if (newline && newline[0] != '\0'
812 && !(newline[0] == '\n' && newline[1] == '\0')
813 && !(newline[0] == '\r' && newline[1] == '\0')
814 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
815 PyErr_Format(PyExc_ValueError,
816 "illegal newline value: %s", newline);
817 return -1;
818 }
819 return 0;
820 }
821
822 static int
set_newline(textio * self,const char * newline)823 set_newline(textio *self, const char *newline)
824 {
825 PyObject *old = self->readnl;
826 if (newline == NULL) {
827 self->readnl = NULL;
828 }
829 else {
830 self->readnl = PyUnicode_FromString(newline);
831 if (self->readnl == NULL) {
832 self->readnl = old;
833 return -1;
834 }
835 }
836 self->readuniversal = (newline == NULL || newline[0] == '\0');
837 self->readtranslate = (newline == NULL);
838 self->writetranslate = (newline == NULL || newline[0] != '\0');
839 if (!self->readuniversal && self->readnl != NULL) {
840 // validate_newline() accepts only ASCII newlines.
841 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
842 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
843 if (strcmp(self->writenl, "\n") == 0) {
844 self->writenl = NULL;
845 }
846 }
847 else {
848 #ifdef MS_WINDOWS
849 self->writenl = "\r\n";
850 #else
851 self->writenl = NULL;
852 #endif
853 }
854 Py_XDECREF(old);
855 return 0;
856 }
857
858 static int
_textiowrapper_set_decoder(textio * self,PyObject * codec_info,const char * errors)859 _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
860 const char *errors)
861 {
862 PyObject *res;
863 int r;
864
865 res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
866 if (res == NULL)
867 return -1;
868
869 r = PyObject_IsTrue(res);
870 Py_DECREF(res);
871 if (r == -1)
872 return -1;
873
874 if (r != 1)
875 return 0;
876
877 Py_CLEAR(self->decoder);
878 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
879 if (self->decoder == NULL)
880 return -1;
881
882 if (self->readuniversal) {
883 PyObject *incrementalDecoder = PyObject_CallFunction(
884 (PyObject *)&PyIncrementalNewlineDecoder_Type,
885 "Oi", self->decoder, (int)self->readtranslate);
886 if (incrementalDecoder == NULL)
887 return -1;
888 Py_CLEAR(self->decoder);
889 self->decoder = incrementalDecoder;
890 }
891
892 return 0;
893 }
894
895 static PyObject*
_textiowrapper_decode(PyObject * decoder,PyObject * bytes,int eof)896 _textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
897 {
898 PyObject *chars;
899
900 if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
901 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
902 else
903 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
904 eof ? Py_True : Py_False, NULL);
905
906 if (check_decoded(chars) < 0)
907 // check_decoded already decreases refcount
908 return NULL;
909
910 return chars;
911 }
912
913 static int
_textiowrapper_set_encoder(textio * self,PyObject * codec_info,const char * errors)914 _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
915 const char *errors)
916 {
917 PyObject *res;
918 int r;
919
920 res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
921 if (res == NULL)
922 return -1;
923
924 r = PyObject_IsTrue(res);
925 Py_DECREF(res);
926 if (r == -1)
927 return -1;
928
929 if (r != 1)
930 return 0;
931
932 Py_CLEAR(self->encoder);
933 self->encodefunc = NULL;
934 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
935 if (self->encoder == NULL)
936 return -1;
937
938 /* Get the normalized named of the codec */
939 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
940 return -1;
941 }
942 if (res != NULL && PyUnicode_Check(res)) {
943 const encodefuncentry *e = encodefuncs;
944 while (e->name != NULL) {
945 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
946 self->encodefunc = e->encodefunc;
947 break;
948 }
949 e++;
950 }
951 }
952 Py_XDECREF(res);
953
954 return 0;
955 }
956
957 static int
_textiowrapper_fix_encoder_state(textio * self)958 _textiowrapper_fix_encoder_state(textio *self)
959 {
960 if (!self->seekable || !self->encoder) {
961 return 0;
962 }
963
964 self->encoding_start_of_stream = 1;
965
966 PyObject *cookieObj = PyObject_CallMethodObjArgs(
967 self->buffer, _PyIO_str_tell, NULL);
968 if (cookieObj == NULL) {
969 return -1;
970 }
971
972 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
973 Py_DECREF(cookieObj);
974 if (cmp < 0) {
975 return -1;
976 }
977
978 if (cmp == 0) {
979 self->encoding_start_of_stream = 0;
980 PyObject *res = PyObject_CallMethodObjArgs(
981 self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL);
982 if (res == NULL) {
983 return -1;
984 }
985 Py_DECREF(res);
986 }
987
988 return 0;
989 }
990
991 /*[clinic input]
992 _io.TextIOWrapper.__init__
993 buffer: object
994 encoding: str(accept={str, NoneType}) = None
995 errors: object = None
996 newline: str(accept={str, NoneType}) = None
997 line_buffering: bool(accept={int}) = False
998 write_through: bool(accept={int}) = False
999
1000 Character and line based layer over a BufferedIOBase object, buffer.
1001
1002 encoding gives the name of the encoding that the stream will be
1003 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1004
1005 errors determines the strictness of encoding and decoding (see
1006 help(codecs.Codec) or the documentation for codecs.register) and
1007 defaults to "strict".
1008
1009 newline controls how line endings are handled. It can be None, '',
1010 '\n', '\r', and '\r\n'. It works as follows:
1011
1012 * On input, if newline is None, universal newlines mode is
1013 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1014 these are translated into '\n' before being returned to the
1015 caller. If it is '', universal newline mode is enabled, but line
1016 endings are returned to the caller untranslated. If it has any of
1017 the other legal values, input lines are only terminated by the given
1018 string, and the line ending is returned to the caller untranslated.
1019
1020 * On output, if newline is None, any '\n' characters written are
1021 translated to the system default line separator, os.linesep. If
1022 newline is '' or '\n', no translation takes place. If newline is any
1023 of the other legal values, any '\n' characters written are translated
1024 to the given string.
1025
1026 If line_buffering is True, a call to flush is implied when a call to
1027 write contains a newline character.
1028 [clinic start generated code]*/
1029
1030 static int
_io_TextIOWrapper___init___impl(textio * self,PyObject * buffer,const char * encoding,PyObject * errors,const char * newline,int line_buffering,int write_through)1031 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1032 const char *encoding, PyObject *errors,
1033 const char *newline, int line_buffering,
1034 int write_through)
1035 /*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
1036 {
1037 PyObject *raw, *codec_info = NULL;
1038 _PyIO_State *state = NULL;
1039 PyObject *res;
1040 int r;
1041
1042 self->ok = 0;
1043 self->detached = 0;
1044
1045 if (errors == Py_None) {
1046 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
1047 if (errors == NULL) {
1048 return -1;
1049 }
1050 }
1051 else if (!PyUnicode_Check(errors)) {
1052 // Check 'errors' argument here because Argument Clinic doesn't support
1053 // 'str(accept={str, NoneType})' converter.
1054 PyErr_Format(
1055 PyExc_TypeError,
1056 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1057 errors->ob_type->tp_name);
1058 return -1;
1059 }
1060
1061 if (validate_newline(newline) < 0) {
1062 return -1;
1063 }
1064
1065 Py_CLEAR(self->buffer);
1066 Py_CLEAR(self->encoding);
1067 Py_CLEAR(self->encoder);
1068 Py_CLEAR(self->decoder);
1069 Py_CLEAR(self->readnl);
1070 Py_CLEAR(self->decoded_chars);
1071 Py_CLEAR(self->pending_bytes);
1072 Py_CLEAR(self->snapshot);
1073 Py_CLEAR(self->errors);
1074 Py_CLEAR(self->raw);
1075 self->decoded_chars_used = 0;
1076 self->pending_bytes_count = 0;
1077 self->encodefunc = NULL;
1078 self->b2cratio = 0.0;
1079
1080 if (encoding == NULL) {
1081 /* Try os.device_encoding(fileno) */
1082 PyObject *fileno;
1083 state = IO_STATE();
1084 if (state == NULL)
1085 goto error;
1086 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
1087 /* Ignore only AttributeError and UnsupportedOperation */
1088 if (fileno == NULL) {
1089 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1090 PyErr_ExceptionMatches(state->unsupported_operation)) {
1091 PyErr_Clear();
1092 }
1093 else {
1094 goto error;
1095 }
1096 }
1097 else {
1098 int fd = _PyLong_AsInt(fileno);
1099 Py_DECREF(fileno);
1100 if (fd == -1 && PyErr_Occurred()) {
1101 goto error;
1102 }
1103
1104 self->encoding = _Py_device_encoding(fd);
1105 if (self->encoding == NULL)
1106 goto error;
1107 else if (!PyUnicode_Check(self->encoding))
1108 Py_CLEAR(self->encoding);
1109 }
1110 }
1111 if (encoding == NULL && self->encoding == NULL) {
1112 PyObject *locale_module = _PyIO_get_locale_module(state);
1113 if (locale_module == NULL)
1114 goto catch_ImportError;
1115 self->encoding = _PyObject_CallMethodIdObjArgs(
1116 locale_module, &PyId_getpreferredencoding, Py_False, NULL);
1117 Py_DECREF(locale_module);
1118 if (self->encoding == NULL) {
1119 catch_ImportError:
1120 /*
1121 Importing locale can raise an ImportError because of
1122 _functools, and locale.getpreferredencoding can raise an
1123 ImportError if _locale is not available. These will happen
1124 during module building.
1125 */
1126 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1127 PyErr_Clear();
1128 self->encoding = PyUnicode_FromString("ascii");
1129 }
1130 else
1131 goto error;
1132 }
1133 else if (!PyUnicode_Check(self->encoding))
1134 Py_CLEAR(self->encoding);
1135 }
1136 if (self->encoding != NULL) {
1137 encoding = PyUnicode_AsUTF8(self->encoding);
1138 if (encoding == NULL)
1139 goto error;
1140 }
1141 else if (encoding != NULL) {
1142 self->encoding = PyUnicode_FromString(encoding);
1143 if (self->encoding == NULL)
1144 goto error;
1145 }
1146 else {
1147 PyErr_SetString(PyExc_OSError,
1148 "could not determine default encoding");
1149 goto error;
1150 }
1151
1152 /* Check we have been asked for a real text encoding */
1153 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1154 if (codec_info == NULL) {
1155 Py_CLEAR(self->encoding);
1156 goto error;
1157 }
1158
1159 /* XXX: Failures beyond this point have the potential to leak elements
1160 * of the partially constructed object (like self->encoding)
1161 */
1162
1163 Py_INCREF(errors);
1164 self->errors = errors;
1165 self->chunk_size = 8192;
1166 self->line_buffering = line_buffering;
1167 self->write_through = write_through;
1168 if (set_newline(self, newline) < 0) {
1169 goto error;
1170 }
1171
1172 self->buffer = buffer;
1173 Py_INCREF(buffer);
1174
1175 /* Build the decoder object */
1176 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1177 goto error;
1178
1179 /* Build the encoder object */
1180 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1181 goto error;
1182
1183 /* Finished sorting out the codec details */
1184 Py_CLEAR(codec_info);
1185
1186 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1187 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1188 Py_TYPE(buffer) == &PyBufferedRandom_Type)
1189 {
1190 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1191 goto error;
1192 /* Cache the raw FileIO object to speed up 'closed' checks */
1193 if (raw != NULL) {
1194 if (Py_TYPE(raw) == &PyFileIO_Type)
1195 self->raw = raw;
1196 else
1197 Py_DECREF(raw);
1198 }
1199 }
1200
1201 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
1202 if (res == NULL)
1203 goto error;
1204 r = PyObject_IsTrue(res);
1205 Py_DECREF(res);
1206 if (r < 0)
1207 goto error;
1208 self->seekable = self->telling = r;
1209
1210 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1211 if (r < 0) {
1212 goto error;
1213 }
1214 Py_XDECREF(res);
1215 self->has_read1 = r;
1216
1217 self->encoding_start_of_stream = 0;
1218 if (_textiowrapper_fix_encoder_state(self) < 0) {
1219 goto error;
1220 }
1221
1222 self->ok = 1;
1223 return 0;
1224
1225 error:
1226 Py_XDECREF(codec_info);
1227 return -1;
1228 }
1229
1230 /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1231 * -1 on error.
1232 */
1233 static int
convert_optional_bool(PyObject * obj,int default_value)1234 convert_optional_bool(PyObject *obj, int default_value)
1235 {
1236 long v;
1237 if (obj == Py_None) {
1238 v = default_value;
1239 }
1240 else {
1241 v = PyLong_AsLong(obj);
1242 if (v == -1 && PyErr_Occurred())
1243 return -1;
1244 }
1245 return v != 0;
1246 }
1247
1248 static int
textiowrapper_change_encoding(textio * self,PyObject * encoding,PyObject * errors,int newline_changed)1249 textiowrapper_change_encoding(textio *self, PyObject *encoding,
1250 PyObject *errors, int newline_changed)
1251 {
1252 /* Use existing settings where new settings are not specified */
1253 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1254 return 0; // no change
1255 }
1256
1257 if (encoding == Py_None) {
1258 encoding = self->encoding;
1259 if (errors == Py_None) {
1260 errors = self->errors;
1261 }
1262 }
1263 else if (errors == Py_None) {
1264 errors = _PyUnicode_FromId(&PyId_strict);
1265 if (errors == NULL) {
1266 return -1;
1267 }
1268 }
1269
1270 const char *c_errors = PyUnicode_AsUTF8(errors);
1271 if (c_errors == NULL) {
1272 return -1;
1273 }
1274
1275 // Create new encoder & decoder
1276 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1277 PyUnicode_AsUTF8(encoding), "codecs.open()");
1278 if (codec_info == NULL) {
1279 return -1;
1280 }
1281 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1282 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1283 Py_DECREF(codec_info);
1284 return -1;
1285 }
1286 Py_DECREF(codec_info);
1287
1288 Py_INCREF(encoding);
1289 Py_INCREF(errors);
1290 Py_SETREF(self->encoding, encoding);
1291 Py_SETREF(self->errors, errors);
1292
1293 return _textiowrapper_fix_encoder_state(self);
1294 }
1295
1296 /*[clinic input]
1297 _io.TextIOWrapper.reconfigure
1298 *
1299 encoding: object = None
1300 errors: object = None
1301 newline as newline_obj: object(c_default="NULL") = None
1302 line_buffering as line_buffering_obj: object = None
1303 write_through as write_through_obj: object = None
1304
1305 Reconfigure the text stream with new parameters.
1306
1307 This also does an implicit stream flush.
1308
1309 [clinic start generated code]*/
1310
1311 static PyObject *
_io_TextIOWrapper_reconfigure_impl(textio * self,PyObject * encoding,PyObject * errors,PyObject * newline_obj,PyObject * line_buffering_obj,PyObject * write_through_obj)1312 _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1313 PyObject *errors, PyObject *newline_obj,
1314 PyObject *line_buffering_obj,
1315 PyObject *write_through_obj)
1316 /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
1317 {
1318 int line_buffering;
1319 int write_through;
1320 const char *newline = NULL;
1321
1322 /* Check if something is in the read buffer */
1323 if (self->decoded_chars != NULL) {
1324 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1325 _unsupported("It is not possible to set the encoding or newline "
1326 "of stream after the first read");
1327 return NULL;
1328 }
1329 }
1330
1331 if (newline_obj != NULL && newline_obj != Py_None) {
1332 newline = PyUnicode_AsUTF8(newline_obj);
1333 if (newline == NULL || validate_newline(newline) < 0) {
1334 return NULL;
1335 }
1336 }
1337
1338 line_buffering = convert_optional_bool(line_buffering_obj,
1339 self->line_buffering);
1340 write_through = convert_optional_bool(write_through_obj,
1341 self->write_through);
1342 if (line_buffering < 0 || write_through < 0) {
1343 return NULL;
1344 }
1345
1346 PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1347 if (res == NULL) {
1348 return NULL;
1349 }
1350 Py_DECREF(res);
1351 self->b2cratio = 0;
1352
1353 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1354 return NULL;
1355 }
1356
1357 if (textiowrapper_change_encoding(
1358 self, encoding, errors, newline_obj != NULL) < 0) {
1359 return NULL;
1360 }
1361
1362 self->line_buffering = line_buffering;
1363 self->write_through = write_through;
1364 Py_RETURN_NONE;
1365 }
1366
1367 static int
textiowrapper_clear(textio * self)1368 textiowrapper_clear(textio *self)
1369 {
1370 self->ok = 0;
1371 Py_CLEAR(self->buffer);
1372 Py_CLEAR(self->encoding);
1373 Py_CLEAR(self->encoder);
1374 Py_CLEAR(self->decoder);
1375 Py_CLEAR(self->readnl);
1376 Py_CLEAR(self->decoded_chars);
1377 Py_CLEAR(self->pending_bytes);
1378 Py_CLEAR(self->snapshot);
1379 Py_CLEAR(self->errors);
1380 Py_CLEAR(self->raw);
1381
1382 Py_CLEAR(self->dict);
1383 return 0;
1384 }
1385
/* Deallocator for TextIOWrapper objects: runs the IOBase finalizer, then
 * untracks the object from the GC, clears weak references, drops all owned
 * references and frees the memory.
 */
static void
textiowrapper_dealloc(textio *self)
{
    /* Flag the object as being finalized before the finalizer runs */
    self->finalizing = 1;
    /* A negative result stops the deallocation right here.
     * NOTE(review): presumably this means the finalizer resurrected the
     * object; confirm against _PyIOBase_finalize(). */
    if (_PyIOBase_finalize((PyObject *) self) < 0)
        return;
    self->ok = 0;
    _PyObject_GC_UNTRACK(self);
    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *)self);
    textiowrapper_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
1399
1400 static int
textiowrapper_traverse(textio * self,visitproc visit,void * arg)1401 textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1402 {
1403 Py_VISIT(self->buffer);
1404 Py_VISIT(self->encoding);
1405 Py_VISIT(self->encoder);
1406 Py_VISIT(self->decoder);
1407 Py_VISIT(self->readnl);
1408 Py_VISIT(self->decoded_chars);
1409 Py_VISIT(self->pending_bytes);
1410 Py_VISIT(self->snapshot);
1411 Py_VISIT(self->errors);
1412 Py_VISIT(self->raw);
1413
1414 Py_VISIT(self->dict);
1415 return 0;
1416 }
1417
1418 static PyObject *
1419 textiowrapper_closed_get(textio *self, void *context);
1420
/* This macro takes some shortcuts to make the common case faster.
 *
 * It makes the enclosing function `return NULL` when the stream is closed
 * or when the closed state cannot be determined:
 *   - for exact TextIOWrapper instances with a cached raw FileIO object,
 *     _PyFileIO_closed() is consulted directly;
 *   - for exact instances without one, the `closed` getter is called and
 *     its truth value used;
 *   - for any other type (subclasses), _PyIOBase_check_closed() is used.
 * On a closed exact instance it raises
 * ValueError("I/O operation on closed file.").
 * Only usable in functions that return a pointer.
 */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1447
/* Make the enclosing function raise ValueError and `return NULL` when the
 * object has not been successfully initialized (self->ok <= 0).
 * NOTE(review): this expands to a bare `if` statement (no do/while
 * wrapper), so it should only be used as a statement of its own.
 */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "I/O operation on uninitialized object"); \
        return NULL; \
    }
1454
/* Like CHECK_INITIALIZED, and additionally raise ValueError and
 * `return NULL` when the underlying buffer has been detached.
 * NOTE(review): this expands to two consecutive statements, so it should
 * only be used as a statement of its own, never as the sole body of an
 * unbraced if/else.
 */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }
1462
/* Variant of the attached/initialized check for functions that return an
 * int: raises the same ValueErrors but makes the enclosing function
 * `return -1` instead of NULL.
 */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1473
1474
1475 /*[clinic input]
1476 _io.TextIOWrapper.detach
1477 [clinic start generated code]*/
1478
1479 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1480 _io_TextIOWrapper_detach_impl(textio *self)
1481 /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1482 {
1483 PyObject *buffer, *res;
1484 CHECK_ATTACHED(self);
1485 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1486 if (res == NULL)
1487 return NULL;
1488 Py_DECREF(res);
1489 buffer = self->buffer;
1490 self->buffer = NULL;
1491 self->detached = 1;
1492 return buffer;
1493 }
1494
1495 /* Flush the internal write buffer. This doesn't explicitly flush the
1496 underlying buffered object, though. */
1497 static int
_textiowrapper_writeflush(textio * self)1498 _textiowrapper_writeflush(textio *self)
1499 {
1500 if (self->pending_bytes == NULL)
1501 return 0;
1502
1503 PyObject *pending = self->pending_bytes;
1504 PyObject *b;
1505
1506 if (PyBytes_Check(pending)) {
1507 b = pending;
1508 Py_INCREF(b);
1509 }
1510 else if (PyUnicode_Check(pending)) {
1511 assert(PyUnicode_IS_ASCII(pending));
1512 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1513 b = PyBytes_FromStringAndSize(
1514 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1515 if (b == NULL) {
1516 return -1;
1517 }
1518 }
1519 else {
1520 assert(PyList_Check(pending));
1521 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1522 if (b == NULL) {
1523 return -1;
1524 }
1525
1526 char *buf = PyBytes_AsString(b);
1527 Py_ssize_t pos = 0;
1528
1529 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1530 PyObject *obj = PyList_GET_ITEM(pending, i);
1531 char *src;
1532 Py_ssize_t len;
1533 if (PyUnicode_Check(obj)) {
1534 assert(PyUnicode_IS_ASCII(obj));
1535 src = PyUnicode_DATA(obj);
1536 len = PyUnicode_GET_LENGTH(obj);
1537 }
1538 else {
1539 assert(PyBytes_Check(obj));
1540 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1541 Py_DECREF(b);
1542 return -1;
1543 }
1544 }
1545 memcpy(buf + pos, src, len);
1546 pos += len;
1547 }
1548 assert(pos == self->pending_bytes_count);
1549 }
1550
1551 self->pending_bytes_count = 0;
1552 self->pending_bytes = NULL;
1553 Py_DECREF(pending);
1554
1555 PyObject *ret;
1556 do {
1557 ret = PyObject_CallMethodObjArgs(self->buffer,
1558 _PyIO_str_write, b, NULL);
1559 } while (ret == NULL && _PyIO_trap_eintr());
1560 Py_DECREF(b);
1561 if (ret == NULL)
1562 return -1;
1563 Py_DECREF(ret);
1564 return 0;
1565 }
1566
1567 /*[clinic input]
1568 _io.TextIOWrapper.write
1569 text: unicode
1570 /
1571 [clinic start generated code]*/
1572
static PyObject *
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
{
    /* Encode `text` and queue it in self->pending_bytes.
     *
     * '\n' is replaced by self->writenl when write translation is enabled
     * and writenl is set.  The queue is flushed to the buffer when it
     * exceeds chunk_size, when write_through is set, or when line
     * buffering is on and the text contains '\n' or '\r'; in the
     * line-buffering case the buffer itself is flushed as well.  Any
     * decoded read-ahead and snapshot are discarded and the decoder reset.
     *
     * Returns the length of `text` as passed in (measured before newline
     * translation) as an int object, or NULL with an exception set.
     */
    PyObject *ret;
    PyObject *b;
    Py_ssize_t textlen;
    int haslf = 0;
    int needflush = 0, text_needflush = 0;

    if (PyUnicode_READY(text) == -1)
        return NULL;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (self->encoder == NULL)
        return _unsupported("not writable");

    /* Own a reference: `text` may be replaced by a translated copy below */
    Py_INCREF(text);

    textlen = PyUnicode_GET_LENGTH(text);

    /* Only scan for '\n' when the answer can matter */
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
            haslf = 1;

    if (haslf && self->writetranslate && self->writenl != NULL) {
        PyObject *newtext = _PyObject_CallMethodId(
            text, &PyId_replace, "ss", "\n", self->writenl);
        Py_DECREF(text);
        if (newtext == NULL)
            return NULL;
        text = newtext;
    }

    if (self->write_through)
        text_needflush = 1;
    if (self->line_buffering &&
        (haslf ||
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
        needflush = 1;

    /* XXX What if we were just reading? */
    if (self->encodefunc != NULL) {
        if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
            /* ASCII text under an ASCII-compatible encoding: queue the str
             * itself; it is turned into bytes when the queue is flushed. */
            b = text;
            Py_INCREF(b);
        }
        else {
            b = (*self->encodefunc)((PyObject *) self, text);
        }
        self->encoding_start_of_stream = 0;
    }
    else
        b = PyObject_CallMethodObjArgs(self->encoder,
                                       _PyIO_str_encode, text, NULL);

    Py_DECREF(text);
    if (b == NULL)
        return NULL;
    /* `text` is only compared by address from here on; when b == text the
     * reference held through `b` keeps the object alive. */
    if (b != text && !PyBytes_Check(b)) {
        PyErr_Format(PyExc_TypeError,
                     "encoder should return a bytes object, not '%.200s'",
                     Py_TYPE(b)->tp_name);
        Py_DECREF(b);
        return NULL;
    }

    Py_ssize_t bytes_len;
    if (b == text) {
        bytes_len = PyUnicode_GET_LENGTH(b);
    }
    else {
        bytes_len = PyBytes_GET_SIZE(b);
    }

    /* Queue `b`: a single object first, promoted to a list on the second
     * write, appended to afterwards. */
    if (self->pending_bytes == NULL) {
        self->pending_bytes_count = 0;
        self->pending_bytes = b;
    }
    else if (!PyList_CheckExact(self->pending_bytes)) {
        PyObject *list = PyList_New(2);
        if (list == NULL) {
            Py_DECREF(b);
            return NULL;
        }
        /* PyList_SET_ITEM steals both references */
        PyList_SET_ITEM(list, 0, self->pending_bytes);
        PyList_SET_ITEM(list, 1, b);
        self->pending_bytes = list;
    }
    else {
        if (PyList_Append(self->pending_bytes, b) < 0) {
            Py_DECREF(b);
            return NULL;
        }
        Py_DECREF(b);
    }

    self->pending_bytes_count += bytes_len;
    if (self->pending_bytes_count > self->chunk_size || needflush ||
        text_needflush) {
        if (_textiowrapper_writeflush(self) < 0)
            return NULL;
    }

    if (needflush) {
        ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    /* Writing invalidates any read-ahead state */
    textiowrapper_set_decoded_chars(self, NULL);
    Py_CLEAR(self->snapshot);

    if (self->decoder) {
        ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    return PyLong_FromSsize_t(textlen);
}
1698
1699 /* Steal a reference to chars and store it in the decoded_char buffer;
1700 */
1701 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1702 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1703 {
1704 Py_XSETREF(self->decoded_chars, chars);
1705 self->decoded_chars_used = 0;
1706 }
1707
1708 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1709 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1710 {
1711 PyObject *chars;
1712 Py_ssize_t avail;
1713
1714 if (self->decoded_chars == NULL)
1715 return PyUnicode_FromStringAndSize(NULL, 0);
1716
1717 /* decoded_chars is guaranteed to be "ready". */
1718 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1719 - self->decoded_chars_used);
1720
1721 assert(avail >= 0);
1722
1723 if (n < 0 || n > avail)
1724 n = avail;
1725
1726 if (self->decoded_chars_used > 0 || n < avail) {
1727 chars = PyUnicode_Substring(self->decoded_chars,
1728 self->decoded_chars_used,
1729 self->decoded_chars_used + n);
1730 if (chars == NULL)
1731 return NULL;
1732 }
1733 else {
1734 chars = self->decoded_chars;
1735 Py_INCREF(chars);
1736 }
1737
1738 self->decoded_chars_used += n;
1739 return chars;
1740 }
1741
/* Read and decode the next chunk of data from the BufferedReader.
 *
 * size_hint: number of characters the caller would like; when positive it
 *   is scaled by the last observed bytes-per-character ratio (at least 1.0)
 *   and the larger of that and self->chunk_size is requested.
 *
 * Returns 1 if data was obtained, 0 at EOF (empty read and no decoded
 * characters), -1 on error with an exception set.
 */
static int
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
{
    PyObject *dec_buffer = NULL;
    PyObject *dec_flags = NULL;
    PyObject *input_chunk = NULL;
    Py_buffer input_chunk_buf;
    PyObject *decoded_chars, *chunk_size;
    Py_ssize_t nbytes, nchars;
    int eof;

    /* The return value is True unless EOF was reached.  The decoded string is
     * placed in self._decoded_chars (replacing its previous value).  The
     * entire input chunk is sent to the decoder, though some of it may remain
     * buffered in the decoder, yet to be converted.
     */

    if (self->decoder == NULL) {
        _unsupported("not readable");
        return -1;
    }

    if (self->telling) {
        /* To prepare for tell(), we need to snapshot a point in the file
         * where the decoder's input buffer is empty.
         */

        PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
                                                     _PyIO_str_getstate, NULL);
        if (state == NULL)
            return -1;
        /* Given this, we know there was a valid snapshot point
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
         */
        if (!PyTuple_Check(state)) {
            PyErr_SetString(PyExc_TypeError,
                            "illegal decoder state");
            Py_DECREF(state);
            return -1;
        }
        /* dec_buffer and dec_flags are borrowed from `state` here */
        if (!PyArg_ParseTuple(state,
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
        {
            Py_DECREF(state);
            return -1;
        }

        if (!PyBytes_Check(dec_buffer)) {
            PyErr_Format(PyExc_TypeError,
                         "illegal decoder state: the first item should be a "
                         "bytes object, not '%.200s'",
                         Py_TYPE(dec_buffer)->tp_name);
            Py_DECREF(state);
            return -1;
        }
        /* Turn the borrowed items into owned references before releasing
         * the tuple; from here on the `fail` label is responsible for them. */
        Py_INCREF(dec_buffer);
        Py_INCREF(dec_flags);
        Py_DECREF(state);
    }

    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
    if (size_hint > 0) {
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
    }
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
    if (chunk_size == NULL)
        goto fail;

    /* Prefer read1() when the buffer provides it */
    input_chunk = PyObject_CallMethodObjArgs(self->buffer,
        (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
        chunk_size, NULL);
    Py_DECREF(chunk_size);
    if (input_chunk == NULL)
        goto fail;

    /* The buffer view is only used to validate the type and get the size */
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
        PyErr_Format(PyExc_TypeError,
                     "underlying %s() should have returned a bytes-like object, "
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
                     Py_TYPE(input_chunk)->tp_name);
        goto fail;
    }

    nbytes = input_chunk_buf.len;
    eof = (nbytes == 0);

    decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
    PyBuffer_Release(&input_chunk_buf);
    if (decoded_chars == NULL)
        goto fail;

    /* The reference is stolen; decoded_chars stays valid through self */
    textiowrapper_set_decoded_chars(self, decoded_chars);
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
    if (nchars > 0)
        self->b2cratio = (double) nbytes / nchars;
    else
        self->b2cratio = 0.0;
    /* An empty read that still produced characters is not EOF yet */
    if (nchars > 0)
        eof = 0;

    if (self->telling) {
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
         * next input to be decoded is dec_buffer + input_chunk.
         */
        PyObject *next_input = dec_buffer;
        PyBytes_Concat(&next_input, input_chunk);
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
        if (next_input == NULL) {
            goto fail;
        }
        /* "N" hands both references over to Py_BuildValue */
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
        if (snapshot == NULL) {
            /* Cleared so that the fail path does not release it again */
            dec_flags = NULL;
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);
    }
    Py_DECREF(input_chunk);

    return (eof == 0);

  fail:
    Py_XDECREF(dec_buffer);
    Py_XDECREF(dec_flags);
    Py_XDECREF(input_chunk);
    return -1;
}
1871
1872 /*[clinic input]
1873 _io.TextIOWrapper.read
1874 size as n: Py_ssize_t(accept={int, NoneType}) = -1
1875 /
1876 [clinic start generated code]*/
1877
1878 static PyObject *
_io_TextIOWrapper_read_impl(textio * self,Py_ssize_t n)1879 _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1880 /*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
1881 {
1882 PyObject *result = NULL, *chunks = NULL;
1883
1884 CHECK_ATTACHED(self);
1885 CHECK_CLOSED(self);
1886
1887 if (self->decoder == NULL)
1888 return _unsupported("not readable");
1889
1890 if (_textiowrapper_writeflush(self) < 0)
1891 return NULL;
1892
1893 if (n < 0) {
1894 /* Read everything */
1895 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
1896 PyObject *decoded;
1897 if (bytes == NULL)
1898 goto fail;
1899
1900 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1901 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1902 bytes, 1);
1903 else
1904 decoded = PyObject_CallMethodObjArgs(
1905 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
1906 Py_DECREF(bytes);
1907 if (check_decoded(decoded) < 0)
1908 goto fail;
1909
1910 result = textiowrapper_get_decoded_chars(self, -1);
1911
1912 if (result == NULL) {
1913 Py_DECREF(decoded);
1914 return NULL;
1915 }
1916
1917 PyUnicode_AppendAndDel(&result, decoded);
1918 if (result == NULL)
1919 goto fail;
1920
1921 textiowrapper_set_decoded_chars(self, NULL);
1922 Py_CLEAR(self->snapshot);
1923 return result;
1924 }
1925 else {
1926 int res = 1;
1927 Py_ssize_t remaining = n;
1928
1929 result = textiowrapper_get_decoded_chars(self, n);
1930 if (result == NULL)
1931 goto fail;
1932 if (PyUnicode_READY(result) == -1)
1933 goto fail;
1934 remaining -= PyUnicode_GET_LENGTH(result);
1935
1936 /* Keep reading chunks until we have n characters to return */
1937 while (remaining > 0) {
1938 res = textiowrapper_read_chunk(self, remaining);
1939 if (res < 0) {
1940 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1941 when EINTR occurs so we needn't do it ourselves. */
1942 if (_PyIO_trap_eintr()) {
1943 continue;
1944 }
1945 goto fail;
1946 }
1947 if (res == 0) /* EOF */
1948 break;
1949 if (chunks == NULL) {
1950 chunks = PyList_New(0);
1951 if (chunks == NULL)
1952 goto fail;
1953 }
1954 if (PyUnicode_GET_LENGTH(result) > 0 &&
1955 PyList_Append(chunks, result) < 0)
1956 goto fail;
1957 Py_DECREF(result);
1958 result = textiowrapper_get_decoded_chars(self, remaining);
1959 if (result == NULL)
1960 goto fail;
1961 remaining -= PyUnicode_GET_LENGTH(result);
1962 }
1963 if (chunks != NULL) {
1964 if (result != NULL && PyList_Append(chunks, result) < 0)
1965 goto fail;
1966 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
1967 if (result == NULL)
1968 goto fail;
1969 Py_CLEAR(chunks);
1970 }
1971 return result;
1972 }
1973 fail:
1974 Py_XDECREF(result);
1975 Py_XDECREF(chunks);
1976 return NULL;
1977 }
1978
1979
1980 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
1981 that is to the NUL character. Otherwise the function will produce
1982 incorrect results. */
1983 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)1984 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
1985 {
1986 if (kind == PyUnicode_1BYTE_KIND) {
1987 assert(ch < 256);
1988 return (char *) memchr((void *) s, (char) ch, end - s);
1989 }
1990 for (;;) {
1991 while (PyUnicode_READ(kind, s, 0) > ch)
1992 s += kind;
1993 if (PyUnicode_READ(kind, s, 0) == ch)
1994 return s;
1995 if (s == end)
1996 return NULL;
1997 s += kind;
1998 }
1999 }
2000
2001 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,int kind,const char * start,const char * end,Py_ssize_t * consumed)2002 _PyIO_find_line_ending(
2003 int translated, int universal, PyObject *readnl,
2004 int kind, const char *start, const char *end, Py_ssize_t *consumed)
2005 {
2006 Py_ssize_t len = ((char*)end - (char*)start)/kind;
2007
2008 if (translated) {
2009 /* Newlines are already translated, only search for \n */
2010 const char *pos = find_control_char(kind, start, end, '\n');
2011 if (pos != NULL)
2012 return (pos - start)/kind + 1;
2013 else {
2014 *consumed = len;
2015 return -1;
2016 }
2017 }
2018 else if (universal) {
2019 /* Universal newline search. Find any of \r, \r\n, \n
2020 * The decoder ensures that \r\n are not split in two pieces
2021 */
2022 const char *s = start;
2023 for (;;) {
2024 Py_UCS4 ch;
2025 /* Fast path for non-control chars. The loop always ends
2026 since the Unicode string is NUL-terminated. */
2027 while (PyUnicode_READ(kind, s, 0) > '\r')
2028 s += kind;
2029 if (s >= end) {
2030 *consumed = len;
2031 return -1;
2032 }
2033 ch = PyUnicode_READ(kind, s, 0);
2034 s += kind;
2035 if (ch == '\n')
2036 return (s - start)/kind;
2037 if (ch == '\r') {
2038 if (PyUnicode_READ(kind, s, 0) == '\n')
2039 return (s - start)/kind + 1;
2040 else
2041 return (s - start)/kind;
2042 }
2043 }
2044 }
2045 else {
2046 /* Non-universal mode. */
2047 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2048 Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2049 /* Assume that readnl is an ASCII character. */
2050 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2051 if (readnl_len == 1) {
2052 const char *pos = find_control_char(kind, start, end, nl[0]);
2053 if (pos != NULL)
2054 return (pos - start)/kind + 1;
2055 *consumed = len;
2056 return -1;
2057 }
2058 else {
2059 const char *s = start;
2060 const char *e = end - (readnl_len - 1)*kind;
2061 const char *pos;
2062 if (e < s)
2063 e = s;
2064 while (s < e) {
2065 Py_ssize_t i;
2066 const char *pos = find_control_char(kind, s, end, nl[0]);
2067 if (pos == NULL || pos >= e)
2068 break;
2069 for (i = 1; i < readnl_len; i++) {
2070 if (PyUnicode_READ(kind, pos, i) != nl[i])
2071 break;
2072 }
2073 if (i == readnl_len)
2074 return (pos - start)/kind + readnl_len;
2075 s = pos + kind;
2076 }
2077 pos = find_control_char(kind, e, end, nl[0]);
2078 if (pos == NULL)
2079 *consumed = len;
2080 else
2081 *consumed = (pos - start)/kind;
2082 return -1;
2083 }
2084 }
2085 }
2086
/* Read one line from the stream.
 *
 * limit: maximum number of characters to return, or negative for no limit.
 *
 * Pending writes are flushed first.  Decoded chunks are scanned with
 * _PyIO_find_line_ending(); text that cannot contain the line ending is set
 * aside in `chunks`, while a tail that might be the start of a line ending
 * is carried over in `remaining` and prepended to the next chunk.
 *
 * Returns a new str (the empty string at EOF), or NULL with an exception
 * set.  Does not check that the wrapper is attached; it does check that the
 * stream is not closed.
 */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    /* Number of characters already set aside in `chunks` */
    chunked = 0;

    while (1) {
        char *ptr;
        Py_ssize_t line_len;
        int kind;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self, 0);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto error;
            }
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Scan the decoded buffer in place, from the read position */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the carried-over tail; offset_to_buffer records how
             * far positions in `line` are shifted relative to the buffer */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
            if (PyUnicode_READY(line) == -1)
                goto error;
        }

        ptr = PyUnicode_DATA(line);
        line_len = PyUnicode_GET_LENGTH(line);
        kind = PyUnicode_KIND(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            kind,
            ptr + kind * start,
            ptr + kind * line_len,
            &consumed);
        if (endpos >= 0) {
            /* Found a line ending; endpos was relative to `start` */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_Substring(line, start, endpos);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_LENGTH(s);
            Py_DECREF(s);
        }
        /* There may be some remaining bytes we'll have to prepend to the
           next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_Substring(line, endpos, line_len);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        self->decoded_chars_used = endpos - offset_to_buffer;
        /* Only copy when the line is a strict part of `line` */
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
            PyObject *s = PyUnicode_Substring(line, start, endpos);
            Py_CLEAR(line);
            if (s == NULL)
                goto error;
            line = s;
        }
    }
    if (remaining != NULL) {
        /* EOF was hit with a carried-over tail: it belongs to the result */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the pieces set aside plus the final part */
        if (line != NULL) {
            if (PyList_Append(chunks, line) < 0)
                goto error;
            Py_DECREF(line);
        }
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_CLEAR(chunks);
    }
    if (line == NULL) {
        Py_INCREF(_PyIO_empty_str);
        line = _PyIO_empty_str;
    }

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
2249
2250 /*[clinic input]
2251 _io.TextIOWrapper.readline
2252 size: Py_ssize_t = -1
2253 /
2254 [clinic start generated code]*/
2255
2256 static PyObject *
_io_TextIOWrapper_readline_impl(textio * self,Py_ssize_t size)2257 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2258 /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2259 {
2260 CHECK_ATTACHED(self);
2261 return _textiowrapper_readline(self, size);
2262 }
2263
2264 /* Seek and Tell */
2265
2266 typedef struct {
2267 Py_off_t start_pos;
2268 int dec_flags;
2269 int bytes_to_feed;
2270 int chars_to_skip;
2271 char need_eof;
2272 } cookie_type;
2273
2274 /*
2275 To speed up cookie packing/unpacking, we store the fields in a temporary
2276 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
2277 The following macros define at which offsets in the intermediary byte
2278 string the various CookieStruct fields will be stored.
2279 */
2280
2281 #define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
2282
2283 #if PY_BIG_ENDIAN
2284 /* We want the least significant byte of start_pos to also be the least
2285 significant byte of the cookie, which means that in big-endian mode we
2286 must copy the fields in reverse order. */
2287
2288 # define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
2289 # define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
2290 # define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
2291 # define OFF_CHARS_TO_SKIP (sizeof(char))
2292 # define OFF_NEED_EOF 0
2293
2294 #else
2295 /* Little-endian mode: the least significant byte of start_pos will
2296 naturally end up the least significant byte of the cookie. */
2297
2298 # define OFF_START_POS 0
2299 # define OFF_DEC_FLAGS (sizeof(Py_off_t))
2300 # define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
2301 # define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
2302 # define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
2303
2304 #endif
2305
2306 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)2307 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2308 {
2309 unsigned char buffer[COOKIE_BUF_LEN];
2310 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2311 if (cookieLong == NULL)
2312 return -1;
2313
2314 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2315 PY_LITTLE_ENDIAN, 0) < 0) {
2316 Py_DECREF(cookieLong);
2317 return -1;
2318 }
2319 Py_DECREF(cookieLong);
2320
2321 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2322 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2323 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2324 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2325 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2326
2327 return 0;
2328 }
2329
2330 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)2331 textiowrapper_build_cookie(cookie_type *cookie)
2332 {
2333 unsigned char buffer[COOKIE_BUF_LEN];
2334
2335 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2336 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2337 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2338 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2339 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2340
2341 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2342 PY_LITTLE_ENDIAN, 0);
2343 }
2344
2345 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2346 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2347 {
2348 PyObject *res;
2349 /* When seeking to the start of the stream, we call decoder.reset()
2350 rather than decoder.getstate().
2351 This is for a few decoders such as utf-16 for which the state value
2352 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2353 utf-16, that we are expecting a BOM).
2354 */
2355 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2356 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2357 else
2358 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2359 "((yi))", "", cookie->dec_flags);
2360 if (res == NULL)
2361 return -1;
2362 Py_DECREF(res);
2363 return 0;
2364 }
2365
2366 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2367 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2368 {
2369 PyObject *res;
2370 if (start_of_stream) {
2371 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2372 self->encoding_start_of_stream = 1;
2373 }
2374 else {
2375 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2376 _PyLong_Zero, NULL);
2377 self->encoding_start_of_stream = 0;
2378 }
2379 if (res == NULL)
2380 return -1;
2381 Py_DECREF(res);
2382 return 0;
2383 }
2384
2385 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2386 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2387 {
2388 /* Same as _textiowrapper_decoder_setstate() above. */
2389 return _textiowrapper_encoder_reset(
2390 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2391 }
2392
2393 /*[clinic input]
2394 _io.TextIOWrapper.seek
2395 cookie as cookieObj: object
2396 whence: int = 0
2397 /
2398 [clinic start generated code]*/
2399
2400 static PyObject *
_io_TextIOWrapper_seek_impl(textio * self,PyObject * cookieObj,int whence)2401 _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2402 /*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
2403 {
2404 PyObject *posobj;
2405 cookie_type cookie;
2406 PyObject *res;
2407 int cmp;
2408 PyObject *snapshot;
2409
2410 CHECK_ATTACHED(self);
2411 CHECK_CLOSED(self);
2412
2413 Py_INCREF(cookieObj);
2414
2415 if (!self->seekable) {
2416 _unsupported("underlying stream is not seekable");
2417 goto fail;
2418 }
2419
2420 switch (whence) {
2421 case SEEK_CUR:
2422 /* seek relative to current position */
2423 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2424 if (cmp < 0)
2425 goto fail;
2426
2427 if (cmp == 0) {
2428 _unsupported("can't do nonzero cur-relative seeks");
2429 goto fail;
2430 }
2431
2432 /* Seeking to the current position should attempt to
2433 * sync the underlying buffer with the current position.
2434 */
2435 Py_DECREF(cookieObj);
2436 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
2437 if (cookieObj == NULL)
2438 goto fail;
2439 break;
2440
2441 case SEEK_END:
2442 /* seek relative to end of file */
2443 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2444 if (cmp < 0)
2445 goto fail;
2446
2447 if (cmp == 0) {
2448 _unsupported("can't do nonzero end-relative seeks");
2449 goto fail;
2450 }
2451
2452 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2453 if (res == NULL)
2454 goto fail;
2455 Py_DECREF(res);
2456
2457 textiowrapper_set_decoded_chars(self, NULL);
2458 Py_CLEAR(self->snapshot);
2459 if (self->decoder) {
2460 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
2461 if (res == NULL)
2462 goto fail;
2463 Py_DECREF(res);
2464 }
2465
2466 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
2467 Py_CLEAR(cookieObj);
2468 if (res == NULL)
2469 goto fail;
2470 if (self->encoder) {
2471 /* If seek() == 0, we are at the start of stream, otherwise not */
2472 cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
2473 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2474 Py_DECREF(res);
2475 goto fail;
2476 }
2477 }
2478 return res;
2479
2480 case SEEK_SET:
2481 break;
2482
2483 default:
2484 PyErr_Format(PyExc_ValueError,
2485 "invalid whence (%d, should be %d, %d or %d)", whence,
2486 SEEK_SET, SEEK_CUR, SEEK_END);
2487 goto fail;
2488 }
2489
2490 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
2491 if (cmp < 0)
2492 goto fail;
2493
2494 if (cmp == 1) {
2495 PyErr_Format(PyExc_ValueError,
2496 "negative seek position %R", cookieObj);
2497 goto fail;
2498 }
2499
2500 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2501 if (res == NULL)
2502 goto fail;
2503 Py_DECREF(res);
2504
2505 /* The strategy of seek() is to go back to the safe start point
2506 * and replay the effect of read(chars_to_skip) from there.
2507 */
2508 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2509 goto fail;
2510
2511 /* Seek back to the safe start point. */
2512 posobj = PyLong_FromOff_t(cookie.start_pos);
2513 if (posobj == NULL)
2514 goto fail;
2515 res = PyObject_CallMethodObjArgs(self->buffer,
2516 _PyIO_str_seek, posobj, NULL);
2517 Py_DECREF(posobj);
2518 if (res == NULL)
2519 goto fail;
2520 Py_DECREF(res);
2521
2522 textiowrapper_set_decoded_chars(self, NULL);
2523 Py_CLEAR(self->snapshot);
2524
2525 /* Restore the decoder to its state from the safe start point. */
2526 if (self->decoder) {
2527 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2528 goto fail;
2529 }
2530
2531 if (cookie.chars_to_skip) {
2532 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2533 PyObject *input_chunk = _PyObject_CallMethodId(
2534 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
2535 PyObject *decoded;
2536
2537 if (input_chunk == NULL)
2538 goto fail;
2539
2540 if (!PyBytes_Check(input_chunk)) {
2541 PyErr_Format(PyExc_TypeError,
2542 "underlying read() should have returned a bytes "
2543 "object, not '%.200s'",
2544 Py_TYPE(input_chunk)->tp_name);
2545 Py_DECREF(input_chunk);
2546 goto fail;
2547 }
2548
2549 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2550 if (snapshot == NULL) {
2551 goto fail;
2552 }
2553 Py_XSETREF(self->snapshot, snapshot);
2554
2555 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2556 "Oi", input_chunk, (int)cookie.need_eof);
2557
2558 if (check_decoded(decoded) < 0)
2559 goto fail;
2560
2561 textiowrapper_set_decoded_chars(self, decoded);
2562
2563 /* Skip chars_to_skip of the decoded characters. */
2564 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2565 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2566 goto fail;
2567 }
2568 self->decoded_chars_used = cookie.chars_to_skip;
2569 }
2570 else {
2571 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2572 if (snapshot == NULL)
2573 goto fail;
2574 Py_XSETREF(self->snapshot, snapshot);
2575 }
2576
2577 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2578 if (self->encoder) {
2579 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2580 goto fail;
2581 }
2582 return cookieObj;
2583 fail:
2584 Py_XDECREF(cookieObj);
2585 return NULL;
2586
2587 }
2588
2589 /*[clinic input]
2590 _io.TextIOWrapper.tell
2591 [clinic start generated code]*/
2592
2593 static PyObject *
_io_TextIOWrapper_tell_impl(textio * self)2594 _io_TextIOWrapper_tell_impl(textio *self)
2595 /*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
2596 {
2597 PyObject *res;
2598 PyObject *posobj = NULL;
2599 cookie_type cookie = {0,0,0,0,0};
2600 PyObject *next_input;
2601 Py_ssize_t chars_to_skip, chars_decoded;
2602 Py_ssize_t skip_bytes, skip_back;
2603 PyObject *saved_state = NULL;
2604 char *input, *input_end;
2605 Py_ssize_t dec_buffer_len;
2606 int dec_flags;
2607
2608 CHECK_ATTACHED(self);
2609 CHECK_CLOSED(self);
2610
2611 if (!self->seekable) {
2612 _unsupported("underlying stream is not seekable");
2613 goto fail;
2614 }
2615 if (!self->telling) {
2616 PyErr_SetString(PyExc_OSError,
2617 "telling position disabled by next() call");
2618 goto fail;
2619 }
2620
2621 if (_textiowrapper_writeflush(self) < 0)
2622 return NULL;
2623 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2624 if (res == NULL)
2625 goto fail;
2626 Py_DECREF(res);
2627
2628 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
2629 if (posobj == NULL)
2630 goto fail;
2631
2632 if (self->decoder == NULL || self->snapshot == NULL) {
2633 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2634 return posobj;
2635 }
2636
2637 #if defined(HAVE_LARGEFILE_SUPPORT)
2638 cookie.start_pos = PyLong_AsLongLong(posobj);
2639 #else
2640 cookie.start_pos = PyLong_AsLong(posobj);
2641 #endif
2642 Py_DECREF(posobj);
2643 if (PyErr_Occurred())
2644 goto fail;
2645
2646 /* Skip backward to the snapshot point (see _read_chunk). */
2647 assert(PyTuple_Check(self->snapshot));
2648 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2649 goto fail;
2650
2651 assert (PyBytes_Check(next_input));
2652
2653 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2654
2655 /* How many decoded characters have been used up since the snapshot? */
2656 if (self->decoded_chars_used == 0) {
2657 /* We haven't moved from the snapshot point. */
2658 return textiowrapper_build_cookie(&cookie);
2659 }
2660
2661 chars_to_skip = self->decoded_chars_used;
2662
2663 /* Decoder state will be restored at the end */
2664 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2665 _PyIO_str_getstate, NULL);
2666 if (saved_state == NULL)
2667 goto fail;
2668
2669 #define DECODER_GETSTATE() do { \
2670 PyObject *dec_buffer; \
2671 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2672 _PyIO_str_getstate, NULL); \
2673 if (_state == NULL) \
2674 goto fail; \
2675 if (!PyTuple_Check(_state)) { \
2676 PyErr_SetString(PyExc_TypeError, \
2677 "illegal decoder state"); \
2678 Py_DECREF(_state); \
2679 goto fail; \
2680 } \
2681 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2682 &dec_buffer, &dec_flags)) \
2683 { \
2684 Py_DECREF(_state); \
2685 goto fail; \
2686 } \
2687 if (!PyBytes_Check(dec_buffer)) { \
2688 PyErr_Format(PyExc_TypeError, \
2689 "illegal decoder state: the first item should be a " \
2690 "bytes object, not '%.200s'", \
2691 Py_TYPE(dec_buffer)->tp_name); \
2692 Py_DECREF(_state); \
2693 goto fail; \
2694 } \
2695 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2696 Py_DECREF(_state); \
2697 } while (0)
2698
2699 #define DECODER_DECODE(start, len, res) do { \
2700 PyObject *_decoded = _PyObject_CallMethodId( \
2701 self->decoder, &PyId_decode, "y#", start, len); \
2702 if (check_decoded(_decoded) < 0) \
2703 goto fail; \
2704 res = PyUnicode_GET_LENGTH(_decoded); \
2705 Py_DECREF(_decoded); \
2706 } while (0)
2707
2708 /* Fast search for an acceptable start point, close to our
2709 current pos */
2710 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2711 skip_back = 1;
2712 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2713 input = PyBytes_AS_STRING(next_input);
2714 while (skip_bytes > 0) {
2715 /* Decode up to temptative start point */
2716 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2717 goto fail;
2718 DECODER_DECODE(input, skip_bytes, chars_decoded);
2719 if (chars_decoded <= chars_to_skip) {
2720 DECODER_GETSTATE();
2721 if (dec_buffer_len == 0) {
2722 /* Before pos and no bytes buffered in decoder => OK */
2723 cookie.dec_flags = dec_flags;
2724 chars_to_skip -= chars_decoded;
2725 break;
2726 }
2727 /* Skip back by buffered amount and reset heuristic */
2728 skip_bytes -= dec_buffer_len;
2729 skip_back = 1;
2730 }
2731 else {
2732 /* We're too far ahead, skip back a bit */
2733 skip_bytes -= skip_back;
2734 skip_back *= 2;
2735 }
2736 }
2737 if (skip_bytes <= 0) {
2738 skip_bytes = 0;
2739 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2740 goto fail;
2741 }
2742
2743 /* Note our initial start point. */
2744 cookie.start_pos += skip_bytes;
2745 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2746 if (chars_to_skip == 0)
2747 goto finally;
2748
2749 /* We should be close to the desired position. Now feed the decoder one
2750 * byte at a time until we reach the `chars_to_skip` target.
2751 * As we go, note the nearest "safe start point" before the current
2752 * location (a point where the decoder has nothing buffered, so seek()
2753 * can safely start from there and advance to this location).
2754 */
2755 chars_decoded = 0;
2756 input = PyBytes_AS_STRING(next_input);
2757 input_end = input + PyBytes_GET_SIZE(next_input);
2758 input += skip_bytes;
2759 while (input < input_end) {
2760 Py_ssize_t n;
2761
2762 DECODER_DECODE(input, (Py_ssize_t)1, n);
2763 /* We got n chars for 1 byte */
2764 chars_decoded += n;
2765 cookie.bytes_to_feed += 1;
2766 DECODER_GETSTATE();
2767
2768 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2769 /* Decoder buffer is empty, so this is a safe start point. */
2770 cookie.start_pos += cookie.bytes_to_feed;
2771 chars_to_skip -= chars_decoded;
2772 cookie.dec_flags = dec_flags;
2773 cookie.bytes_to_feed = 0;
2774 chars_decoded = 0;
2775 }
2776 if (chars_decoded >= chars_to_skip)
2777 break;
2778 input++;
2779 }
2780 if (input == input_end) {
2781 /* We didn't get enough decoded data; signal EOF to get more. */
2782 PyObject *decoded = _PyObject_CallMethodId(
2783 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
2784 if (check_decoded(decoded) < 0)
2785 goto fail;
2786 chars_decoded += PyUnicode_GET_LENGTH(decoded);
2787 Py_DECREF(decoded);
2788 cookie.need_eof = 1;
2789
2790 if (chars_decoded < chars_to_skip) {
2791 PyErr_SetString(PyExc_OSError,
2792 "can't reconstruct logical file position");
2793 goto fail;
2794 }
2795 }
2796
2797 finally:
2798 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
2799 Py_DECREF(saved_state);
2800 if (res == NULL)
2801 return NULL;
2802 Py_DECREF(res);
2803
2804 /* The returned cookie corresponds to the last safe start point. */
2805 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2806 return textiowrapper_build_cookie(&cookie);
2807
2808 fail:
2809 if (saved_state) {
2810 PyObject *type, *value, *traceback;
2811 PyErr_Fetch(&type, &value, &traceback);
2812 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
2813 _PyErr_ChainExceptions(type, value, traceback);
2814 Py_DECREF(saved_state);
2815 Py_XDECREF(res);
2816 }
2817 return NULL;
2818 }
2819
2820 /*[clinic input]
2821 _io.TextIOWrapper.truncate
2822 pos: object = None
2823 /
2824 [clinic start generated code]*/
2825
2826 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2827 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2828 /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2829 {
2830 PyObject *res;
2831
2832 CHECK_ATTACHED(self)
2833
2834 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2835 if (res == NULL)
2836 return NULL;
2837 Py_DECREF(res);
2838
2839 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2840 }
2841
2842 static PyObject *
textiowrapper_repr(textio * self)2843 textiowrapper_repr(textio *self)
2844 {
2845 PyObject *nameobj, *modeobj, *res, *s;
2846 int status;
2847
2848 CHECK_INITIALIZED(self);
2849
2850 res = PyUnicode_FromString("<_io.TextIOWrapper");
2851 if (res == NULL)
2852 return NULL;
2853
2854 status = Py_ReprEnter((PyObject *)self);
2855 if (status != 0) {
2856 if (status > 0) {
2857 PyErr_Format(PyExc_RuntimeError,
2858 "reentrant call inside %s.__repr__",
2859 Py_TYPE(self)->tp_name);
2860 }
2861 goto error;
2862 }
2863 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2864 if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
2865 goto error;
2866 }
2867 /* Ignore ValueError raised if the underlying stream was detached */
2868 PyErr_Clear();
2869 }
2870 if (nameobj != NULL) {
2871 s = PyUnicode_FromFormat(" name=%R", nameobj);
2872 Py_DECREF(nameobj);
2873 if (s == NULL)
2874 goto error;
2875 PyUnicode_AppendAndDel(&res, s);
2876 if (res == NULL)
2877 goto error;
2878 }
2879 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2880 goto error;
2881 }
2882 if (modeobj != NULL) {
2883 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2884 Py_DECREF(modeobj);
2885 if (s == NULL)
2886 goto error;
2887 PyUnicode_AppendAndDel(&res, s);
2888 if (res == NULL)
2889 goto error;
2890 }
2891 s = PyUnicode_FromFormat("%U encoding=%R>",
2892 res, self->encoding);
2893 Py_DECREF(res);
2894 if (status == 0) {
2895 Py_ReprLeave((PyObject *)self);
2896 }
2897 return s;
2898
2899 error:
2900 Py_XDECREF(res);
2901 if (status == 0) {
2902 Py_ReprLeave((PyObject *)self);
2903 }
2904 return NULL;
2905 }
2906
2907
2908 /* Inquiries */
2909
2910 /*[clinic input]
2911 _io.TextIOWrapper.fileno
2912 [clinic start generated code]*/
2913
2914 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)2915 _io_TextIOWrapper_fileno_impl(textio *self)
2916 /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2917 {
2918 CHECK_ATTACHED(self);
2919 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
2920 }
2921
2922 /*[clinic input]
2923 _io.TextIOWrapper.seekable
2924 [clinic start generated code]*/
2925
2926 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)2927 _io_TextIOWrapper_seekable_impl(textio *self)
2928 /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2929 {
2930 CHECK_ATTACHED(self);
2931 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
2932 }
2933
2934 /*[clinic input]
2935 _io.TextIOWrapper.readable
2936 [clinic start generated code]*/
2937
2938 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)2939 _io_TextIOWrapper_readable_impl(textio *self)
2940 /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2941 {
2942 CHECK_ATTACHED(self);
2943 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
2944 }
2945
2946 /*[clinic input]
2947 _io.TextIOWrapper.writable
2948 [clinic start generated code]*/
2949
2950 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)2951 _io_TextIOWrapper_writable_impl(textio *self)
2952 /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
2953 {
2954 CHECK_ATTACHED(self);
2955 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
2956 }
2957
2958 /*[clinic input]
2959 _io.TextIOWrapper.isatty
2960 [clinic start generated code]*/
2961
2962 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)2963 _io_TextIOWrapper_isatty_impl(textio *self)
2964 /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
2965 {
2966 CHECK_ATTACHED(self);
2967 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
2968 }
2969
2970 /*[clinic input]
2971 _io.TextIOWrapper.flush
2972 [clinic start generated code]*/
2973
2974 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)2975 _io_TextIOWrapper_flush_impl(textio *self)
2976 /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
2977 {
2978 CHECK_ATTACHED(self);
2979 CHECK_CLOSED(self);
2980 self->telling = self->seekable;
2981 if (_textiowrapper_writeflush(self) < 0)
2982 return NULL;
2983 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
2984 }
2985
2986 /*[clinic input]
2987 _io.TextIOWrapper.close
2988 [clinic start generated code]*/
2989
2990 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)2991 _io_TextIOWrapper_close_impl(textio *self)
2992 /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
2993 {
2994 PyObject *res;
2995 int r;
2996 CHECK_ATTACHED(self);
2997
2998 res = textiowrapper_closed_get(self, NULL);
2999 if (res == NULL)
3000 return NULL;
3001 r = PyObject_IsTrue(res);
3002 Py_DECREF(res);
3003 if (r < 0)
3004 return NULL;
3005
3006 if (r > 0) {
3007 Py_RETURN_NONE; /* stream already closed */
3008 }
3009 else {
3010 PyObject *exc = NULL, *val, *tb;
3011 if (self->finalizing) {
3012 res = _PyObject_CallMethodIdObjArgs(self->buffer,
3013 &PyId__dealloc_warn,
3014 self, NULL);
3015 if (res)
3016 Py_DECREF(res);
3017 else
3018 PyErr_Clear();
3019 }
3020 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
3021 if (res == NULL)
3022 PyErr_Fetch(&exc, &val, &tb);
3023 else
3024 Py_DECREF(res);
3025
3026 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
3027 if (exc != NULL) {
3028 _PyErr_ChainExceptions(exc, val, tb);
3029 Py_CLEAR(res);
3030 }
3031 return res;
3032 }
3033 }
3034
3035 static PyObject *
textiowrapper_iternext(textio * self)3036 textiowrapper_iternext(textio *self)
3037 {
3038 PyObject *line;
3039
3040 CHECK_ATTACHED(self);
3041
3042 self->telling = 0;
3043 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
3044 /* Skip method call overhead for speed */
3045 line = _textiowrapper_readline(self, -1);
3046 }
3047 else {
3048 line = PyObject_CallMethodObjArgs((PyObject *)self,
3049 _PyIO_str_readline, NULL);
3050 if (line && !PyUnicode_Check(line)) {
3051 PyErr_Format(PyExc_OSError,
3052 "readline() should have returned a str object, "
3053 "not '%.200s'", Py_TYPE(line)->tp_name);
3054 Py_DECREF(line);
3055 return NULL;
3056 }
3057 }
3058
3059 if (line == NULL || PyUnicode_READY(line) == -1)
3060 return NULL;
3061
3062 if (PyUnicode_GET_LENGTH(line) == 0) {
3063 /* Reached EOF or would have blocked */
3064 Py_DECREF(line);
3065 Py_CLEAR(self->snapshot);
3066 self->telling = self->seekable;
3067 return NULL;
3068 }
3069
3070 return line;
3071 }
3072
3073 static PyObject *
textiowrapper_name_get(textio * self,void * context)3074 textiowrapper_name_get(textio *self, void *context)
3075 {
3076 CHECK_ATTACHED(self);
3077 return _PyObject_GetAttrId(self->buffer, &PyId_name);
3078 }
3079
3080 static PyObject *
textiowrapper_closed_get(textio * self,void * context)3081 textiowrapper_closed_get(textio *self, void *context)
3082 {
3083 CHECK_ATTACHED(self);
3084 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3085 }
3086
3087 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)3088 textiowrapper_newlines_get(textio *self, void *context)
3089 {
3090 PyObject *res;
3091 CHECK_ATTACHED(self);
3092 if (self->decoder == NULL ||
3093 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3094 {
3095 Py_RETURN_NONE;
3096 }
3097 return res;
3098 }
3099
3100 static PyObject *
textiowrapper_errors_get(textio * self,void * context)3101 textiowrapper_errors_get(textio *self, void *context)
3102 {
3103 CHECK_INITIALIZED(self);
3104 Py_INCREF(self->errors);
3105 return self->errors;
3106 }
3107
3108 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)3109 textiowrapper_chunk_size_get(textio *self, void *context)
3110 {
3111 CHECK_ATTACHED(self);
3112 return PyLong_FromSsize_t(self->chunk_size);
3113 }
3114
3115 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)3116 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
3117 {
3118 Py_ssize_t n;
3119 CHECK_ATTACHED_INT(self);
3120 if (arg == NULL) {
3121 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3122 return -1;
3123 }
3124 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
3125 if (n == -1 && PyErr_Occurred())
3126 return -1;
3127 if (n <= 0) {
3128 PyErr_SetString(PyExc_ValueError,
3129 "a strictly positive integer is required");
3130 return -1;
3131 }
3132 self->chunk_size = n;
3133 return 0;
3134 }
3135
3136 #include "clinic/textio.c.h"
3137
3138 static PyMethodDef incrementalnewlinedecoder_methods[] = {
3139 _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3140 _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3141 _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3142 _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3143 {NULL}
3144 };
3145
3146 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3147 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3148 {NULL}
3149 };
3150
3151 PyTypeObject PyIncrementalNewlineDecoder_Type = {
3152 PyVarObject_HEAD_INIT(NULL, 0)
3153 "_io.IncrementalNewlineDecoder", /*tp_name*/
3154 sizeof(nldecoder_object), /*tp_basicsize*/
3155 0, /*tp_itemsize*/
3156 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
3157 0, /*tp_vectorcall_offset*/
3158 0, /*tp_getattr*/
3159 0, /*tp_setattr*/
3160 0, /*tp_as_async*/
3161 0, /*tp_repr*/
3162 0, /*tp_as_number*/
3163 0, /*tp_as_sequence*/
3164 0, /*tp_as_mapping*/
3165 0, /*tp_hash */
3166 0, /*tp_call*/
3167 0, /*tp_str*/
3168 0, /*tp_getattro*/
3169 0, /*tp_setattro*/
3170 0, /*tp_as_buffer*/
3171 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
3172 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3173 0, /* tp_traverse */
3174 0, /* tp_clear */
3175 0, /* tp_richcompare */
3176 0, /*tp_weaklistoffset*/
3177 0, /* tp_iter */
3178 0, /* tp_iternext */
3179 incrementalnewlinedecoder_methods, /* tp_methods */
3180 0, /* tp_members */
3181 incrementalnewlinedecoder_getset, /* tp_getset */
3182 0, /* tp_base */
3183 0, /* tp_dict */
3184 0, /* tp_descr_get */
3185 0, /* tp_descr_set */
3186 0, /* tp_dictoffset */
3187 _io_IncrementalNewlineDecoder___init__, /* tp_init */
3188 0, /* tp_alloc */
3189 PyType_GenericNew, /* tp_new */
3190 };
3191
3192
3193 static PyMethodDef textiowrapper_methods[] = {
3194 _IO_TEXTIOWRAPPER_DETACH_METHODDEF
3195 _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
3196 _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3197 _IO_TEXTIOWRAPPER_READ_METHODDEF
3198 _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3199 _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3200 _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3201
3202 _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3203 _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3204 _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3205 _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3206 _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
3207
3208 _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3209 _IO_TEXTIOWRAPPER_TELL_METHODDEF
3210 _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
3211 {NULL, NULL}
3212 };
3213
3214 static PyMemberDef textiowrapper_members[] = {
3215 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3216 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3217 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
3218 {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
3219 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
3220 {NULL}
3221 };
3222
3223 static PyGetSetDef textiowrapper_getset[] = {
3224 {"name", (getter)textiowrapper_name_get, NULL, NULL},
3225 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
3226 /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3227 */
3228 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3229 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3230 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3231 (setter)textiowrapper_chunk_size_set, NULL},
3232 {NULL}
3233 };
3234
3235 PyTypeObject PyTextIOWrapper_Type = {
3236 PyVarObject_HEAD_INIT(NULL, 0)
3237 "_io.TextIOWrapper", /*tp_name*/
3238 sizeof(textio), /*tp_basicsize*/
3239 0, /*tp_itemsize*/
3240 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
3241 0, /*tp_vectorcall_offset*/
3242 0, /*tp_getattr*/
3243 0, /*tps_etattr*/
3244 0, /*tp_as_async*/
3245 (reprfunc)textiowrapper_repr,/*tp_repr*/
3246 0, /*tp_as_number*/
3247 0, /*tp_as_sequence*/
3248 0, /*tp_as_mapping*/
3249 0, /*tp_hash */
3250 0, /*tp_call*/
3251 0, /*tp_str*/
3252 0, /*tp_getattro*/
3253 0, /*tp_setattro*/
3254 0, /*tp_as_buffer*/
3255 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
3256 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
3257 _io_TextIOWrapper___init____doc__, /* tp_doc */
3258 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3259 (inquiry)textiowrapper_clear, /* tp_clear */
3260 0, /* tp_richcompare */
3261 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
3262 0, /* tp_iter */
3263 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3264 textiowrapper_methods, /* tp_methods */
3265 textiowrapper_members, /* tp_members */
3266 textiowrapper_getset, /* tp_getset */
3267 0, /* tp_base */
3268 0, /* tp_dict */
3269 0, /* tp_descr_get */
3270 0, /* tp_descr_set */
3271 offsetof(textio, dict), /*tp_dictoffset*/
3272 _io_TextIOWrapper___init__, /* tp_init */
3273 0, /* tp_alloc */
3274 PyType_GenericNew, /* tp_new */
3275 0, /* tp_free */
3276 0, /* tp_is_gc */
3277 0, /* tp_bases */
3278 0, /* tp_mro */
3279 0, /* tp_cache */
3280 0, /* tp_subclasses */
3281 0, /* tp_weaklist */
3282 0, /* tp_del */
3283 0, /* tp_version_tag */
3284 0, /* tp_finalize */
3285 };
3286