1 /*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "structmember.h"
12 #include "_iomodule.h"
13
14 /* TextIOBase */
15
16 PyDoc_STRVAR(textiobase_doc,
17 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24 static PyObject *
_unsupported(const char * message)25 _unsupported(const char *message)
26 {
27 PyErr_SetString(_PyIO_unsupported_operation, message);
28 return NULL;
29 }
30
31 PyDoc_STRVAR(textiobase_detach_doc,
32 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38 static PyObject *
textiobase_detach(PyObject * self)39 textiobase_detach(PyObject *self)
40 {
41 return _unsupported("detach");
42 }
43
44 PyDoc_STRVAR(textiobase_read_doc,
45 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51 static PyObject *
textiobase_read(PyObject * self,PyObject * args)52 textiobase_read(PyObject *self, PyObject *args)
53 {
54 return _unsupported("read");
55 }
56
57 PyDoc_STRVAR(textiobase_readline_doc,
58 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63 static PyObject *
textiobase_readline(PyObject * self,PyObject * args)64 textiobase_readline(PyObject *self, PyObject *args)
65 {
66 return _unsupported("readline");
67 }
68
69 PyDoc_STRVAR(textiobase_write_doc,
70 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75 static PyObject *
textiobase_write(PyObject * self,PyObject * args)76 textiobase_write(PyObject *self, PyObject *args)
77 {
78 return _unsupported("write");
79 }
80
81 PyDoc_STRVAR(textiobase_encoding_doc,
82 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)88 textiobase_encoding_get(PyObject *self, void *context)
89 {
90 Py_RETURN_NONE;
91 }
92
93 PyDoc_STRVAR(textiobase_newlines_doc,
94 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)102 textiobase_newlines_get(PyObject *self, void *context)
103 {
104 Py_RETURN_NONE;
105 }
106
107 PyDoc_STRVAR(textiobase_errors_doc,
108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113 static PyObject *
textiobase_errors_get(PyObject * self,void * context)114 textiobase_errors_get(PyObject *self, void *context)
115 {
116 Py_RETURN_NONE;
117 }
118
119
120 static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
125 {NULL, NULL}
126 };
127
128 static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
132 {NULL}
133 };
134
/* Type object for _io._TextIOBase.  It derives from PyIOBase_Type, may
   be subclassed (Py_TPFLAGS_BASETYPE) and contributes only the stub
   methods and getters defined above; every other slot is left at 0. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
};
175
176
177 /* IncrementalNewlineDecoder */
178
179 PyDoc_STRVAR(incrementalnewlinedecoder_doc,
180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
186 "decoder.\n"
187 );
188
189 typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
193 signed int pendingcr: 1;
194 signed int translate: 1;
195 unsigned int seennl: 3;
196 } nldecoder_object;
197
198 static int
incrementalnewlinedecoder_init(nldecoder_object * self,PyObject * args,PyObject * kwds)199 incrementalnewlinedecoder_init(nldecoder_object *self,
200 PyObject *args, PyObject *kwds)
201 {
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229 }
230
231 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)232 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
233 {
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237 }
238
239 static int
check_decoded(PyObject * decoded)240 check_decoded(PyObject *decoded)
241 {
242 if (decoded == NULL)
243 return -1;
244 if (!PyUnicode_Check(decoded)) {
245 PyErr_Format(PyExc_TypeError,
246 "decoder should return a string result, not '%.200s'",
247 Py_TYPE(decoded)->tp_name);
248 Py_DECREF(decoded);
249 return -1;
250 }
251 return 0;
252 }
253
/* Bit flags for the newline conventions encountered so far; they are
   OR-ed together in nldecoder_object.seennl. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
258
259 PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject * _self,PyObject * input,int final)260 _PyIncrementalNewlineDecoder_decode(PyObject *_self,
261 PyObject *input, int final)
262 {
263 PyObject *output;
264 Py_ssize_t output_len;
265 nldecoder_object *self = (nldecoder_object *) _self;
266
267 if (self->decoder == NULL) {
268 PyErr_SetString(PyExc_ValueError,
269 "IncrementalNewlineDecoder.__init__ not called");
270 return NULL;
271 }
272
273 /* decode input (with the eventual \r from a previous pass) */
274 if (self->decoder != Py_None) {
275 output = PyObject_CallMethodObjArgs(self->decoder,
276 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
277 }
278 else {
279 output = input;
280 Py_INCREF(output);
281 }
282
283 if (check_decoded(output) < 0)
284 return NULL;
285
286 output_len = PyUnicode_GET_SIZE(output);
287 if (self->pendingcr && (final || output_len > 0)) {
288 Py_UNICODE *out;
289 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
290 if (modified == NULL)
291 goto error;
292 out = PyUnicode_AS_UNICODE(modified);
293 out[0] = '\r';
294 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
295 output_len * sizeof(Py_UNICODE));
296 Py_DECREF(output);
297 output = modified;
298 self->pendingcr = 0;
299 output_len++;
300 }
301
302 /* retain last \r even when not translating data:
303 * then readline() is sure to get \r\n in one pass
304 */
305 if (!final) {
306 if (output_len > 0
307 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
308
309 if (Py_REFCNT(output) == 1) {
310 if (PyUnicode_Resize(&output, output_len - 1) < 0)
311 goto error;
312 }
313 else {
314 PyObject *modified = PyUnicode_FromUnicode(
315 PyUnicode_AS_UNICODE(output),
316 output_len - 1);
317 if (modified == NULL)
318 goto error;
319 Py_DECREF(output);
320 output = modified;
321 }
322 self->pendingcr = 1;
323 }
324 }
325
326 /* Record which newlines are read and do newline translation if desired,
327 all in one pass. */
328 {
329 Py_UNICODE *in_str;
330 Py_ssize_t len;
331 int seennl = self->seennl;
332 int only_lf = 0;
333
334 in_str = PyUnicode_AS_UNICODE(output);
335 len = PyUnicode_GET_SIZE(output);
336
337 if (len == 0)
338 return output;
339
340 /* If, up to now, newlines are consistently \n, do a quick check
341 for the \r *byte* with the libc's optimized memchr.
342 */
343 if (seennl == SEEN_LF || seennl == 0) {
344 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
345 }
346
347 if (only_lf) {
348 /* If not already seen, quick scan for a possible "\n" character.
349 (there's nothing else to be done, even when in translation mode)
350 */
351 if (seennl == 0 &&
352 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
353 Py_UNICODE *s, *end;
354 s = in_str;
355 end = in_str + len;
356 for (;;) {
357 Py_UNICODE c;
358 /* Fast loop for non-control characters */
359 while (*s > '\n')
360 s++;
361 c = *s++;
362 if (c == '\n') {
363 seennl |= SEEN_LF;
364 break;
365 }
366 if (s > end)
367 break;
368 }
369 }
370 /* Finished: we have scanned for newlines, and none of them
371 need translating */
372 }
373 else if (!self->translate) {
374 Py_UNICODE *s, *end;
375 /* We have already seen all newline types, no need to scan again */
376 if (seennl == SEEN_ALL)
377 goto endscan;
378 s = in_str;
379 end = in_str + len;
380 for (;;) {
381 Py_UNICODE c;
382 /* Fast loop for non-control characters */
383 while (*s > '\r')
384 s++;
385 c = *s++;
386 if (c == '\n')
387 seennl |= SEEN_LF;
388 else if (c == '\r') {
389 if (*s == '\n') {
390 seennl |= SEEN_CRLF;
391 s++;
392 }
393 else
394 seennl |= SEEN_CR;
395 }
396 if (s > end)
397 break;
398 if (seennl == SEEN_ALL)
399 break;
400 }
401 endscan:
402 ;
403 }
404 else {
405 PyObject *translated = NULL;
406 Py_UNICODE *out_str;
407 Py_UNICODE *in, *out, *end;
408 if (Py_REFCNT(output) != 1) {
409 /* We could try to optimize this so that we only do a copy
410 when there is something to translate. On the other hand,
411 most decoders should only output non-shared strings, i.e.
412 translation is done in place. */
413 translated = PyUnicode_FromUnicode(NULL, len);
414 if (translated == NULL)
415 goto error;
416 assert(Py_REFCNT(translated) == 1);
417 memcpy(PyUnicode_AS_UNICODE(translated),
418 PyUnicode_AS_UNICODE(output),
419 len * sizeof(Py_UNICODE));
420 }
421 else {
422 translated = output;
423 }
424 out_str = PyUnicode_AS_UNICODE(translated);
425 in = in_str;
426 out = out_str;
427 end = in_str + len;
428 for (;;) {
429 Py_UNICODE c;
430 /* Fast loop for non-control characters */
431 while ((c = *in++) > '\r')
432 *out++ = c;
433 if (c == '\n') {
434 *out++ = c;
435 seennl |= SEEN_LF;
436 continue;
437 }
438 if (c == '\r') {
439 if (*in == '\n') {
440 in++;
441 seennl |= SEEN_CRLF;
442 }
443 else
444 seennl |= SEEN_CR;
445 *out++ = '\n';
446 continue;
447 }
448 if (in > end)
449 break;
450 *out++ = c;
451 }
452 if (translated != output) {
453 Py_DECREF(output);
454 output = translated;
455 }
456 if (out - out_str != len) {
457 if (PyUnicode_Resize(&output, out - out_str) < 0)
458 goto error;
459 }
460 }
461 self->seennl |= seennl;
462 }
463
464 return output;
465
466 error:
467 Py_DECREF(output);
468 return NULL;
469 }
470
471 static PyObject *
incrementalnewlinedecoder_decode(nldecoder_object * self,PyObject * args,PyObject * kwds)472 incrementalnewlinedecoder_decode(nldecoder_object *self,
473 PyObject *args, PyObject *kwds)
474 {
475 char *kwlist[] = {"input", "final", NULL};
476 PyObject *input;
477 int final = 0;
478
479 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
480 kwlist, &input, &final))
481 return NULL;
482 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
483 }
484
485 static PyObject *
incrementalnewlinedecoder_getstate(nldecoder_object * self,PyObject * args)486 incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
487 {
488 PyObject *buffer;
489 unsigned PY_LONG_LONG flag;
490
491 if (self->decoder != Py_None) {
492 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
493 _PyIO_str_getstate, NULL);
494 if (state == NULL)
495 return NULL;
496 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
497 Py_DECREF(state);
498 return NULL;
499 }
500 Py_INCREF(buffer);
501 Py_DECREF(state);
502 }
503 else {
504 buffer = PyBytes_FromString("");
505 flag = 0;
506 }
507 flag <<= 1;
508 if (self->pendingcr)
509 flag |= 1;
510 return Py_BuildValue("NK", buffer, flag);
511 }
512
513 static PyObject *
incrementalnewlinedecoder_setstate(nldecoder_object * self,PyObject * state)514 incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
515 {
516 PyObject *buffer;
517 unsigned PY_LONG_LONG flag;
518
519 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
520 return NULL;
521
522 self->pendingcr = (int) flag & 1;
523 flag >>= 1;
524
525 if (self->decoder != Py_None)
526 return PyObject_CallMethod(self->decoder,
527 "setstate", "((OK))", buffer, flag);
528 else
529 Py_RETURN_NONE;
530 }
531
532 static PyObject *
incrementalnewlinedecoder_reset(nldecoder_object * self,PyObject * args)533 incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
534 {
535 self->seennl = 0;
536 self->pendingcr = 0;
537 if (self->decoder != Py_None)
538 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
539 else
540 Py_RETURN_NONE;
541 }
542
543 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)544 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
545 {
546 switch (self->seennl) {
547 case SEEN_CR:
548 return PyUnicode_FromString("\r");
549 case SEEN_LF:
550 return PyUnicode_FromString("\n");
551 case SEEN_CRLF:
552 return PyUnicode_FromString("\r\n");
553 case SEEN_CR | SEEN_LF:
554 return Py_BuildValue("ss", "\r", "\n");
555 case SEEN_CR | SEEN_CRLF:
556 return Py_BuildValue("ss", "\r", "\r\n");
557 case SEEN_LF | SEEN_CRLF:
558 return Py_BuildValue("ss", "\n", "\r\n");
559 case SEEN_CR | SEEN_LF | SEEN_CRLF:
560 return Py_BuildValue("sss", "\r", "\n", "\r\n");
561 default:
562 Py_RETURN_NONE;
563 }
564
565 }
566
567
568 static PyMethodDef incrementalnewlinedecoder_methods[] = {
569 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
570 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
571 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
572 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
573 {NULL}
574 };
575
576 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
577 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
578 {NULL}
579 };
580
/* Type object for _io.IncrementalNewlineDecoder.  Instances are
   nldecoder_object structs created by PyType_GenericNew and set up by
   incrementalnewlinedecoder_init; the type may be subclassed
   (Py_TPFLAGS_BASETYPE) and names no explicit base (tp_base is 0). */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object),   /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    incrementalnewlinedecoder_doc,          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)incrementalnewlinedecoder_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
621
622
623 /* TextIOWrapper */
624
PyDoc_STRVAR(textiowrapper_doc,
    "Character and line based layer over a BufferedIOBase object, buffer.\n"
    "\n"
    "encoding gives the name of the encoding that the stream will be\n"
    "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
    "\n"
    "errors determines the strictness of encoding and decoding (see the\n"
    "codecs.register) and defaults to \"strict\".\n"
    "\n"
    "newline controls how line endings are handled. It can be None, '',\n"
    "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
    "\n"
    "* On input, if newline is None, universal newlines mode is\n"
    " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
    " these are translated into '\\n' before being returned to the\n"
    " caller. If it is '', universal newline mode is enabled, but line\n"
    " endings are returned to the caller untranslated. If it has any of\n"
    " the other legal values, input lines are only terminated by the given\n"
    " string, and the line ending is returned to the caller untranslated.\n"
    "\n"
    "* On output, if newline is None, any '\\n' characters written are\n"
    " translated to the system default line separator, os.linesep. If\n"
    " newline is '', no translation takes place. If newline is any of the\n"
    " other legal values, any '\\n' characters written are translated to\n"
    " the given string.\n"
    "\n"
    "If line_buffering is True, a call to flush is implied when a call to\n"
    "write contains a newline character."
    );

/* Signature shared by the specialized encoders defined below:
   (wrapper, text) -> encoded bytes object, or NULL on error. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);

/* Instance layout of TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;           /* the wrapped buffered stream */
    PyObject *encoding;         /* encoding name */
    PyObject *encoder;          /* incremental encoder; only created when
                                   the buffer reports itself writable */
    PyObject *decoder;          /* incremental decoder; only created when
                                   the buffer reports itself readable */
    PyObject *readnl;           /* the newline argument, if one was given */
    PyObject *errors;           /* error-handler name, as a bytes object */
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char telling;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;
    PyObject *snapshot;
    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
709
710
711 /* A couple of specialized cases in order to bypass the slow incremental
712 encoding methods for the most popular encodings. */
713
714 static PyObject *
ascii_encode(textio * self,PyObject * text)715 ascii_encode(textio *self, PyObject *text)
716 {
717 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
718 PyUnicode_GET_SIZE(text),
719 PyBytes_AS_STRING(self->errors));
720 }
721
722 static PyObject *
utf16be_encode(textio * self,PyObject * text)723 utf16be_encode(textio *self, PyObject *text)
724 {
725 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
726 PyUnicode_GET_SIZE(text),
727 PyBytes_AS_STRING(self->errors), 1);
728 }
729
730 static PyObject *
utf16le_encode(textio * self,PyObject * text)731 utf16le_encode(textio *self, PyObject *text)
732 {
733 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
734 PyUnicode_GET_SIZE(text),
735 PyBytes_AS_STRING(self->errors), -1);
736 }
737
738 static PyObject *
utf16_encode(textio * self,PyObject * text)739 utf16_encode(textio *self, PyObject *text)
740 {
741 if (!self->encoding_start_of_stream) {
742 /* Skip the BOM and use native byte ordering */
743 #if defined(WORDS_BIGENDIAN)
744 return utf16be_encode(self, text);
745 #else
746 return utf16le_encode(self, text);
747 #endif
748 }
749 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
750 PyUnicode_GET_SIZE(text),
751 PyBytes_AS_STRING(self->errors), 0);
752 }
753
754 static PyObject *
utf32be_encode(textio * self,PyObject * text)755 utf32be_encode(textio *self, PyObject *text)
756 {
757 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
758 PyUnicode_GET_SIZE(text),
759 PyBytes_AS_STRING(self->errors), 1);
760 }
761
762 static PyObject *
utf32le_encode(textio * self,PyObject * text)763 utf32le_encode(textio *self, PyObject *text)
764 {
765 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
766 PyUnicode_GET_SIZE(text),
767 PyBytes_AS_STRING(self->errors), -1);
768 }
769
770 static PyObject *
utf32_encode(textio * self,PyObject * text)771 utf32_encode(textio *self, PyObject *text)
772 {
773 if (!self->encoding_start_of_stream) {
774 /* Skip the BOM and use native byte ordering */
775 #if defined(WORDS_BIGENDIAN)
776 return utf32be_encode(self, text);
777 #else
778 return utf32le_encode(self, text);
779 #endif
780 }
781 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
782 PyUnicode_GET_SIZE(text),
783 PyBytes_AS_STRING(self->errors), 0);
784 }
785
786 static PyObject *
utf8_encode(textio * self,PyObject * text)787 utf8_encode(textio *self, PyObject *text)
788 {
789 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
790 PyUnicode_GET_SIZE(text),
791 PyBytes_AS_STRING(self->errors));
792 }
793
794 static PyObject *
latin1_encode(textio * self,PyObject * text)795 latin1_encode(textio *self, PyObject *text)
796 {
797 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
798 PyUnicode_GET_SIZE(text),
799 PyBytes_AS_STRING(self->errors));
800 }
801
802 /* Map normalized encoding names onto the specialized encoding funcs */
803
/* One row of the fast-path lookup table: a normalized codec name and the
   specialized encoder to use for it. */
typedef struct {
    const char *name;
    encodefunc_t encodefunc;
} encodefuncentry;

/* Lookup table searched by exact name comparison (strcmp) against the
   codec's `name` attribute; terminated by a {NULL, NULL} entry.  The
   encoders take a `textio *` first argument, hence the casts to the
   generic encodefunc_t signature. */
static encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
821
822
823 static int
textiowrapper_init(textio * self,PyObject * args,PyObject * kwds)824 textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
825 {
826 char *kwlist[] = {"buffer", "encoding", "errors",
827 "newline", "line_buffering",
828 NULL};
829 PyObject *buffer, *raw, *codec_info = NULL;
830 char *encoding = NULL;
831 char *errors = NULL;
832 char *newline = NULL;
833 int line_buffering = 0;
834
835 PyObject *res;
836 int r;
837
838 self->ok = 0;
839 self->detached = 0;
840 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
841 kwlist, &buffer, &encoding, &errors,
842 &newline, &line_buffering))
843 return -1;
844
845 if (newline && newline[0] != '\0'
846 && !(newline[0] == '\n' && newline[1] == '\0')
847 && !(newline[0] == '\r' && newline[1] == '\0')
848 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
849 PyErr_Format(PyExc_ValueError,
850 "illegal newline value: %s", newline);
851 return -1;
852 }
853
854 Py_CLEAR(self->buffer);
855 Py_CLEAR(self->encoding);
856 Py_CLEAR(self->encoder);
857 Py_CLEAR(self->decoder);
858 Py_CLEAR(self->readnl);
859 Py_CLEAR(self->decoded_chars);
860 Py_CLEAR(self->pending_bytes);
861 Py_CLEAR(self->snapshot);
862 Py_CLEAR(self->errors);
863 Py_CLEAR(self->raw);
864 self->decoded_chars_used = 0;
865 self->pending_bytes_count = 0;
866 self->encodefunc = NULL;
867 self->writenl = NULL;
868
869 if (encoding == NULL && self->encoding == NULL) {
870 if (_PyIO_locale_module == NULL) {
871 _PyIO_locale_module = PyImport_ImportModule("locale");
872 if (_PyIO_locale_module == NULL)
873 goto catch_ImportError;
874 else
875 goto use_locale;
876 }
877 else {
878 use_locale:
879 self->encoding = PyObject_CallMethod(
880 _PyIO_locale_module, "getpreferredencoding", NULL);
881 if (self->encoding == NULL) {
882 catch_ImportError:
883 /*
884 Importing locale can raise an ImportError because of
885 _functools, and locale.getpreferredencoding can raise an
886 ImportError if _locale is not available. These will happen
887 during module building.
888 */
889 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
890 PyErr_Clear();
891 self->encoding = PyString_FromString("ascii");
892 }
893 else
894 goto error;
895 }
896 else if (!PyString_Check(self->encoding))
897 Py_CLEAR(self->encoding);
898 }
899 }
900 if (self->encoding != NULL)
901 encoding = PyString_AsString(self->encoding);
902 else if (encoding != NULL) {
903 self->encoding = PyString_FromString(encoding);
904 if (self->encoding == NULL)
905 goto error;
906 }
907 else {
908 PyErr_SetString(PyExc_IOError,
909 "could not determine default encoding");
910 }
911
912 /* Check we have been asked for a real text encoding */
913 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
914 if (codec_info == NULL) {
915 Py_CLEAR(self->encoding);
916 goto error;
917 }
918
919 /* XXX: Failures beyond this point have the potential to leak elements
920 * of the partially constructed object (like self->encoding)
921 */
922
923 if (errors == NULL)
924 errors = "strict";
925 self->errors = PyBytes_FromString(errors);
926 if (self->errors == NULL)
927 goto error;
928
929 self->chunk_size = 8192;
930 self->readuniversal = (newline == NULL || newline[0] == '\0');
931 self->line_buffering = line_buffering;
932 self->readtranslate = (newline == NULL);
933 if (newline) {
934 self->readnl = PyString_FromString(newline);
935 if (self->readnl == NULL)
936 goto error;
937 }
938 self->writetranslate = (newline == NULL || newline[0] != '\0');
939 if (!self->readuniversal && self->writetranslate) {
940 self->writenl = PyString_AsString(self->readnl);
941 if (!strcmp(self->writenl, "\n"))
942 self->writenl = NULL;
943 }
944 #ifdef MS_WINDOWS
945 else
946 self->writenl = "\r\n";
947 #endif
948
949 /* Build the decoder object */
950 res = PyObject_CallMethod(buffer, "readable", NULL);
951 if (res == NULL)
952 goto error;
953 r = PyObject_IsTrue(res);
954 Py_DECREF(res);
955 if (r == -1)
956 goto error;
957 if (r == 1) {
958 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
959 errors);
960 if (self->decoder == NULL)
961 goto error;
962
963 if (self->readuniversal) {
964 PyObject *incrementalDecoder = PyObject_CallFunction(
965 (PyObject *)&PyIncrementalNewlineDecoder_Type,
966 "Oi", self->decoder, (int)self->readtranslate);
967 if (incrementalDecoder == NULL)
968 goto error;
969 Py_XSETREF(self->decoder, incrementalDecoder);
970 }
971 }
972
973 /* Build the encoder object */
974 res = PyObject_CallMethod(buffer, "writable", NULL);
975 if (res == NULL)
976 goto error;
977 r = PyObject_IsTrue(res);
978 Py_DECREF(res);
979 if (r == -1)
980 goto error;
981 if (r == 1) {
982 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
983 errors);
984 if (self->encoder == NULL)
985 goto error;
986 /* Get the normalized named of the codec */
987 res = PyObject_GetAttrString(codec_info, "name");
988 if (res == NULL) {
989 if (PyErr_ExceptionMatches(PyExc_AttributeError))
990 PyErr_Clear();
991 else
992 goto error;
993 }
994 else if (PyString_Check(res)) {
995 encodefuncentry *e = encodefuncs;
996 while (e->name != NULL) {
997 if (!strcmp(PyString_AS_STRING(res), e->name)) {
998 self->encodefunc = e->encodefunc;
999 break;
1000 }
1001 e++;
1002 }
1003 }
1004 Py_XDECREF(res);
1005 }
1006
1007 /* Finished sorting out the codec details */
1008 Py_DECREF(codec_info);
1009
1010 self->buffer = buffer;
1011 Py_INCREF(buffer);
1012
1013 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1014 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1015 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1016 raw = PyObject_GetAttrString(buffer, "raw");
1017 /* Cache the raw FileIO object to speed up 'closed' checks */
1018 if (raw == NULL) {
1019 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1020 PyErr_Clear();
1021 else
1022 goto error;
1023 }
1024 else if (Py_TYPE(raw) == &PyFileIO_Type)
1025 self->raw = raw;
1026 else
1027 Py_DECREF(raw);
1028 }
1029
1030 res = PyObject_CallMethod(buffer, "seekable", NULL);
1031 if (res == NULL)
1032 goto error;
1033 r = PyObject_IsTrue(res);
1034 Py_DECREF(res);
1035 if (r < 0)
1036 goto error;
1037 self->seekable = self->telling = r;
1038
1039 self->encoding_start_of_stream = 0;
1040 if (self->seekable && self->encoder) {
1041 PyObject *cookieObj;
1042 int cmp;
1043
1044 self->encoding_start_of_stream = 1;
1045
1046 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1047 if (cookieObj == NULL)
1048 goto error;
1049
1050 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1051 Py_DECREF(cookieObj);
1052 if (cmp < 0) {
1053 goto error;
1054 }
1055
1056 if (cmp == 0) {
1057 self->encoding_start_of_stream = 0;
1058 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1059 _PyIO_zero, NULL);
1060 if (res == NULL)
1061 goto error;
1062 Py_DECREF(res);
1063 }
1064 }
1065
1066 self->ok = 1;
1067 return 0;
1068
1069 error:
1070 Py_XDECREF(codec_info);
1071 return -1;
1072 }
1073
/* Mark the wrapper as uninitialized and release every object reference
   it holds, including the instance dict.  Shared by the tp_clear and
   tp_dealloc implementations below. */
static void
_textiowrapper_clear(textio *self)
{
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);

    Py_CLEAR(self->dict);
}
1091
1092 static void
textiowrapper_dealloc(textio * self)1093 textiowrapper_dealloc(textio *self)
1094 {
1095 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1096 return;
1097 _PyObject_GC_UNTRACK(self);
1098 if (self->weakreflist != NULL)
1099 PyObject_ClearWeakRefs((PyObject *)self);
1100 _textiowrapper_clear(self);
1101 Py_TYPE(self)->tp_free((PyObject *)self);
1102 }
1103
/* GC traversal: report every object reference held by the wrapper to the
   `visit` callback.  Py_VISIT returns early from this function if the
   callback yields a non-zero value; otherwise 0 is returned. */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1121
1122 static int
textiowrapper_clear(textio * self)1123 textiowrapper_clear(textio *self)
1124 {
1125 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1126 return -1;
1127 _textiowrapper_clear(self);
1128 return 0;
1129 }
1130
/* Forward declaration: this getter is defined elsewhere in this file but is
   already needed by the CHECK_CLOSED macro below. */
static PyObject *
textiowrapper_closed_get(textio *self, void *context);
1133
/* Make the enclosing function return NULL when the stream is closed.
 *
 * This macro takes some shortcuts to make the common case faster:
 *  - for exact TextIOWrapper instances with a cached raw object, the closed
 *    state is asked directly from _PyFileIO_closed();
 *  - for exact TextIOWrapper instances without a cached raw object, the
 *    `closed` getter is evaluated and tested for truth;
 *  - for any other type, _PyIOBase_check_closed() is used.
 * In the first two cases a ValueError is set when the stream is closed.
 *
 * The enclosing function must return a pointer.  The macro argument is
 * parenthesized wherever it is dereferenced or cast so that an expression
 * argument expands correctly.
 */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if ((self)->raw != NULL) \
                r = _PyFileIO_closed((self)->raw); \
            else { \
                _res = textiowrapper_closed_get((self), NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
            return NULL; \
    } while (0)
1160
/* Make the enclosing (pointer-returning) function fail with ValueError when
   the wrapper has not been successfully initialized (ok <= 0).  The macro
   argument is parenthesized so that an expression argument expands
   correctly. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }
1167
/* Make the enclosing (pointer-returning) function fail with ValueError when
   the wrapper is uninitialized or its underlying buffer has been detached.
   The macro argument is parenthesized so that an expression argument
   expands correctly. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }
1175
/* Same checks as CHECK_ATTACHED, for enclosing functions that return an
   int: -1 is returned (with ValueError set) when the wrapper is
   uninitialized or detached.  The macro argument is parenthesized so that
   an expression argument expands correctly. */
#define CHECK_ATTACHED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } else if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1186
1187
1188 static PyObject *
textiowrapper_detach(textio * self)1189 textiowrapper_detach(textio *self)
1190 {
1191 PyObject *buffer, *res;
1192 CHECK_ATTACHED(self);
1193 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1194 if (res == NULL)
1195 return NULL;
1196 Py_DECREF(res);
1197 buffer = self->buffer;
1198 self->buffer = NULL;
1199 self->detached = 1;
1200 return buffer;
1201 }
1202
/* Locate the first occurrence of `ch` among the first `size` elements of
 * `s`.
 *
 * Works like wcschr() except that the scan is bounded by `size` instead of
 * stopping at a NUL character.  Returns a pointer to the match, or NULL if
 * there is none (including when `size` is zero or negative).
 */
Py_LOCAL_INLINE(const Py_UNICODE *)
findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
{
    Py_ssize_t i;

    for (i = 0; i < size; i++) {
        if (s[i] == ch) {
            return s + i;
        }
    }
    return NULL;
}
1214
1215 /* Flush the internal write buffer. This doesn't explicitly flush the
1216 underlying buffered object, though. */
1217 static int
_textiowrapper_writeflush(textio * self)1218 _textiowrapper_writeflush(textio *self)
1219 {
1220 PyObject *pending, *b, *ret;
1221
1222 if (self->pending_bytes == NULL)
1223 return 0;
1224
1225 pending = self->pending_bytes;
1226 Py_INCREF(pending);
1227 self->pending_bytes_count = 0;
1228 Py_CLEAR(self->pending_bytes);
1229
1230 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1231 Py_DECREF(pending);
1232 if (b == NULL)
1233 return -1;
1234 ret = NULL;
1235 do {
1236 ret = PyObject_CallMethodObjArgs(self->buffer,
1237 _PyIO_str_write, b, NULL);
1238 } while (ret == NULL && _PyIO_trap_eintr());
1239 Py_DECREF(b);
1240 if (ret == NULL)
1241 return -1;
1242 Py_DECREF(ret);
1243 return 0;
1244 }
1245
1246 static PyObject *
textiowrapper_write(textio * self,PyObject * args)1247 textiowrapper_write(textio *self, PyObject *args)
1248 {
1249 PyObject *ret;
1250 PyObject *text; /* owned reference */
1251 PyObject *b;
1252 Py_ssize_t textlen;
1253 int haslf = 0;
1254 int needflush = 0;
1255
1256 CHECK_ATTACHED(self);
1257
1258 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1259 return NULL;
1260 }
1261
1262 CHECK_CLOSED(self);
1263
1264 if (self->encoder == NULL) {
1265 PyErr_SetString(PyExc_IOError, "not writable");
1266 return NULL;
1267 }
1268
1269 Py_INCREF(text);
1270
1271 textlen = PyUnicode_GetSize(text);
1272
1273 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1274 if (findchar(PyUnicode_AS_UNICODE(text),
1275 PyUnicode_GET_SIZE(text), '\n'))
1276 haslf = 1;
1277
1278 if (haslf && self->writetranslate && self->writenl != NULL) {
1279 PyObject *newtext = PyObject_CallMethod(
1280 text, "replace", "ss", "\n", self->writenl);
1281 Py_DECREF(text);
1282 if (newtext == NULL)
1283 return NULL;
1284 text = newtext;
1285 }
1286
1287 if (self->line_buffering &&
1288 (haslf ||
1289 findchar(PyUnicode_AS_UNICODE(text),
1290 PyUnicode_GET_SIZE(text), '\r')))
1291 needflush = 1;
1292
1293 /* XXX What if we were just reading? */
1294 if (self->encodefunc != NULL) {
1295 b = (*self->encodefunc)((PyObject *) self, text);
1296 self->encoding_start_of_stream = 0;
1297 }
1298 else
1299 b = PyObject_CallMethodObjArgs(self->encoder,
1300 _PyIO_str_encode, text, NULL);
1301 Py_DECREF(text);
1302 if (b == NULL)
1303 return NULL;
1304
1305 if (self->pending_bytes == NULL) {
1306 self->pending_bytes = PyList_New(0);
1307 if (self->pending_bytes == NULL) {
1308 Py_DECREF(b);
1309 return NULL;
1310 }
1311 self->pending_bytes_count = 0;
1312 }
1313 if (PyList_Append(self->pending_bytes, b) < 0) {
1314 Py_DECREF(b);
1315 return NULL;
1316 }
1317 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1318 Py_DECREF(b);
1319 if (self->pending_bytes_count > self->chunk_size || needflush) {
1320 if (_textiowrapper_writeflush(self) < 0)
1321 return NULL;
1322 }
1323
1324 if (needflush) {
1325 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1326 if (ret == NULL)
1327 return NULL;
1328 Py_DECREF(ret);
1329 }
1330
1331 Py_CLEAR(self->snapshot);
1332
1333 if (self->decoder) {
1334 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1335 if (ret == NULL)
1336 return NULL;
1337 Py_DECREF(ret);
1338 }
1339
1340 return PyLong_FromSsize_t(textlen);
1341 }
1342
1343 /* Steal a reference to chars and store it in the decoded_char buffer;
1344 */
1345 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1346 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1347 {
1348 Py_XSETREF(self->decoded_chars, chars);
1349 self->decoded_chars_used = 0;
1350 }
1351
1352 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1353 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1354 {
1355 PyObject *chars;
1356 Py_ssize_t avail;
1357
1358 if (self->decoded_chars == NULL)
1359 return PyUnicode_FromStringAndSize(NULL, 0);
1360
1361 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1362 - self->decoded_chars_used);
1363
1364 assert(avail >= 0);
1365
1366 if (n < 0 || n > avail)
1367 n = avail;
1368
1369 if (self->decoded_chars_used > 0 || n < avail) {
1370 chars = PyUnicode_FromUnicode(
1371 PyUnicode_AS_UNICODE(self->decoded_chars)
1372 + self->decoded_chars_used, n);
1373 if (chars == NULL)
1374 return NULL;
1375 }
1376 else {
1377 chars = self->decoded_chars;
1378 Py_INCREF(chars);
1379 }
1380
1381 self->decoded_chars_used += n;
1382 return chars;
1383 }
1384
1385 /* Read and decode the next chunk of data from the BufferedReader.
1386 */
1387 static int
textiowrapper_read_chunk(textio * self)1388 textiowrapper_read_chunk(textio *self)
1389 {
1390 PyObject *dec_buffer = NULL;
1391 PyObject *dec_flags = NULL;
1392 PyObject *input_chunk = NULL;
1393 PyObject *decoded_chars, *chunk_size;
1394 int eof;
1395
1396 /* The return value is True unless EOF was reached. The decoded string is
1397 * placed in self._decoded_chars (replacing its previous value). The
1398 * entire input chunk is sent to the decoder, though some of it may remain
1399 * buffered in the decoder, yet to be converted.
1400 */
1401
1402 if (self->decoder == NULL) {
1403 PyErr_SetString(PyExc_IOError, "not readable");
1404 return -1;
1405 }
1406
1407 if (self->telling) {
1408 /* To prepare for tell(), we need to snapshot a point in the file
1409 * where the decoder's input buffer is empty.
1410 */
1411
1412 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1413 _PyIO_str_getstate, NULL);
1414 if (state == NULL)
1415 return -1;
1416 /* Given this, we know there was a valid snapshot point
1417 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1418 */
1419 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1420 Py_DECREF(state);
1421 return -1;
1422 }
1423 Py_INCREF(dec_buffer);
1424 Py_INCREF(dec_flags);
1425 Py_DECREF(state);
1426 }
1427
1428 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1429 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1430 if (chunk_size == NULL)
1431 goto fail;
1432 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1433 _PyIO_str_read1, chunk_size, NULL);
1434 Py_DECREF(chunk_size);
1435 if (input_chunk == NULL)
1436 goto fail;
1437 if (!PyBytes_Check(input_chunk)) {
1438 PyErr_Format(PyExc_TypeError,
1439 "underlying read1() should have returned a bytes object, "
1440 "not '%.200s'", Py_TYPE(input_chunk)->tp_name);
1441 goto fail;
1442 }
1443
1444 eof = (PyBytes_Size(input_chunk) == 0);
1445
1446 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1447 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1448 self->decoder, input_chunk, eof);
1449 }
1450 else {
1451 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1452 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1453 }
1454
1455 if (check_decoded(decoded_chars) < 0)
1456 goto fail;
1457 textiowrapper_set_decoded_chars(self, decoded_chars);
1458 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1459 eof = 0;
1460
1461 if (self->telling) {
1462 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1463 * next input to be decoded is dec_buffer + input_chunk.
1464 */
1465 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1466 if (next_input == NULL)
1467 goto fail;
1468 if (!PyBytes_Check(next_input)) {
1469 PyErr_Format(PyExc_TypeError,
1470 "decoder getstate() should have returned a bytes "
1471 "object, not '%.200s'",
1472 Py_TYPE(next_input)->tp_name);
1473 Py_DECREF(next_input);
1474 goto fail;
1475 }
1476 Py_DECREF(dec_buffer);
1477 Py_XSETREF(self->snapshot, Py_BuildValue("NN", dec_flags, next_input));
1478 }
1479 Py_DECREF(input_chunk);
1480
1481 return (eof == 0);
1482
1483 fail:
1484 Py_XDECREF(dec_buffer);
1485 Py_XDECREF(dec_flags);
1486 Py_XDECREF(input_chunk);
1487 return -1;
1488 }
1489
1490 static PyObject *
textiowrapper_read(textio * self,PyObject * args)1491 textiowrapper_read(textio *self, PyObject *args)
1492 {
1493 Py_ssize_t n = -1;
1494 PyObject *result = NULL, *chunks = NULL;
1495
1496 CHECK_ATTACHED(self);
1497
1498 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
1499 return NULL;
1500
1501 CHECK_CLOSED(self);
1502
1503 if (self->decoder == NULL) {
1504 PyErr_SetString(PyExc_IOError, "not readable");
1505 return NULL;
1506 }
1507
1508 if (_textiowrapper_writeflush(self) < 0)
1509 return NULL;
1510
1511 if (n < 0) {
1512 /* Read everything */
1513 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1514 PyObject *decoded, *final;
1515 if (bytes == NULL)
1516 goto fail;
1517 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1518 bytes, Py_True, NULL);
1519 Py_DECREF(bytes);
1520 if (check_decoded(decoded) < 0)
1521 goto fail;
1522
1523 result = textiowrapper_get_decoded_chars(self, -1);
1524
1525 if (result == NULL) {
1526 Py_DECREF(decoded);
1527 return NULL;
1528 }
1529
1530 final = PyUnicode_Concat(result, decoded);
1531 Py_DECREF(result);
1532 Py_DECREF(decoded);
1533 if (final == NULL)
1534 goto fail;
1535
1536 Py_CLEAR(self->snapshot);
1537 return final;
1538 }
1539 else {
1540 int res = 1;
1541 Py_ssize_t remaining = n;
1542
1543 result = textiowrapper_get_decoded_chars(self, n);
1544 if (result == NULL)
1545 goto fail;
1546 remaining -= PyUnicode_GET_SIZE(result);
1547
1548 /* Keep reading chunks until we have n characters to return */
1549 while (remaining > 0) {
1550 res = textiowrapper_read_chunk(self);
1551 if (res < 0) {
1552 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1553 when EINTR occurs so we needn't do it ourselves. */
1554 if (_PyIO_trap_eintr()) {
1555 continue;
1556 }
1557 goto fail;
1558 }
1559 if (res == 0) /* EOF */
1560 break;
1561 if (chunks == NULL) {
1562 chunks = PyList_New(0);
1563 if (chunks == NULL)
1564 goto fail;
1565 }
1566 if (PyList_Append(chunks, result) < 0)
1567 goto fail;
1568 Py_DECREF(result);
1569 result = textiowrapper_get_decoded_chars(self, remaining);
1570 if (result == NULL)
1571 goto fail;
1572 remaining -= PyUnicode_GET_SIZE(result);
1573 }
1574 if (chunks != NULL) {
1575 if (result != NULL && PyList_Append(chunks, result) < 0)
1576 goto fail;
1577 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
1578 if (result == NULL)
1579 goto fail;
1580 Py_CLEAR(chunks);
1581 }
1582 return result;
1583 }
1584 fail:
1585 Py_XDECREF(result);
1586 Py_XDECREF(chunks);
1587 return NULL;
1588 }
1589
1590
/* Search [start, end] for the control character `ch`.
 *
 * Characters greater than `ch` are skipped in a tight inner loop that has
 * no bounds check of its own, so `end` must point to the real end of the
 * Py_UNICODE storage, that is to the terminating NUL character; otherwise
 * the function will produce incorrect results.
 *
 * Returns a pointer to the first occurrence of `ch`, or NULL if `end` is
 * reached without finding it.
 */
static Py_UNICODE *
find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
{
    Py_UNICODE *cur = start;

    while (1) {
        /* Fast skip over everything that sorts above the wanted char. */
        for (; *cur > ch; cur++) {
        }
        if (*cur == ch) {
            return cur;
        }
        if (cur == end) {
            return NULL;
        }
        cur++;
    }
}
1608
1609 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,Py_UNICODE * start,Py_UNICODE * end,Py_ssize_t * consumed)1610 _PyIO_find_line_ending(
1611 int translated, int universal, PyObject *readnl,
1612 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1613 {
1614 Py_ssize_t len = end - start;
1615
1616 if (translated) {
1617 /* Newlines are already translated, only search for \n */
1618 Py_UNICODE *pos = find_control_char(start, end, '\n');
1619 if (pos != NULL)
1620 return pos - start + 1;
1621 else {
1622 *consumed = len;
1623 return -1;
1624 }
1625 }
1626 else if (universal) {
1627 /* Universal newline search. Find any of \r, \r\n, \n
1628 * The decoder ensures that \r\n are not split in two pieces
1629 */
1630 Py_UNICODE *s = start;
1631 for (;;) {
1632 Py_UNICODE ch;
1633 /* Fast path for non-control chars. The loop always ends
1634 since the Py_UNICODE storage is NUL-terminated. */
1635 while (*s > '\r')
1636 s++;
1637 if (s >= end) {
1638 *consumed = len;
1639 return -1;
1640 }
1641 ch = *s++;
1642 if (ch == '\n')
1643 return s - start;
1644 if (ch == '\r') {
1645 if (*s == '\n')
1646 return s - start + 1;
1647 else
1648 return s - start;
1649 }
1650 }
1651 }
1652 else {
1653 /* Non-universal mode. */
1654 Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
1655 unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
1656 if (readnl_len == 1) {
1657 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1658 if (pos != NULL)
1659 return pos - start + 1;
1660 *consumed = len;
1661 return -1;
1662 }
1663 else {
1664 Py_UNICODE *s = start;
1665 Py_UNICODE *e = end - readnl_len + 1;
1666 Py_UNICODE *pos;
1667 if (e < s)
1668 e = s;
1669 while (s < e) {
1670 Py_ssize_t i;
1671 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1672 if (pos == NULL || pos >= e)
1673 break;
1674 for (i = 1; i < readnl_len; i++) {
1675 if (pos[i] != nl[i])
1676 break;
1677 }
1678 if (i == readnl_len)
1679 return pos - start + readnl_len;
1680 s = pos + 1;
1681 }
1682 pos = find_control_char(e, end, nl[0]);
1683 if (pos == NULL)
1684 *consumed = len;
1685 else
1686 *consumed = pos - start;
1687 return -1;
1688 }
1689 }
1690 }
1691
/* Core of readline(): return the next line as a new unicode object.
 *
 * `limit` is the maximum number of characters to return; a negative value
 * means no limit.  Pending writes are flushed first.  Data is pulled chunk
 * by chunk through textiowrapper_read_chunk() until a line ending is found,
 * the limit is reached or EOF is hit.  Pieces of a line spanning several
 * chunks are collected in the `chunks` list and joined at the end.
 *
 * Returns an empty unicode object at EOF, or NULL with an exception set.
 */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    /* Number of characters already put aside in `chunks`. */
    chunked = 0;

    while (1) {
        Py_UNICODE *ptr;
        Py_ssize_t line_len;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_SIZE(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto error;
            }
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Search directly in the decoded buffer, from the current
               read offset. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the leftover of the previous chunk; positions in
               `line` are then shifted by len(remaining) relative to the
               decoded buffer. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_SIZE(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
        }

        ptr = PyUnicode_AS_UNICODE(line);
        line_len = PyUnicode_GET_SIZE(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            ptr + start, ptr + line_len, &consumed);
        if (endpos >= 0) {
            /* Line ending found: convert to an absolute index in `line`
               and clamp to the limit. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_FromUnicode(ptr + start, endpos - start);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_SIZE(s);
            Py_DECREF(s);
        }
        /* There may be some remaining bytes we'll have to prepend to the
           next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_FromUnicode(
                    ptr + endpos, line_len - endpos);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
            if (start == 0 && Py_REFCNT(line) == 1) {
                /* We hold the only reference: truncate in place. */
                if (PyUnicode_Resize(&line, endpos) < 0)
                    goto error;
            }
            else {
                /* Otherwise copy the [start, endpos) slice. */
                PyObject *s = PyUnicode_FromUnicode(
                        PyUnicode_AS_UNICODE(line) + start, endpos - start);
                Py_CLEAR(line);
                if (s == NULL)
                    goto error;
                line = s;
            }
        }
    }
    if (remaining != NULL) {
        /* Leftover data that was never merged into a following chunk
           still belongs to the result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Join everything that was put aside with the final piece. */
        if (line != NULL && PyList_Append(chunks, line) < 0)
            goto error;
        Py_XSETREF(line, PyUnicode_Join(_PyIO_empty_str, chunks));
        if (line == NULL)
            goto error;
        Py_DECREF(chunks);
    }
    if (line == NULL)
        line = PyUnicode_FromStringAndSize(NULL, 0);

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
1850
1851 static PyObject *
textiowrapper_readline(textio * self,PyObject * args)1852 textiowrapper_readline(textio *self, PyObject *args)
1853 {
1854 PyObject *limitobj = NULL;
1855 Py_ssize_t limit = -1;
1856
1857 CHECK_ATTACHED(self);
1858 if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1859 return NULL;
1860 }
1861 if (limitobj) {
1862 if (!PyNumber_Check(limitobj)) {
1863 PyErr_Format(PyExc_TypeError,
1864 "integer argument expected, got '%.200s'",
1865 Py_TYPE(limitobj)->tp_name);
1866 return NULL;
1867 }
1868 limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1869 if (limit == -1 && PyErr_Occurred())
1870 return NULL;
1871 }
1872 return _textiowrapper_readline(self, limit);
1873 }
1874
1875 /* Seek and Tell */
1876
/* Unpacked form of the opaque position cookie exchanged by tell() and
   seek(). */
typedef struct {
    Py_off_t start_pos;     /* position to seek the underlying buffer to */
    int dec_flags;          /* decoder flags restored through setstate() */
    int bytes_to_feed;      /* bytes seek() reads and feeds to the decoder */
    int chars_to_skip;      /* decoded characters to skip after feeding */
    char need_eof;          /* passed as the final flag to decode() in seek() */
} cookie_type;
1884
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
   The following macros define at which offsets in the intermediary byte
   string the various cookie_type fields will be stored.
 */

/* Total size of the intermediary byte string: one slot per cookie_type
   field, without any padding. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1922
1923 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)1924 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1925 {
1926 unsigned char buffer[COOKIE_BUF_LEN];
1927 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1928 if (cookieLong == NULL)
1929 return -1;
1930
1931 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1932 IS_LITTLE_ENDIAN, 0) < 0) {
1933 Py_DECREF(cookieLong);
1934 return -1;
1935 }
1936 Py_DECREF(cookieLong);
1937
1938 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1939 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1940 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1941 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1942 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1943
1944 return 0;
1945 }
1946
1947 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)1948 textiowrapper_build_cookie(cookie_type *cookie)
1949 {
1950 unsigned char buffer[COOKIE_BUF_LEN];
1951
1952 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1953 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1954 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1955 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1956 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1957
1958 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1959 }
1960 #undef IS_LITTLE_ENDIAN
1961
1962 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)1963 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1964 {
1965 PyObject *res;
1966 /* When seeking to the start of the stream, we call decoder.reset()
1967 rather than decoder.getstate().
1968 This is for a few decoders such as utf-16 for which the state value
1969 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1970 utf-16, that we are expecting a BOM).
1971 */
1972 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1973 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1974 else
1975 res = PyObject_CallMethod(self->decoder, "setstate",
1976 "((si))", "", cookie->dec_flags);
1977 if (res == NULL)
1978 return -1;
1979 Py_DECREF(res);
1980 return 0;
1981 }
1982
1983 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)1984 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1985 {
1986 PyObject *res;
1987 /* Same as _textiowrapper_decoder_setstate() above. */
1988 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1989 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1990 self->encoding_start_of_stream = 1;
1991 }
1992 else {
1993 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1994 _PyIO_zero, NULL);
1995 self->encoding_start_of_stream = 0;
1996 }
1997 if (res == NULL)
1998 return -1;
1999 Py_DECREF(res);
2000 return 0;
2001 }
2002
2003 static PyObject *
textiowrapper_seek(textio * self,PyObject * args)2004 textiowrapper_seek(textio *self, PyObject *args)
2005 {
2006 PyObject *cookieObj, *posobj;
2007 cookie_type cookie;
2008 int whence = 0;
2009 PyObject *res;
2010 int cmp;
2011
2012 CHECK_ATTACHED(self);
2013
2014 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2015 return NULL;
2016 CHECK_CLOSED(self);
2017
2018 Py_INCREF(cookieObj);
2019
2020 if (!self->seekable) {
2021 PyErr_SetString(PyExc_IOError,
2022 "underlying stream is not seekable");
2023 goto fail;
2024 }
2025
2026 if (whence == 1) {
2027 /* seek relative to current position */
2028 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2029 if (cmp < 0)
2030 goto fail;
2031
2032 if (cmp == 0) {
2033 PyErr_SetString(PyExc_IOError,
2034 "can't do nonzero cur-relative seeks");
2035 goto fail;
2036 }
2037
2038 /* Seeking to the current position should attempt to
2039 * sync the underlying buffer with the current position.
2040 */
2041 Py_DECREF(cookieObj);
2042 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2043 if (cookieObj == NULL)
2044 goto fail;
2045 }
2046 else if (whence == 2) {
2047 /* seek relative to end of file */
2048
2049 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2050 if (cmp < 0)
2051 goto fail;
2052
2053 if (cmp == 0) {
2054 PyErr_SetString(PyExc_IOError,
2055 "can't do nonzero end-relative seeks");
2056 goto fail;
2057 }
2058
2059 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2060 if (res == NULL)
2061 goto fail;
2062 Py_DECREF(res);
2063
2064 textiowrapper_set_decoded_chars(self, NULL);
2065 Py_CLEAR(self->snapshot);
2066 if (self->decoder) {
2067 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2068 if (res == NULL)
2069 goto fail;
2070 Py_DECREF(res);
2071 }
2072
2073 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2074 Py_XDECREF(cookieObj);
2075 return res;
2076 }
2077 else if (whence != 0) {
2078 PyErr_Format(PyExc_ValueError,
2079 "invalid whence (%d, should be 0, 1 or 2)", whence);
2080 goto fail;
2081 }
2082
2083 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2084 if (cmp < 0)
2085 goto fail;
2086
2087 if (cmp == 1) {
2088 PyObject *repr = PyObject_Repr(cookieObj);
2089 if (repr != NULL) {
2090 PyErr_Format(PyExc_ValueError,
2091 "negative seek position %s",
2092 PyString_AS_STRING(repr));
2093 Py_DECREF(repr);
2094 }
2095 goto fail;
2096 }
2097
2098 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2099 if (res == NULL)
2100 goto fail;
2101 Py_DECREF(res);
2102
2103 /* The strategy of seek() is to go back to the safe start point
2104 * and replay the effect of read(chars_to_skip) from there.
2105 */
2106 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2107 goto fail;
2108
2109 /* Seek back to the safe start point. */
2110 posobj = PyLong_FromOff_t(cookie.start_pos);
2111 if (posobj == NULL)
2112 goto fail;
2113 res = PyObject_CallMethodObjArgs(self->buffer,
2114 _PyIO_str_seek, posobj, NULL);
2115 Py_DECREF(posobj);
2116 if (res == NULL)
2117 goto fail;
2118 Py_DECREF(res);
2119
2120 textiowrapper_set_decoded_chars(self, NULL);
2121 Py_CLEAR(self->snapshot);
2122
2123 /* Restore the decoder to its state from the safe start point. */
2124 if (self->decoder) {
2125 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2126 goto fail;
2127 }
2128
2129 if (cookie.chars_to_skip) {
2130 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2131 PyObject *input_chunk = PyObject_CallMethod(
2132 self->buffer, "read", "i", cookie.bytes_to_feed);
2133 PyObject *decoded;
2134
2135 if (input_chunk == NULL)
2136 goto fail;
2137
2138 if (!PyBytes_Check(input_chunk)) {
2139 PyErr_Format(PyExc_TypeError,
2140 "underlying read() should have returned a bytes "
2141 "object, not '%.200s'",
2142 Py_TYPE(input_chunk)->tp_name);
2143 Py_DECREF(input_chunk);
2144 goto fail;
2145 }
2146
2147 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2148 if (self->snapshot == NULL) {
2149 Py_DECREF(input_chunk);
2150 goto fail;
2151 }
2152
2153 decoded = PyObject_CallMethod(self->decoder, "decode",
2154 "Oi", input_chunk, (int)cookie.need_eof);
2155
2156 if (check_decoded(decoded) < 0)
2157 goto fail;
2158
2159 textiowrapper_set_decoded_chars(self, decoded);
2160
2161 /* Skip chars_to_skip of the decoded characters. */
2162 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2163 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2164 goto fail;
2165 }
2166 self->decoded_chars_used = cookie.chars_to_skip;
2167 }
2168 else {
2169 self->snapshot = Py_BuildValue("is", cookie.dec_flags, "");
2170 if (self->snapshot == NULL)
2171 goto fail;
2172 }
2173
2174 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2175 if (self->encoder) {
2176 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2177 goto fail;
2178 }
2179 return cookieObj;
2180 fail:
2181 Py_XDECREF(cookieObj);
2182 return NULL;
2183
2184 }
2185
2186 static PyObject *
textiowrapper_tell(textio * self,PyObject * args)2187 textiowrapper_tell(textio *self, PyObject *args)
2188 {
2189 PyObject *res;
2190 PyObject *posobj = NULL;
2191 cookie_type cookie = {0,0,0,0,0};
2192 PyObject *next_input;
2193 Py_ssize_t chars_to_skip, chars_decoded;
2194 PyObject *saved_state = NULL;
2195 char *input, *input_end;
2196
2197 CHECK_ATTACHED(self);
2198 CHECK_CLOSED(self);
2199
2200 if (!self->seekable) {
2201 PyErr_SetString(PyExc_IOError,
2202 "underlying stream is not seekable");
2203 goto fail;
2204 }
2205 if (!self->telling) {
2206 PyErr_SetString(PyExc_IOError,
2207 "telling position disabled by next() call");
2208 goto fail;
2209 }
2210
2211 if (_textiowrapper_writeflush(self) < 0)
2212 return NULL;
2213 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2214 if (res == NULL)
2215 goto fail;
2216 Py_DECREF(res);
2217
2218 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2219 if (posobj == NULL)
2220 goto fail;
2221
2222 if (self->decoder == NULL || self->snapshot == NULL) {
2223 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2224 return posobj;
2225 }
2226
2227 #if defined(HAVE_LARGEFILE_SUPPORT)
2228 cookie.start_pos = PyLong_AsLongLong(posobj);
2229 #else
2230 cookie.start_pos = PyLong_AsLong(posobj);
2231 #endif
2232 if (PyErr_Occurred())
2233 goto fail;
2234
2235 /* Skip backward to the snapshot point (see _read_chunk). */
2236 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2237 goto fail;
2238
2239 assert (PyBytes_Check(next_input));
2240
2241 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2242
2243 /* How many decoded characters have been used up since the snapshot? */
2244 if (self->decoded_chars_used == 0) {
2245 /* We haven't moved from the snapshot point. */
2246 Py_DECREF(posobj);
2247 return textiowrapper_build_cookie(&cookie);
2248 }
2249
2250 chars_to_skip = self->decoded_chars_used;
2251
2252 /* Starting from the snapshot position, we will walk the decoder
2253 * forward until it gives us enough decoded characters.
2254 */
2255 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2256 _PyIO_str_getstate, NULL);
2257 if (saved_state == NULL)
2258 goto fail;
2259
2260 /* Note our initial start point. */
2261 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2262 goto fail;
2263
2264 /* Feed the decoder one byte at a time. As we go, note the
2265 * nearest "safe start point" before the current location
2266 * (a point where the decoder has nothing buffered, so seek()
2267 * can safely start from there and advance to this location).
2268 */
2269 chars_decoded = 0;
2270 input = PyBytes_AS_STRING(next_input);
2271 input_end = input + PyBytes_GET_SIZE(next_input);
2272 while (input < input_end) {
2273 PyObject *state;
2274 char *dec_buffer;
2275 Py_ssize_t dec_buffer_len;
2276 int dec_flags;
2277
2278 PyObject *decoded = PyObject_CallMethod(
2279 self->decoder, "decode", "s#", input, (Py_ssize_t)1);
2280 if (check_decoded(decoded) < 0)
2281 goto fail;
2282 chars_decoded += PyUnicode_GET_SIZE(decoded);
2283 Py_DECREF(decoded);
2284
2285 cookie.bytes_to_feed += 1;
2286
2287 state = PyObject_CallMethodObjArgs(self->decoder,
2288 _PyIO_str_getstate, NULL);
2289 if (state == NULL)
2290 goto fail;
2291 if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2292 Py_DECREF(state);
2293 goto fail;
2294 }
2295 Py_DECREF(state);
2296
2297 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2298 /* Decoder buffer is empty, so this is a safe start point. */
2299 cookie.start_pos += cookie.bytes_to_feed;
2300 chars_to_skip -= chars_decoded;
2301 cookie.dec_flags = dec_flags;
2302 cookie.bytes_to_feed = 0;
2303 chars_decoded = 0;
2304 }
2305 if (chars_decoded >= chars_to_skip)
2306 break;
2307 input++;
2308 }
2309 if (input == input_end) {
2310 /* We didn't get enough decoded data; signal EOF to get more. */
2311 PyObject *decoded = PyObject_CallMethod(
2312 self->decoder, "decode", "si", "", /* final = */ 1);
2313 if (check_decoded(decoded) < 0)
2314 goto fail;
2315 chars_decoded += PyUnicode_GET_SIZE(decoded);
2316 Py_DECREF(decoded);
2317 cookie.need_eof = 1;
2318
2319 if (chars_decoded < chars_to_skip) {
2320 PyErr_SetString(PyExc_IOError,
2321 "can't reconstruct logical file position");
2322 goto fail;
2323 }
2324 }
2325
2326 /* finally */
2327 Py_XDECREF(posobj);
2328 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2329 Py_DECREF(saved_state);
2330 if (res == NULL)
2331 return NULL;
2332 Py_DECREF(res);
2333
2334 /* The returned cookie corresponds to the last safe start point. */
2335 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2336 return textiowrapper_build_cookie(&cookie);
2337
2338 fail:
2339 Py_XDECREF(posobj);
2340 if (saved_state) {
2341 PyObject *type, *value, *traceback;
2342 PyErr_Fetch(&type, &value, &traceback);
2343
2344 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2345 _PyErr_ReplaceException(type, value, traceback);
2346 Py_DECREF(saved_state);
2347 Py_XDECREF(res);
2348 }
2349 return NULL;
2350 }
2351
2352 static PyObject *
textiowrapper_truncate(textio * self,PyObject * args)2353 textiowrapper_truncate(textio *self, PyObject *args)
2354 {
2355 PyObject *pos = Py_None;
2356 PyObject *res;
2357
2358 CHECK_ATTACHED(self)
2359 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2360 return NULL;
2361 }
2362
2363 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2364 if (res == NULL)
2365 return NULL;
2366 Py_DECREF(res);
2367
2368 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2369 }
2370
2371 static PyObject *
textiowrapper_repr(textio * self)2372 textiowrapper_repr(textio *self)
2373 {
2374 PyObject *nameobj, *res;
2375 PyObject *namerepr = NULL, *encrepr = NULL;
2376
2377 CHECK_INITIALIZED(self);
2378
2379 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2380 if (nameobj == NULL) {
2381 if (PyErr_ExceptionMatches(PyExc_Exception))
2382 PyErr_Clear();
2383 else
2384 goto error;
2385 encrepr = PyObject_Repr(self->encoding);
2386 res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2387 PyString_AS_STRING(encrepr));
2388 }
2389 else {
2390 encrepr = PyObject_Repr(self->encoding);
2391 namerepr = PyObject_Repr(nameobj);
2392 res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2393 PyString_AS_STRING(namerepr),
2394 PyString_AS_STRING(encrepr));
2395 Py_DECREF(nameobj);
2396 }
2397 Py_XDECREF(namerepr);
2398 Py_XDECREF(encrepr);
2399 return res;
2400
2401 error:
2402 Py_XDECREF(namerepr);
2403 Py_XDECREF(encrepr);
2404 return NULL;
2405 }
2406
2407
2408 /* Inquiries */
2409
2410 static PyObject *
textiowrapper_fileno(textio * self,PyObject * args)2411 textiowrapper_fileno(textio *self, PyObject *args)
2412 {
2413 CHECK_ATTACHED(self);
2414 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2415 }
2416
2417 static PyObject *
textiowrapper_seekable(textio * self,PyObject * args)2418 textiowrapper_seekable(textio *self, PyObject *args)
2419 {
2420 CHECK_ATTACHED(self);
2421 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2422 }
2423
2424 static PyObject *
textiowrapper_readable(textio * self,PyObject * args)2425 textiowrapper_readable(textio *self, PyObject *args)
2426 {
2427 CHECK_ATTACHED(self);
2428 return PyObject_CallMethod(self->buffer, "readable", NULL);
2429 }
2430
2431 static PyObject *
textiowrapper_writable(textio * self,PyObject * args)2432 textiowrapper_writable(textio *self, PyObject *args)
2433 {
2434 CHECK_ATTACHED(self);
2435 return PyObject_CallMethod(self->buffer, "writable", NULL);
2436 }
2437
2438 static PyObject *
textiowrapper_isatty(textio * self,PyObject * args)2439 textiowrapper_isatty(textio *self, PyObject *args)
2440 {
2441 CHECK_ATTACHED(self);
2442 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2443 }
2444
2445 static PyObject *
textiowrapper_flush(textio * self,PyObject * args)2446 textiowrapper_flush(textio *self, PyObject *args)
2447 {
2448 CHECK_ATTACHED(self);
2449 CHECK_CLOSED(self);
2450 self->telling = self->seekable;
2451 if (_textiowrapper_writeflush(self) < 0)
2452 return NULL;
2453 return PyObject_CallMethod(self->buffer, "flush", NULL);
2454 }
2455
2456 static PyObject *
textiowrapper_close(textio * self,PyObject * args)2457 textiowrapper_close(textio *self, PyObject *args)
2458 {
2459 PyObject *res;
2460 int r;
2461 CHECK_ATTACHED(self);
2462
2463 res = textiowrapper_closed_get(self, NULL);
2464 if (res == NULL)
2465 return NULL;
2466 r = PyObject_IsTrue(res);
2467 Py_DECREF(res);
2468 if (r < 0)
2469 return NULL;
2470
2471 if (r > 0) {
2472 Py_RETURN_NONE; /* stream already closed */
2473 }
2474 else {
2475 PyObject *exc = NULL, *val, *tb;
2476 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2477 if (res == NULL)
2478 PyErr_Fetch(&exc, &val, &tb);
2479 else
2480 Py_DECREF(res);
2481
2482 res = PyObject_CallMethod(self->buffer, "close", NULL);
2483 if (exc != NULL) {
2484 _PyErr_ReplaceException(exc, val, tb);
2485 Py_CLEAR(res);
2486 }
2487 return res;
2488 }
2489 }
2490
2491 static PyObject *
textiowrapper_iternext(textio * self)2492 textiowrapper_iternext(textio *self)
2493 {
2494 PyObject *line;
2495
2496 CHECK_ATTACHED(self);
2497
2498 self->telling = 0;
2499 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2500 /* Skip method call overhead for speed */
2501 line = _textiowrapper_readline(self, -1);
2502 }
2503 else {
2504 line = PyObject_CallMethodObjArgs((PyObject *)self,
2505 _PyIO_str_readline, NULL);
2506 if (line && !PyUnicode_Check(line)) {
2507 PyErr_Format(PyExc_IOError,
2508 "readline() should have returned an str object, "
2509 "not '%.200s'", Py_TYPE(line)->tp_name);
2510 Py_DECREF(line);
2511 return NULL;
2512 }
2513 }
2514
2515 if (line == NULL)
2516 return NULL;
2517
2518 if (PyUnicode_GET_SIZE(line) == 0) {
2519 /* Reached EOF or would have blocked */
2520 Py_DECREF(line);
2521 Py_CLEAR(self->snapshot);
2522 self->telling = self->seekable;
2523 return NULL;
2524 }
2525
2526 return line;
2527 }
2528
2529 static PyObject *
textiowrapper_name_get(textio * self,void * context)2530 textiowrapper_name_get(textio *self, void *context)
2531 {
2532 CHECK_ATTACHED(self);
2533 return PyObject_GetAttrString(self->buffer, "name");
2534 }
2535
2536 static PyObject *
textiowrapper_closed_get(textio * self,void * context)2537 textiowrapper_closed_get(textio *self, void *context)
2538 {
2539 CHECK_ATTACHED(self);
2540 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2541 }
2542
2543 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)2544 textiowrapper_newlines_get(textio *self, void *context)
2545 {
2546 PyObject *res;
2547 CHECK_ATTACHED(self);
2548 if (self->decoder == NULL)
2549 Py_RETURN_NONE;
2550 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2551 if (res == NULL) {
2552 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2553 PyErr_Clear();
2554 Py_RETURN_NONE;
2555 }
2556 else {
2557 return NULL;
2558 }
2559 }
2560 return res;
2561 }
2562
2563 static PyObject *
textiowrapper_errors_get(textio * self,void * context)2564 textiowrapper_errors_get(textio *self, void *context)
2565 {
2566 CHECK_INITIALIZED(self);
2567 Py_INCREF(self->errors);
2568 return self->errors;
2569 }
2570
2571 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)2572 textiowrapper_chunk_size_get(textio *self, void *context)
2573 {
2574 CHECK_ATTACHED(self);
2575 return PyLong_FromSsize_t(self->chunk_size);
2576 }
2577
2578 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)2579 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2580 {
2581 Py_ssize_t n;
2582 CHECK_ATTACHED_INT(self);
2583 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2584 if (n == -1 && PyErr_Occurred())
2585 return -1;
2586 if (n <= 0) {
2587 PyErr_SetString(PyExc_ValueError,
2588 "a strictly positive integer is required");
2589 return -1;
2590 }
2591 self->chunk_size = n;
2592 return 0;
2593 }
2594
2595 static PyMethodDef textiowrapper_methods[] = {
2596 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2597 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2598 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2599 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2600 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2601 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
2602
2603 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2604 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2605 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2606 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2607 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
2608
2609 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2610 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2611 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
2612 {NULL, NULL}
2613 };
2614
2615 static PyMemberDef textiowrapper_members[] = {
2616 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2617 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2618 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
2619 {NULL}
2620 };
2621
2622 static PyGetSetDef textiowrapper_getset[] = {
2623 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2624 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
2625 /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2626 */
2627 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2628 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2629 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2630 (setter)textiowrapper_chunk_size_set, NULL},
2631 {NULL}
2632 };
2633
2634 PyTypeObject PyTextIOWrapper_Type = {
2635 PyVarObject_HEAD_INIT(NULL, 0)
2636 "_io.TextIOWrapper", /*tp_name*/
2637 sizeof(textio), /*tp_basicsize*/
2638 0, /*tp_itemsize*/
2639 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
2640 0, /*tp_print*/
2641 0, /*tp_getattr*/
2642 0, /*tps_etattr*/
2643 0, /*tp_compare */
2644 (reprfunc)textiowrapper_repr,/*tp_repr*/
2645 0, /*tp_as_number*/
2646 0, /*tp_as_sequence*/
2647 0, /*tp_as_mapping*/
2648 0, /*tp_hash */
2649 0, /*tp_call*/
2650 0, /*tp_str*/
2651 0, /*tp_getattro*/
2652 0, /*tp_setattro*/
2653 0, /*tp_as_buffer*/
2654 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2655 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
2656 textiowrapper_doc, /* tp_doc */
2657 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2658 (inquiry)textiowrapper_clear, /* tp_clear */
2659 0, /* tp_richcompare */
2660 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
2661 0, /* tp_iter */
2662 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2663 textiowrapper_methods, /* tp_methods */
2664 textiowrapper_members, /* tp_members */
2665 textiowrapper_getset, /* tp_getset */
2666 0, /* tp_base */
2667 0, /* tp_dict */
2668 0, /* tp_descr_get */
2669 0, /* tp_descr_set */
2670 offsetof(textio, dict), /*tp_dictoffset*/
2671 (initproc)textiowrapper_init, /* tp_init */
2672 0, /* tp_alloc */
2673 PyType_GenericNew, /* tp_new */
2674 };
2675