• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* File object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
6 
7 #ifdef HAVE_SYS_TYPES_H
8 #include <sys/types.h>
9 #endif /* HAVE_SYS_TYPES_H */
10 
11 #ifdef MS_WINDOWS
12 #define fileno _fileno
13 /* can simulate truncate with Win32 API functions; see file_truncate */
14 #define HAVE_FTRUNCATE
15 #define WIN32_LEAN_AND_MEAN
16 #include <windows.h>
17 #endif
18 
19 #if defined(PYOS_OS2) && defined(PYCC_GCC)
20 #include <io.h>
21 #endif
22 
23 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
24 
25 #ifdef HAVE_ERRNO_H
26 #include <errno.h>
27 #endif
28 
29 #ifdef HAVE_GETC_UNLOCKED
30 #define GETC(f) getc_unlocked(f)
31 #define FLOCKFILE(f) flockfile(f)
32 #define FUNLOCKFILE(f) funlockfile(f)
33 #else
34 #define GETC(f) getc(f)
35 #define FLOCKFILE(f)
36 #define FUNLOCKFILE(f)
37 #endif
38 
39 /* Bits in f_newlinetypes */
40 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
41 #define NEWLINE_CR 1            /* \r newline seen */
42 #define NEWLINE_LF 2            /* \n newline seen */
43 #define NEWLINE_CRLF 4          /* \r\n newline seen */
44 
/*
 * These macros release the GIL while preventing the f_close() function being
 * called in the interval between them.  For that purpose, a running total of
 * the number of currently running unlocked code sections is kept in
 * the unlocked_count field of the PyFileObject. The close() method raises
 * an IOError if that field is non-zero.  See issue #815646, #595601.
 *
 * Usage notes:
 *  - FILE_BEGIN_ALLOW_THREADS opens a brace that FILE_END_ALLOW_THREADS
 *    closes, so the two must be used as a pair within the same block.
 *  - FILE_ABORT_ALLOW_THREADS expands to Py_BLOCK_THREADS and undoes the
 *    unlocked_count increment without closing the brace.
 *  - fobj is pasted unparenthesized, so pass a plain identifier.
 */

/* Count one more unlocked section on fobj, then release the GIL. */
#define FILE_BEGIN_ALLOW_THREADS(fobj) \
{ \
    fobj->unlocked_count++; \
    Py_BEGIN_ALLOW_THREADS

/* Re-acquire the GIL, then drop the unlocked-section count taken above. */
#define FILE_END_ALLOW_THREADS(fobj) \
    Py_END_ALLOW_THREADS \
    fobj->unlocked_count--; \
    assert(fobj->unlocked_count >= 0); \
}

/* Py_BLOCK_THREADS followed by the same unlocked_count bookkeeping. */
#define FILE_ABORT_ALLOW_THREADS(fobj) \
    Py_BLOCK_THREADS \
    fobj->unlocked_count--; \
    assert(fobj->unlocked_count >= 0);
68 
69 #ifdef __cplusplus
70 extern "C" {
71 #endif
72 
73 FILE *
PyFile_AsFile(PyObject * f)74 PyFile_AsFile(PyObject *f)
75 {
76     if (f == NULL || !PyFile_Check(f))
77         return NULL;
78     else
79         return ((PyFileObject *)f)->f_fp;
80 }
81 
/* Record that one more unlocked section is using fobj; close_the_file()
   refuses to close the stream while unlocked_count is positive. */
void PyFile_IncUseCount(PyFileObject *fobj)
{
    fobj->unlocked_count += 1;
}
86 
/* Undo one PyFile_IncUseCount(); the count must never become negative. */
void PyFile_DecUseCount(PyFileObject *fobj)
{
    fobj->unlocked_count -= 1;
    assert(fobj->unlocked_count >= 0);
}
92 
93 PyObject *
PyFile_Name(PyObject * f)94 PyFile_Name(PyObject *f)
95 {
96     if (f == NULL || !PyFile_Check(f))
97         return NULL;
98     else
99         return ((PyFileObject *)f)->f_name;
100 }
101 
102 /* This is a safe wrapper around PyObject_Print to print to the FILE
103    of a PyFileObject. PyObject_Print releases the GIL but knows nothing
104    about PyFileObject. */
105 static int
file_PyObject_Print(PyObject * op,PyFileObject * f,int flags)106 file_PyObject_Print(PyObject *op, PyFileObject *f, int flags)
107 {
108     int result;
109     PyFile_IncUseCount(f);
110     result = PyObject_Print(op, f->f_fp, flags);
111     PyFile_DecUseCount(f);
112     return result;
113 }
114 
115 /* On Unix, fopen will succeed for directories.
116    In Python, there should be no file objects referring to
117    directories, so we need a check.  */
118 
119 static PyFileObject*
dircheck(PyFileObject * f)120 dircheck(PyFileObject* f)
121 {
122 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
123     struct stat buf;
124     int res;
125     if (f->f_fp == NULL)
126         return f;
127 
128     Py_BEGIN_ALLOW_THREADS
129     res = fstat(fileno(f->f_fp), &buf);
130     Py_END_ALLOW_THREADS
131 
132     if (res == 0 && S_ISDIR(buf.st_mode)) {
133         char *msg = strerror(EISDIR);
134         PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(isO)",
135                                               EISDIR, msg, f->f_name);
136         PyErr_SetObject(PyExc_IOError, exc);
137         Py_XDECREF(exc);
138         return NULL;
139     }
140 #endif
141     return f;
142 }
143 
144 
145 static PyObject *
fill_file_fields(PyFileObject * f,FILE * fp,PyObject * name,char * mode,int (* close)(FILE *))146 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
147                  int (*close)(FILE *))
148 {
149     assert(name != NULL);
150     assert(f != NULL);
151     assert(PyFile_Check(f));
152     assert(f->f_fp == NULL);
153 
154     Py_DECREF(f->f_name);
155     Py_DECREF(f->f_mode);
156     Py_DECREF(f->f_encoding);
157     Py_DECREF(f->f_errors);
158 
159     Py_INCREF(name);
160     f->f_name = name;
161 
162     f->f_mode = PyString_FromString(mode);
163 
164     f->f_close = close;
165     f->f_softspace = 0;
166     f->f_binary = strchr(mode,'b') != NULL;
167     f->f_buf = NULL;
168     f->f_univ_newline = (strchr(mode, 'U') != NULL);
169     f->f_newlinetypes = NEWLINE_UNKNOWN;
170     f->f_skipnextlf = 0;
171     Py_INCREF(Py_None);
172     f->f_encoding = Py_None;
173     Py_INCREF(Py_None);
174     f->f_errors = Py_None;
175     f->readable = f->writable = 0;
176     if (strchr(mode, 'r') != NULL || f->f_univ_newline)
177         f->readable = 1;
178     if (strchr(mode, 'w') != NULL || strchr(mode, 'a') != NULL)
179         f->writable = 1;
180     if (strchr(mode, '+') != NULL)
181         f->readable = f->writable = 1;
182 
183     if (f->f_mode == NULL)
184         return NULL;
185     f->f_fp = fp;
186     f = dircheck(f);
187     return (PyObject *) f;
188 }
189 
190 #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
191 #define Py_VERIFY_WINNT
/* The CRT on Windows compiled with Visual Studio 2005 and higher may
 * assert if given invalid mode strings.  This is all fine and well
 * in static languages like C where the mode string is typically hard
 * coded.  But in Python, where we pass in the mode string from the user,
 * we need to verify it manually first.
 */
/* Check whether `mode` is a mode string this validator accepts, so that an
 * invalid one is rejected before it can trigger a hard CRT assertion.
 *
 * Accepted grammar (spaces are ignored between items):
 *   - first non-space character is one of 'r', 'w', 'a';
 *   - then any of '+', 'T', 'D' (each at most once), 'N' (ignored), and at
 *     most one letter from each of the pairs b/t, c/n, S/R;
 *   - optionally ", ccs = ENC" where ENC is UTF-8, UTF-16LE or UNICODE.
 *
 * Returns 1 if the whole string matches, 0 otherwise.
 */
static int _PyVerify_Mode_WINNT(const char *mode)
{
    static const char *const encodings[] = {"UTF-8", "UTF-16LE", "UNICODE"};
    const size_t n_encodings = sizeof(encodings) / sizeof(encodings[0]);
    int singles = 0;   /* bitmask of "+TD" flags already seen */
    int pairs = 0;     /* bitmask of b/t, c/n, S/R pairs already seen */
    int encoding = 0;
    const char *s, *c;

    while (*mode == ' ') /* strip initial spaces */
        ++mode;
    /* Must start with one of "rwa".  The explicit NUL test matters:
       strchr() matches the terminator, which would otherwise let an empty
       string through and make the loop below read past the end. */
    if (*mode == '\0' || !strchr("rwa", *mode))
        return 0;
    while (*++mode) {
        if (*mode == ' ' || *mode == 'N') /* ignore spaces and N */
            continue;
        s = "+TD"; /* each of these can appear only once */
        c = strchr(s, *mode);
        if (c) {
            int idx = (int)(c - s); /* non-negative offset into s */
            if (singles & (1 << idx))
                return 0;
            singles |= (1 << idx);
            continue;
        }
        s = "btcnSR"; /* only one letter of each adjacent pair allowed */
        c = strchr(s, *mode);
        if (c) {
            int idx = (int)(c - s) / 2;
            if (pairs & (1 << idx))
                return 0;
            pairs |= (1 << idx);
            continue;
        }
        if (*mode == ',') {
            encoding = 1;
            break;
        }
        return 0; /* found an invalid char */
    }

    if (encoding) {
        size_t i;

        ++mode; /* step over the ',' that ended the flag section */
        while (*mode == ' ')
            ++mode;
        /* find 'ccs =' */
        if (strncmp(mode, "ccs", 3))
            return 0;
        mode += 3;
        while (*mode == ' ')
            ++mode;
        if (*mode != '=')
            return 0;
        ++mode; /* step over the '=' */
        while (*mode == ' ')
            ++mode;
        for (i = 0; i < n_encodings; ++i) {
            size_t l = strlen(encodings[i]);
            if (!strncmp(mode, encodings[i], l)) {
                mode += l; /* found a valid encoding */
                break;
            }
        }
        if (i == n_encodings)
            return 0;
    }
    /* skip trailing spaces */
    while (*mode == ' ')
        ++mode;

    return *mode == '\0'; /* must be at the end of the string */
}
269 #endif
270 
271 /* check for known incorrect mode strings - problem is, platforms are
272    free to accept any mode characters they like and are supposed to
273    ignore stuff they don't understand... write or append mode with
274    universal newline support is expressly forbidden by PEP 278.
275    Additionally, remove the 'U' from the mode string as platforms
276    won't know what it is. Non-zero return signals an exception */
277 int
_PyFile_SanitizeMode(char * mode)278 _PyFile_SanitizeMode(char *mode)
279 {
280     char *upos;
281     size_t len = strlen(mode);
282 
283     if (!len) {
284         PyErr_SetString(PyExc_ValueError, "empty mode string");
285         return -1;
286     }
287 
288     upos = strchr(mode, 'U');
289     if (upos) {
290         memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
291 
292         if (mode[0] == 'w' || mode[0] == 'a') {
293             PyErr_Format(PyExc_ValueError, "universal newline "
294                          "mode can only be used with modes "
295                          "starting with 'r'");
296             return -1;
297         }
298 
299         if (mode[0] != 'r') {
300             memmove(mode+1, mode, strlen(mode)+1);
301             mode[0] = 'r';
302         }
303 
304         if (!strchr(mode, 'b')) {
305             memmove(mode+2, mode+1, strlen(mode));
306             mode[1] = 'b';
307         }
308     } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
309         PyErr_Format(PyExc_ValueError, "mode string must begin with "
310                     "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
311         return -1;
312     }
313 #ifdef Py_VERIFY_WINNT
314     /* additional checks on NT with visual studio 2005 and higher */
315     if (!_PyVerify_Mode_WINNT(mode)) {
316         PyErr_Format(PyExc_ValueError, "Invalid mode ('%.50s')", mode);
317         return -1;
318     }
319 #endif
320     return 0;
321 }
322 
/* Open the file named by `name` (or, on Windows, by the unicode f->f_name)
   with the given `mode` and store the resulting stream in f->f_fp.

   The mode is copied into a scratch buffer and passed through
   _PyFile_SanitizeMode() before use; the original `mode` is only used for
   the error message.  On failure an exception is set (ValueError from mode
   sanitising, IOError otherwise) and NULL is returned.  On success the
   result of dircheck(f) is returned, cast to PyObject *.  The scratch
   buffer is released on every path that reaches `cleanup`. */
static PyObject *
open_the_file(PyFileObject *f, char *name, char *mode)
{
    char *newmode;
    assert(f != NULL);
    assert(PyFile_Check(f));
#ifdef MS_WINDOWS
    /* windows ignores the passed name in order to support Unicode */
    assert(f->f_name != NULL);
#else
    assert(name != NULL);
#endif
    assert(mode != NULL);
    assert(f->f_fp == NULL);

    /* probably need to replace 'U' by 'rb' */
    /* +3: one byte for the terminator plus up to two inserted characters */
    newmode = PyMem_MALLOC(strlen(mode) + 3);
    if (!newmode) {
        PyErr_NoMemory();
        return NULL;
    }
    strcpy(newmode, mode);

    if (_PyFile_SanitizeMode(newmode)) {
        f = NULL;
        goto cleanup;
    }

    /* rexec.py can't stop a user from getting the file() constructor --
       all they have to do is get *any* file object f, and then do
       type(f).  Here we prevent them from doing damage with it. */
    if (PyEval_GetRestricted()) {
        PyErr_SetString(PyExc_IOError,
        "file() constructor not accessible in restricted mode");
        f = NULL;
        goto cleanup;
    }
    /* Cleared so the error mapping below can tell "errno left at 0". */
    errno = 0;

#ifdef MS_WINDOWS
    if (PyUnicode_Check(f->f_name)) {
        PyObject *wmode;
        wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
        if (f->f_name && wmode) {
            FILE_BEGIN_ALLOW_THREADS(f)
            /* PyUnicode_AS_UNICODE OK without thread
               lock as it is a simple dereference. */
            f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
                              PyUnicode_AS_UNICODE(wmode));
            FILE_END_ALLOW_THREADS(f)
        }
        Py_XDECREF(wmode);
    }
#endif
    /* Byte-string path; also the fallback when the wide open above did
       not produce a stream and a `name` was supplied. */
    if (NULL == f->f_fp && NULL != name) {
        FILE_BEGIN_ALLOW_THREADS(f)
        f->f_fp = fopen(name, newmode);
        FILE_END_ALLOW_THREADS(f)
    }

    if (f->f_fp == NULL) {
#if defined  _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
        /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
         * across all Windows flavors.  When it sets EINVAL varies
         * across Windows flavors, the exact conditions aren't
         * documented, and the answer lies in the OS's implementation
         * of Win32's CreateFile function (whose source is secret).
         * Seems the best we can do is map EINVAL to ENOENT.
         * Starting with Visual Studio .NET 2005, EINVAL is correctly
         * set by our CRT error handler (set in exceptions.c.)
         */
        if (errno == 0)         /* bad mode string */
            errno = EINVAL;
        else if (errno == EINVAL) /* unknown, but not a mode string */
            errno = ENOENT;
#endif
        /* EINVAL is returned when an invalid filename or
         * an invalid mode is supplied. */
        if (errno == EINVAL) {
            PyObject *v;
            char message[100];
            /* Reports the caller's original mode, not the sanitised one. */
            PyOS_snprintf(message, 100,
                "invalid mode ('%.50s') or filename", mode);
            v = Py_BuildValue("(isO)", errno, message, f->f_name);
            if (v != NULL) {
                PyErr_SetObject(PyExc_IOError, v);
                Py_DECREF(v);
            }
        }
        else
            PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
        f = NULL;
    }
    if (f != NULL)
        f = dircheck(f);

cleanup:
    PyMem_FREE(newmode);

    return (PyObject *)f;
}
424 
/* Close the stream held by `f`, if any.

   Returns a new reference: None when there was nothing to close, when no
   close function is set, or when the close function returned 0; a Python
   int holding the close function's result when it is non-zero and not
   EOF.  Returns NULL with an exception set when the close function
   returned EOF (IOError from errno), or when a close function is set and
   unlocked_count is positive (IOError, or SystemError if the object's
   refcount is already <= 0). */
static PyObject *
close_the_file(PyFileObject *f)
{
    int sts = 0;
    int (*local_close)(FILE *);
    /* Snapshot the fields first: they are reset below before the GIL is
       released. */
    FILE *local_fp = f->f_fp;
    char *local_setbuf = f->f_setbuf;
    if (local_fp != NULL) {
        local_close = f->f_close;
        if (local_close != NULL && f->unlocked_count > 0) {
            if (Py_REFCNT(f) > 0) {
                PyErr_SetString(PyExc_IOError,
                    "close() called during concurrent "
                    "operation on the same file object");
            } else {
                /* This should not happen unless someone is
                 * carelessly playing with the PyFileObject
                 * struct fields and/or its associated FILE
                 * pointer. */
                PyErr_SetString(PyExc_SystemError,
                    "PyFileObject locking error in "
                    "destructor (refcnt <= 0 at close)");
            }
            return NULL;
        }
        /* NULL out the FILE pointer before releasing the GIL, because
         * it will not be valid anymore after the close() function is
         * called. */
        f->f_fp = NULL;
        if (local_close != NULL) {
            /* Issue #9295: must temporarily reset f_setbuf so that another
               thread doesn't free it when running file_close() concurrently.
               Otherwise this close() will crash when flushing the buffer. */
            f->f_setbuf = NULL;
            Py_BEGIN_ALLOW_THREADS
            errno = 0;
            sts = (*local_close)(local_fp);
            Py_END_ALLOW_THREADS
            /* Restore the buffer pointer so the caller can free it. */
            f->f_setbuf = local_setbuf;
            if (sts == EOF)
                return PyErr_SetFromErrno(PyExc_IOError);
            if (sts != 0)
                return PyInt_FromLong((long)sts);
        }
    }
    Py_RETURN_NONE;
}
472 
473 PyObject *
PyFile_FromFile(FILE * fp,char * name,char * mode,int (* close)(FILE *))474 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
475 {
476     PyFileObject *f;
477     PyObject *o_name;
478 
479     f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type, NULL, NULL);
480     if (f == NULL)
481         return NULL;
482     o_name = PyString_FromString(name);
483     if (o_name == NULL) {
484         if (close != NULL && fp != NULL)
485             close(fp);
486         Py_DECREF(f);
487         return NULL;
488     }
489     if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
490         Py_DECREF(f);
491         Py_DECREF(o_name);
492         return NULL;
493     }
494     Py_DECREF(o_name);
495     return (PyObject *)f;
496 }
497 
498 PyObject *
PyFile_FromString(char * name,char * mode)499 PyFile_FromString(char *name, char *mode)
500 {
501     extern int fclose(FILE *);
502     PyFileObject *f;
503 
504     f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
505     if (f != NULL) {
506         if (open_the_file(f, name, mode) == NULL) {
507             Py_DECREF(f);
508             f = NULL;
509         }
510     }
511     return (PyObject *)f;
512 }
513 
514 void
PyFile_SetBufSize(PyObject * f,int bufsize)515 PyFile_SetBufSize(PyObject *f, int bufsize)
516 {
517     PyFileObject *file = (PyFileObject *)f;
518     if (bufsize >= 0) {
519         int type;
520         switch (bufsize) {
521         case 0:
522             type = _IONBF;
523             break;
524 #ifdef HAVE_SETVBUF
525         case 1:
526             type = _IOLBF;
527             bufsize = BUFSIZ;
528             break;
529 #endif
530         default:
531             type = _IOFBF;
532 #ifndef HAVE_SETVBUF
533             bufsize = BUFSIZ;
534 #endif
535             break;
536         }
537         fflush(file->f_fp);
538         if (type == _IONBF) {
539             PyMem_Free(file->f_setbuf);
540             file->f_setbuf = NULL;
541         } else {
542             file->f_setbuf = (char *)PyMem_Realloc(file->f_setbuf,
543                                                     bufsize);
544         }
545 #ifdef HAVE_SETVBUF
546         setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
547 #else /* !HAVE_SETVBUF */
548         setbuf(file->f_fp, file->f_setbuf);
549 #endif /* !HAVE_SETVBUF */
550     }
551 }
552 
553 /* Set the encoding used to output Unicode strings.
554    Return 1 on success, 0 on failure. */
555 
556 int
PyFile_SetEncoding(PyObject * f,const char * enc)557 PyFile_SetEncoding(PyObject *f, const char *enc)
558 {
559     return PyFile_SetEncodingAndErrors(f, enc, NULL);
560 }
561 
562 int
PyFile_SetEncodingAndErrors(PyObject * f,const char * enc,char * errors)563 PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
564 {
565     PyFileObject *file = (PyFileObject*)f;
566     PyObject *str, *oerrors;
567 
568     assert(PyFile_Check(f));
569     str = PyString_FromString(enc);
570     if (!str)
571         return 0;
572     if (errors) {
573         oerrors = PyString_FromString(errors);
574         if (!oerrors) {
575             Py_DECREF(str);
576             return 0;
577         }
578     } else {
579         oerrors = Py_None;
580         Py_INCREF(Py_None);
581     }
582     Py_SETREF(file->f_encoding, str);
583     Py_SETREF(file->f_errors, oerrors);
584     return 1;
585 }
586 
587 static PyObject *
err_closed(void)588 err_closed(void)
589 {
590     PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
591     return NULL;
592 }
593 
594 static PyObject *
err_mode(char * action)595 err_mode(char *action)
596 {
597     PyErr_Format(PyExc_IOError, "File not open for %s", action);
598     return NULL;
599 }
600 
601 /* Refuse regular file I/O if there's data in the iteration-buffer.
602  * Mixing them would cause data to arrive out of order, as the read*
603  * methods don't use the iteration buffer. */
604 static PyObject *
err_iterbuffered(void)605 err_iterbuffered(void)
606 {
607     PyErr_SetString(PyExc_ValueError,
608         "Mixing iteration and read methods would lose data");
609     return NULL;
610 }
611 
612 static void
drop_file_readahead(PyFileObject * f)613 drop_file_readahead(PyFileObject *f)
614 {
615     PyMem_FREE(f->f_buf);
616     f->f_buf = NULL;
617 }
618 
619 /* Methods */
620 
621 static void
file_dealloc(PyFileObject * f)622 file_dealloc(PyFileObject *f)
623 {
624     PyObject *ret;
625     if (f->weakreflist != NULL)
626         PyObject_ClearWeakRefs((PyObject *) f);
627     ret = close_the_file(f);
628     if (!ret) {
629         PySys_WriteStderr("close failed in file object destructor:\n");
630         PyErr_Print();
631     }
632     else {
633         Py_DECREF(ret);
634     }
635     PyMem_Free(f->f_setbuf);
636     Py_XDECREF(f->f_name);
637     Py_XDECREF(f->f_mode);
638     Py_XDECREF(f->f_encoding);
639     Py_XDECREF(f->f_errors);
640     drop_file_readahead(f);
641     Py_TYPE(f)->tp_free((PyObject *)f);
642 }
643 
644 static PyObject *
file_repr(PyFileObject * f)645 file_repr(PyFileObject *f)
646 {
647     PyObject *ret = NULL;
648     PyObject *name = NULL;
649     if (PyUnicode_Check(f->f_name)) {
650 #ifdef Py_USING_UNICODE
651         const char *name_str;
652         name = PyUnicode_AsUnicodeEscapeString(f->f_name);
653         name_str = name ? PyString_AsString(name) : "?";
654         ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
655                            f->f_fp == NULL ? "closed" : "open",
656                            name_str,
657                            PyString_AsString(f->f_mode),
658                            f);
659         Py_XDECREF(name);
660         return ret;
661 #endif
662     } else {
663         name = PyObject_Repr(f->f_name);
664         if (name == NULL)
665             return NULL;
666         ret = PyString_FromFormat("<%s file %s, mode '%s' at %p>",
667                            f->f_fp == NULL ? "closed" : "open",
668                            PyString_AsString(name),
669                            PyString_AsString(f->f_mode),
670                            f);
671         Py_XDECREF(name);
672         return ret;
673     }
674 }
675 
676 static PyObject *
file_close(PyFileObject * f)677 file_close(PyFileObject *f)
678 {
679     PyObject *sts = close_the_file(f);
680     if (sts) {
681         PyMem_Free(f->f_setbuf);
682         f->f_setbuf = NULL;
683     }
684     return sts;
685 }
686 
687 
688 /* Our very own off_t-like type, 64-bit if possible */
689 #if !defined(HAVE_LARGEFILE_SUPPORT)
690 typedef off_t Py_off_t;
691 #elif SIZEOF_OFF_T >= 8
692 typedef off_t Py_off_t;
693 #elif SIZEOF_FPOS_T >= 8
694 typedef fpos_t Py_off_t;
695 #else
696 #error "Large file support, but neither off_t nor fpos_t is large enough."
697 #endif
698 
699 
/* a portable fseek() function
   return 0 on success, non-zero on failure (with errno set)

   The implementation is picked at compile time, in this order: plain
   fseek() without large-file support, then fseeko(), fseek64(), BeOS
   _fseek(), and finally an emulation built on fgetpos()/fsetpos() for
   platforms whose only 64-bit position type is fpos_t. */
static int
_portable_fseek(FILE *fp, Py_off_t offset, int whence)
{
#if !defined(HAVE_LARGEFILE_SUPPORT)
    return fseek(fp, offset, whence);
#elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
    return fseeko(fp, offset, whence);
#elif defined(HAVE_FSEEK64)
    return fseek64(fp, offset, whence);
#elif defined(__BEOS__)
    return _fseek(fp, offset, whence);
#elif SIZEOF_FPOS_T >= 8
    /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
       and fgetpos() to implement fseek()*/
    fpos_t pos;
    switch (whence) {
    case SEEK_END:
        /* Move to the end first, then treat it as a seek relative to
           the current position. */
#ifdef MS_WINDOWS
        fflush(fp);
        if (_lseeki64(fileno(fp), 0, 2) == -1)
            return -1;
#else
        if (fseek(fp, 0, SEEK_END) != 0)
            return -1;
#endif
        /* fall through */
    case SEEK_CUR:
        if (fgetpos(fp, &pos) != 0)
            return -1;
        /* Turn the relative offset into an absolute position. */
        offset += pos;
        break;
    /* case SEEK_SET: break; */
    }
    return fsetpos(fp, &offset);
#else
#error "Large file support, but no way to fseek."
#endif
}
740 
741 
/* a portable ftell() function
   Return -1 on failure with errno set appropriately, current file
   position on success

   The implementation is picked at compile time, in this order: plain
   ftell() without large-file support, then ftello(), ftell64(), and
   finally fgetpos() where fpos_t is the only 64-bit position type. */
static Py_off_t
_portable_ftell(FILE* fp)
{
#if !defined(HAVE_LARGEFILE_SUPPORT)
    return ftell(fp);
#elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
    return ftello(fp);
#elif defined(HAVE_FTELL64)
    return ftell64(fp);
#elif SIZEOF_FPOS_T >= 8
    fpos_t pos;
    if (fgetpos(fp, &pos) != 0)
        return -1;
    return pos;
#else
#error "Large file support, but no way to ftell."
#endif
}
763 
764 
765 static PyObject *
file_seek(PyFileObject * f,PyObject * args)766 file_seek(PyFileObject *f, PyObject *args)
767 {
768     int whence;
769     int ret;
770     Py_off_t offset;
771     PyObject *offobj, *off_index;
772 
773     if (f->f_fp == NULL)
774         return err_closed();
775     drop_file_readahead(f);
776     whence = 0;
777     if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
778         return NULL;
779     off_index = PyNumber_Index(offobj);
780     if (!off_index) {
781         if (!PyFloat_Check(offobj))
782             return NULL;
783         /* Deprecated in 2.6 */
784         PyErr_Clear();
785         if (PyErr_WarnEx(PyExc_DeprecationWarning,
786                          "integer argument expected, got float",
787                          1) < 0)
788             return NULL;
789         off_index = offobj;
790         Py_INCREF(offobj);
791     }
792 #if !defined(HAVE_LARGEFILE_SUPPORT)
793     offset = PyInt_AsLong(off_index);
794 #else
795     offset = PyLong_Check(off_index) ?
796         PyLong_AsLongLong(off_index) : PyInt_AsLong(off_index);
797 #endif
798     Py_DECREF(off_index);
799     if (PyErr_Occurred())
800         return NULL;
801 
802     FILE_BEGIN_ALLOW_THREADS(f)
803     errno = 0;
804     ret = _portable_fseek(f->f_fp, offset, whence);
805     FILE_END_ALLOW_THREADS(f)
806 
807     if (ret != 0) {
808         PyErr_SetFromErrno(PyExc_IOError);
809         clearerr(f->f_fp);
810         return NULL;
811     }
812     f->f_skipnextlf = 0;
813     Py_INCREF(Py_None);
814     return Py_None;
815 }
816 
817 
818 #ifdef HAVE_FTRUNCATE
/* truncate([size]) method.
   Resizes the file to `size` bytes, or to the current position when no
   size is given, and restores the original file position afterwards.
   Returns None, or NULL with an exception set: ValueError for a closed
   file, IOError when the file is not writable or any stdio/OS call fails,
   or whatever the integer conversion of `size` raised. */
static PyObject *
file_truncate(PyFileObject *f, PyObject *args)
{
    Py_off_t newsize;
    PyObject *newsizeobj = NULL;
    Py_off_t initialpos;
    int ret;

    if (f->f_fp == NULL)
        return err_closed();
    if (!f->writable)
        return err_mode("writing");
    if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
        return NULL;

    /* Get current file position.  If the file happens to be open for
     * update and the last operation was an input operation, C doesn't
     * define what the later fflush() will do, but we promise truncate()
     * won't change the current position (and fflush() *does* change it
     * then at least on Windows).  The easiest thing is to capture
     * current pos now and seek back to it at the end.
     */
    FILE_BEGIN_ALLOW_THREADS(f)
    errno = 0;
    initialpos = _portable_ftell(f->f_fp);
    FILE_END_ALLOW_THREADS(f)
    if (initialpos == -1)
        goto onioerror;

    /* Set newsize to current position if newsizeobj NULL, else to the
     * specified value.
     */
    if (newsizeobj != NULL) {
#if !defined(HAVE_LARGEFILE_SUPPORT)
        newsize = PyInt_AsLong(newsizeobj);
#else
        newsize = PyLong_Check(newsizeobj) ?
                        PyLong_AsLongLong(newsizeobj) :
                PyInt_AsLong(newsizeobj);
#endif
        if (PyErr_Occurred())
            return NULL;
    }
    else /* default to current position */
        newsize = initialpos;

    /* Flush the stream.  We're mixing stream-level I/O with lower-level
     * I/O, and a flush may be necessary to synch both platform views
     * of the current file state.
     */
    FILE_BEGIN_ALLOW_THREADS(f)
    errno = 0;
    ret = fflush(f->f_fp);
    FILE_END_ALLOW_THREADS(f)
    if (ret != 0)
        goto onioerror;

#ifdef MS_WINDOWS
    /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
       so don't even try using it. */
    {
        HANDLE hFile;

        /* Have to move current pos to desired endpoint on Windows. */
        FILE_BEGIN_ALLOW_THREADS(f)
        errno = 0;
        ret = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
        FILE_END_ALLOW_THREADS(f)
        if (ret)
            goto onioerror;

        /* Truncate.  Note that this may grow the file! */
        FILE_BEGIN_ALLOW_THREADS(f)
        errno = 0;
        hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
        ret = hFile == (HANDLE)-1;
        if (ret == 0) {
            ret = SetEndOfFile(hFile) == 0;
            /* errno is set by hand so the shared error path below
               reports EACCES when SetEndOfFile fails. */
            if (ret)
                errno = EACCES;
        }
        FILE_END_ALLOW_THREADS(f)
        if (ret)
            goto onioerror;
    }
#else
    FILE_BEGIN_ALLOW_THREADS(f)
    errno = 0;
    ret = ftruncate(fileno(f->f_fp), newsize);
    FILE_END_ALLOW_THREADS(f)
    if (ret != 0)
        goto onioerror;
#endif /* !MS_WINDOWS */

    /* Restore original file position. */
    FILE_BEGIN_ALLOW_THREADS(f)
    errno = 0;
    ret = _portable_fseek(f->f_fp, initialpos, SEEK_SET) != 0;
    FILE_END_ALLOW_THREADS(f)
    if (ret)
        goto onioerror;

    Py_INCREF(Py_None);
    return Py_None;

    /* Shared failure exit: raise IOError from errno and clear the
       stream's error indicator. */
onioerror:
    PyErr_SetFromErrno(PyExc_IOError);
    clearerr(f->f_fp);
    return NULL;
}
929 #endif /* HAVE_FTRUNCATE */
930 
931 static PyObject *
file_tell(PyFileObject * f)932 file_tell(PyFileObject *f)
933 {
934     Py_off_t pos;
935 
936     if (f->f_fp == NULL)
937         return err_closed();
938     FILE_BEGIN_ALLOW_THREADS(f)
939     errno = 0;
940     pos = _portable_ftell(f->f_fp);
941     FILE_END_ALLOW_THREADS(f)
942 
943     if (pos == -1) {
944         PyErr_SetFromErrno(PyExc_IOError);
945         clearerr(f->f_fp);
946         return NULL;
947     }
948     if (f->f_skipnextlf) {
949         int c;
950         c = GETC(f->f_fp);
951         if (c == '\n') {
952             f->f_newlinetypes |= NEWLINE_CRLF;
953             pos++;
954             f->f_skipnextlf = 0;
955         } else if (c != EOF) ungetc(c, f->f_fp);
956     }
957 #if !defined(HAVE_LARGEFILE_SUPPORT)
958     return PyInt_FromLong(pos);
959 #else
960     return PyLong_FromLongLong(pos);
961 #endif
962 }
963 
964 static PyObject *
file_fileno(PyFileObject * f)965 file_fileno(PyFileObject *f)
966 {
967     if (f->f_fp == NULL)
968         return err_closed();
969     return PyInt_FromLong((long) fileno(f->f_fp));
970 }
971 
972 static PyObject *
file_flush(PyFileObject * f)973 file_flush(PyFileObject *f)
974 {
975     int res;
976 
977     if (f->f_fp == NULL)
978         return err_closed();
979     FILE_BEGIN_ALLOW_THREADS(f)
980     errno = 0;
981     res = fflush(f->f_fp);
982     FILE_END_ALLOW_THREADS(f)
983     if (res != 0) {
984         PyErr_SetFromErrno(PyExc_IOError);
985         clearerr(f->f_fp);
986         return NULL;
987     }
988     Py_INCREF(Py_None);
989     return Py_None;
990 }
991 
992 static PyObject *
file_isatty(PyFileObject * f)993 file_isatty(PyFileObject *f)
994 {
995     long res;
996     if (f->f_fp == NULL)
997         return err_closed();
998     FILE_BEGIN_ALLOW_THREADS(f)
999     res = isatty((int)fileno(f->f_fp));
1000     FILE_END_ALLOW_THREADS(f)
1001     return PyBool_FromLong(res);
1002 }
1003 
1004 
/* SMALLCHUNK is the size of a small read chunk: the stdio buffer size
 * (BUFSIZ), but never less than 8192 bytes. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif
1010 
1011 static size_t
new_buffersize(PyFileObject * f,size_t currentsize)1012 new_buffersize(PyFileObject *f, size_t currentsize)
1013 {
1014 #ifdef HAVE_FSTAT
1015     off_t pos, end;
1016     struct stat st;
1017     int res;
1018     size_t bufsize = 0;
1019 
1020     FILE_BEGIN_ALLOW_THREADS(f)
1021     res = fstat(fileno(f->f_fp), &st);
1022 
1023     if (res == 0) {
1024         end = st.st_size;
1025         /* The following is not a bug: we really need to call lseek()
1026            *and* ftell().  The reason is that some stdio libraries
1027            mistakenly flush their buffer when ftell() is called and
1028            the lseek() call it makes fails, thereby throwing away
1029            data that cannot be recovered in any way.  To avoid this,
1030            we first test lseek(), and only call ftell() if lseek()
1031            works.  We can't use the lseek() value either, because we
1032            need to take the amount of buffered data into account.
1033            (Yet another reason why stdio stinks. :-) */
1034 
1035         pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
1036 
1037         if (pos >= 0) {
1038             pos = ftell(f->f_fp);
1039         }
1040         if (pos < 0)
1041             clearerr(f->f_fp);
1042         if (end > pos && pos >= 0)
1043             bufsize = currentsize + end - pos + 1;
1044         /* Add 1 so if the file were to grow we'd notice. */
1045     }
1046     FILE_END_ALLOW_THREADS(f)
1047     if (bufsize != 0)
1048         return bufsize;
1049 #endif
1050     /* Expand the buffer by an amount proportional to the current size,
1051        giving us amortized linear-time behavior. Use a less-than-double
1052        growth factor to avoid excessive allocation. */
1053     return currentsize + (currentsize >> 3) + 6;
1054 }
1055 
/* BLOCKED_ERRNO(x) is true when the errno value x is EWOULDBLOCK or EAGAIN,
 * using whichever of the two the platform defines.  It is the constant 0
 * when the platform defines neither. */
#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
#elif defined(EWOULDBLOCK)
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
#elif defined(EAGAIN)
#define BLOCKED_ERRNO(x) ((x) == EAGAIN)
#else
#define BLOCKED_ERRNO(x) 0
#endif
1069 
1070 static PyObject *
file_read(PyFileObject * f,PyObject * args)1071 file_read(PyFileObject *f, PyObject *args)
1072 {
1073     long bytesrequested = -1;
1074     size_t bytesread, buffersize, chunksize;
1075     PyObject *v;
1076 
1077     if (f->f_fp == NULL)
1078         return err_closed();
1079     if (!f->readable)
1080         return err_mode("reading");
1081     /* refuse to mix with f.next() */
1082     if (f->f_buf != NULL &&
1083         (f->f_bufend - f->f_bufptr) > 0 &&
1084         f->f_buf[0] != '\0')
1085         return err_iterbuffered();
1086     if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
1087         return NULL;
1088     if (bytesrequested < 0)
1089         buffersize = new_buffersize(f, (size_t)0);
1090     else
1091         buffersize = bytesrequested;
1092     if (buffersize > PY_SSIZE_T_MAX) {
1093         PyErr_SetString(PyExc_OverflowError,
1094     "requested number of bytes is more than a Python string can hold");
1095         return NULL;
1096     }
1097     v = PyString_FromStringAndSize((char *)NULL, buffersize);
1098     if (v == NULL)
1099         return NULL;
1100     bytesread = 0;
1101     for (;;) {
1102         int interrupted;
1103         FILE_BEGIN_ALLOW_THREADS(f)
1104         errno = 0;
1105         chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
1106                   buffersize - bytesread, f->f_fp, (PyObject *)f);
1107         interrupted = ferror(f->f_fp) && errno == EINTR;
1108         FILE_END_ALLOW_THREADS(f)
1109         if (interrupted) {
1110             clearerr(f->f_fp);
1111             if (PyErr_CheckSignals()) {
1112                 Py_DECREF(v);
1113                 return NULL;
1114             }
1115         }
1116         if (chunksize == 0) {
1117             if (interrupted)
1118                 continue;
1119             if (!ferror(f->f_fp))
1120                 break;
1121             clearerr(f->f_fp);
1122             /* When in non-blocking mode, data shouldn't
1123              * be discarded if a blocking signal was
1124              * received. That will also happen if
1125              * chunksize != 0, but bytesread < buffersize. */
1126             if (bytesread > 0 && BLOCKED_ERRNO(errno))
1127                 break;
1128             PyErr_SetFromErrno(PyExc_IOError);
1129             Py_DECREF(v);
1130             return NULL;
1131         }
1132         bytesread += chunksize;
1133         if (bytesread < buffersize && !interrupted) {
1134             clearerr(f->f_fp);
1135             break;
1136         }
1137         if (bytesrequested < 0) {
1138             buffersize = new_buffersize(f, buffersize);
1139             if (_PyString_Resize(&v, buffersize) < 0)
1140                 return NULL;
1141         } else {
1142             /* Got what was requested. */
1143             break;
1144         }
1145     }
1146     if (bytesread != buffersize && _PyString_Resize(&v, bytesread))
1147         return NULL;
1148     return v;
1149 }
1150 
1151 static PyObject *
file_readinto(PyFileObject * f,PyObject * args)1152 file_readinto(PyFileObject *f, PyObject *args)
1153 {
1154     char *ptr;
1155     Py_ssize_t ntodo;
1156     Py_ssize_t ndone, nnow;
1157     Py_buffer pbuf;
1158 
1159     if (f->f_fp == NULL)
1160         return err_closed();
1161     if (!f->readable)
1162         return err_mode("reading");
1163     /* refuse to mix with f.next() */
1164     if (f->f_buf != NULL &&
1165         (f->f_bufend - f->f_bufptr) > 0 &&
1166         f->f_buf[0] != '\0')
1167         return err_iterbuffered();
1168     if (!PyArg_ParseTuple(args, "w*", &pbuf))
1169         return NULL;
1170     ptr = pbuf.buf;
1171     ntodo = pbuf.len;
1172     ndone = 0;
1173     while (ntodo > 0) {
1174         int interrupted;
1175         FILE_BEGIN_ALLOW_THREADS(f)
1176         errno = 0;
1177         nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
1178                                         (PyObject *)f);
1179         interrupted = ferror(f->f_fp) && errno == EINTR;
1180         FILE_END_ALLOW_THREADS(f)
1181         if (interrupted) {
1182             clearerr(f->f_fp);
1183             if (PyErr_CheckSignals()) {
1184                 PyBuffer_Release(&pbuf);
1185                 return NULL;
1186             }
1187         }
1188         if (nnow == 0) {
1189             if (interrupted)
1190                 continue;
1191             if (!ferror(f->f_fp))
1192                 break;
1193             PyErr_SetFromErrno(PyExc_IOError);
1194             clearerr(f->f_fp);
1195             PyBuffer_Release(&pbuf);
1196             return NULL;
1197         }
1198         ndone += nnow;
1199         ntodo -= nnow;
1200     }
1201     PyBuffer_Release(&pbuf);
1202     return PyInt_FromSsize_t(ndone);
1203 }
1204 
1205 /**************************************************************************
1206 Routine to get next line using platform fgets().
1207 
1208 Under MSVC 6:
1209 
1210 + MS threadsafe getc is very slow (multiple layers of function calls before+
1211   after each character, to lock+unlock the stream).
1212 + The stream-locking functions are MS-internal -- can't access them from user
1213   code.
1214 + There's nothing Tim could find in the MS C or platform SDK libraries that
1215   can worm around this.
1216 + MS fgets locks/unlocks only once per line; it's the only hook we have.
1217 
1218 So we use fgets for speed(!), despite that it's painful.
1219 
1220 MS realloc is also slow.
1221 
1222 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
1223 have):
1224     Linux               a wash
1225     Solaris             a wash
1226     Tru64 Unix          getline_via_fgets significantly faster
1227 
1228 CAUTION:  The C std isn't clear about this:  in those cases where fgets
1229 writes something into the buffer, can it write into any position beyond the
1230 required trailing null byte?  MSVC 6 fgets does not, and no platform is (yet)
1231 known on which it does; and it would be a strange way to code fgets. Still,
1232 getline_via_fgets may not work correctly if it does.  The std test
1233 test_bufio.py should fail if platform fgets() routinely writes beyond the
1234 trailing null byte.  #define DONT_USE_FGETS_IN_GETLINE to disable this code.
1235 **************************************************************************/
1236 
/* Decide whether the fgets()-based line reader is compiled in.
 *
 * On a platform with getc_unlocked():
 *     getc_unlocked() is used by default.
 *     #define USE_FGETS_IN_GETLINE to use fgets() instead.
 * On a platform without getc_unlocked():
 *     fgets() is used by default.
 *     #define DONT_USE_FGETS_IN_GETLINE to avoid fgets().
 *
 * DONT_USE_FGETS_IN_GETLINE wins when both macros are defined.
 */
#if !defined(HAVE_GETC_UNLOCKED) && !defined(USE_FGETS_IN_GETLINE)
#define USE_FGETS_IN_GETLINE
#endif

#if defined(USE_FGETS_IN_GETLINE) && defined(DONT_USE_FGETS_IN_GETLINE)
#undef USE_FGETS_IN_GETLINE
#endif
1253 
1254 #ifdef USE_FGETS_IN_GETLINE
1255 static PyObject*
getline_via_fgets(PyFileObject * f,FILE * fp)1256 getline_via_fgets(PyFileObject *f, FILE *fp)
1257 {
1258 /* INITBUFSIZE is the maximum line length that lets us get away with the fast
1259  * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
1260  * to fill this much of the buffer with a known value in order to figure out
1261  * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
1262  * than "most" lines, we waste time filling unused buffer slots.  100 is
1263  * surely adequate for most peoples' email archives, chewing over source code,
1264  * etc -- "regular old text files".
1265  * MAXBUFSIZE is the maximum line length that lets us get away with the less
1266  * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
1267  * cautions about boosting that.  300 was chosen because the worst real-life
1268  * text-crunching job reported on Python-Dev was a mail-log crawler where over
1269  * half the lines were 254 chars.
1270  */
1271 #define INITBUFSIZE 100
1272 #define MAXBUFSIZE 300
1273     char* p;            /* temp */
1274     char buf[MAXBUFSIZE];
1275     PyObject* v;        /* the string object result */
1276     char* pvfree;       /* address of next free slot */
1277     char* pvend;    /* address one beyond last free slot */
1278     size_t nfree;       /* # of free buffer slots; pvend-pvfree */
1279     size_t total_v_size;  /* total # of slots in buffer */
1280     size_t increment;           /* amount to increment the buffer */
1281     size_t prev_v_size;
1282 
1283     /* Optimize for normal case:  avoid _PyString_Resize if at all
1284      * possible via first reading into stack buffer "buf".
1285      */
1286     total_v_size = INITBUFSIZE;         /* start small and pray */
1287     pvfree = buf;
1288     for (;;) {
1289         FILE_BEGIN_ALLOW_THREADS(f)
1290         pvend = buf + total_v_size;
1291         nfree = pvend - pvfree;
1292         memset(pvfree, '\n', nfree);
1293         assert(nfree < INT_MAX); /* Should be atmost MAXBUFSIZE */
1294         p = fgets(pvfree, (int)nfree, fp);
1295         FILE_END_ALLOW_THREADS(f)
1296 
1297         if (p == NULL) {
1298             clearerr(fp);
1299             if (PyErr_CheckSignals())
1300                 return NULL;
1301             v = PyString_FromStringAndSize(buf, pvfree - buf);
1302             return v;
1303         }
1304         /* fgets read *something* */
1305         p = memchr(pvfree, '\n', nfree);
1306         if (p != NULL) {
1307             /* Did the \n come from fgets or from us?
1308              * Since fgets stops at the first \n, and then writes
1309              * \0, if it's from fgets a \0 must be next.  But if
1310              * that's so, it could not have come from us, since
1311              * the \n's we filled the buffer with have only more
1312              * \n's to the right.
1313              */
1314             if (p+1 < pvend && *(p+1) == '\0') {
1315                 /* It's from fgets:  we win!  In particular,
1316                  * we haven't done any mallocs yet, and can
1317                  * build the final result on the first try.
1318                  */
1319                 ++p;                    /* include \n from fgets */
1320             }
1321             else {
1322                 /* Must be from us:  fgets didn't fill the
1323                  * buffer and didn't find a newline, so it
1324                  * must be the last and newline-free line of
1325                  * the file.
1326                  */
1327                 assert(p > pvfree && *(p-1) == '\0');
1328                 --p;                    /* don't include \0 from fgets */
1329             }
1330             v = PyString_FromStringAndSize(buf, p - buf);
1331             return v;
1332         }
1333         /* yuck:  fgets overwrote all the newlines, i.e. the entire
1334          * buffer.  So this line isn't over yet, or maybe it is but
1335          * we're exactly at EOF.  If we haven't already, try using the
1336          * rest of the stack buffer.
1337          */
1338         assert(*(pvend-1) == '\0');
1339         if (pvfree == buf) {
1340             pvfree = pvend - 1;                 /* overwrite trailing null */
1341             total_v_size = MAXBUFSIZE;
1342         }
1343         else
1344             break;
1345     }
1346 
1347     /* The stack buffer isn't big enough; malloc a string object and read
1348      * into its buffer.
1349      */
1350     total_v_size = MAXBUFSIZE << 1;
1351     v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
1352     if (v == NULL)
1353         return v;
1354     /* copy over everything except the last null byte */
1355     memcpy(BUF(v), buf, MAXBUFSIZE-1);
1356     pvfree = BUF(v) + MAXBUFSIZE - 1;
1357 
1358     /* Keep reading stuff into v; if it ever ends successfully, break
1359      * after setting p one beyond the end of the line.  The code here is
1360      * very much like the code above, except reads into v's buffer; see
1361      * the code above for detailed comments about the logic.
1362      */
1363     for (;;) {
1364         FILE_BEGIN_ALLOW_THREADS(f)
1365         pvend = BUF(v) + total_v_size;
1366         nfree = pvend - pvfree;
1367         memset(pvfree, '\n', nfree);
1368         assert(nfree < INT_MAX);
1369         p = fgets(pvfree, (int)nfree, fp);
1370         FILE_END_ALLOW_THREADS(f)
1371 
1372         if (p == NULL) {
1373             clearerr(fp);
1374             if (PyErr_CheckSignals()) {
1375                 Py_DECREF(v);
1376                 return NULL;
1377             }
1378             p = pvfree;
1379             break;
1380         }
1381         p = memchr(pvfree, '\n', nfree);
1382         if (p != NULL) {
1383             if (p+1 < pvend && *(p+1) == '\0') {
1384                 /* \n came from fgets */
1385                 ++p;
1386                 break;
1387             }
1388             /* \n came from us; last line of file, no newline */
1389             assert(p > pvfree && *(p-1) == '\0');
1390             --p;
1391             break;
1392         }
1393         /* expand buffer and try again */
1394         assert(*(pvend-1) == '\0');
1395         increment = total_v_size >> 2;          /* mild exponential growth */
1396         prev_v_size = total_v_size;
1397         total_v_size += increment;
1398         /* check for overflow */
1399         if (total_v_size <= prev_v_size ||
1400             total_v_size > PY_SSIZE_T_MAX) {
1401             PyErr_SetString(PyExc_OverflowError,
1402                 "line is longer than a Python string can hold");
1403             Py_DECREF(v);
1404             return NULL;
1405         }
1406         if (_PyString_Resize(&v, (int)total_v_size) < 0)
1407             return NULL;
1408         /* overwrite the trailing null byte */
1409         pvfree = BUF(v) + (prev_v_size - 1);
1410     }
1411     if (BUF(v) + total_v_size != p && _PyString_Resize(&v, p - BUF(v)))
1412         return NULL;
1413     return v;
1414 #undef INITBUFSIZE
1415 #undef MAXBUFSIZE
1416 }
1417 #endif  /* ifdef USE_FGETS_IN_GETLINE */
1418 
/* Internal routine to get a line.
   Size argument interpretation:
   > 0: max length;
   <= 0: read arbitrary line

   Reads from f->f_fp one character at a time with GETC() while the GIL is
   released and the stream is locked with FLOCKFILE().  In universal
   newline mode (f->f_univ_newline) a "\r" or "\r\n" is stored as a single
   '\n', and the newline kinds seen are recorded in f->f_newlinetypes.

   Returns a new string object holding the line (including the '\n' that
   ended it, if any), or NULL with an exception set when a signal handler
   raises, the read fails (IOError), the line would exceed PY_SSIZE_T_MAX
   bytes (OverflowError), or the buffer cannot be allocated or resized.
*/

static PyObject *
get_line(PyFileObject *f, int n)
{
    FILE *fp = f->f_fp;
    int c;
    char *buf, *end;            /* next free slot; one past the last slot */
    size_t total_v_size;        /* total # of slots in buffer */
    size_t used_v_size;         /* # used slots in buffer */
    size_t increment;       /* amount to increment the buffer */
    PyObject *v;
    /* Local copies of the newline state; written back to f after each
     * reading pass below. */
    int newlinetypes = f->f_newlinetypes;
    int skipnextlf = f->f_skipnextlf;
    int univ_newline = f->f_univ_newline;

#if defined(USE_FGETS_IN_GETLINE)
    /* Unbounded reads without newline translation take the fgets() path. */
    if (n <= 0 && !univ_newline )
        return getline_via_fgets(f, fp);
#endif
    /* Start with room for n bytes, or 100 when the length is unbounded. */
    total_v_size = n > 0 ? n : 100;
    v = PyString_FromStringAndSize((char *)NULL, total_v_size);
    if (v == NULL)
        return NULL;
    buf = BUF(v);
    end = buf + total_v_size;

    for (;;) {
        FILE_BEGIN_ALLOW_THREADS(f)
        FLOCKFILE(fp);
        if (univ_newline) {
            c = 'x'; /* Shut up gcc warning */
            while ( buf != end && (c = GETC(fp)) != EOF ) {
                if (skipnextlf ) {
                    skipnextlf = 0;
                    if (c == '\n') {
                        /* Seeing a \n here with
                         * skipnextlf true means we
                         * saw a \r before.
                         */
                        newlinetypes |= NEWLINE_CRLF;
                        c = GETC(fp);
                        if (c == EOF) break;
                    } else {
                        newlinetypes |= NEWLINE_CR;
                    }
                }
                if (c == '\r') {
                    /* Store '\n' now and remember to swallow a '\n' that
                     * may follow. */
                    skipnextlf = 1;
                    c = '\n';
                } else if ( c == '\n')
                    newlinetypes |= NEWLINE_LF;
                *buf++ = c;
                if (c == '\n') break;
            }
            if (c == EOF) {
                if (ferror(fp) && errno == EINTR) {
                    /* Interrupted by a signal: unlock the stream and take
                     * the GIL back by hand, because this path leaves the
                     * pass through `continue` rather than through
                     * FILE_END_ALLOW_THREADS. */
                    FUNLOCKFILE(fp);
                    FILE_ABORT_ALLOW_THREADS(f)
                    f->f_newlinetypes = newlinetypes;
                    f->f_skipnextlf = skipnextlf;

                    if (PyErr_CheckSignals()) {
                        Py_DECREF(v);
                        return NULL;
                    }
                    /* We executed Python signal handlers and got no exception.
                     * Now back to reading the line where we left off. */
                    clearerr(fp);
                    continue;
                }
                /* A '\r' that was still pending at EOF counts as a bare
                 * CR newline. */
                if (skipnextlf)
                    newlinetypes |= NEWLINE_CR;
            }
        } else /* If not universal newlines use the normal loop */
        while ((c = GETC(fp)) != EOF &&
               (*buf++ = c) != '\n' &&
            buf != end)
            ;
        FUNLOCKFILE(fp);
        FILE_END_ALLOW_THREADS(f)
        f->f_newlinetypes = newlinetypes;
        f->f_skipnextlf = skipnextlf;
        if (c == '\n')
            break;
        if (c == EOF) {
            if (ferror(fp)) {
                if (errno == EINTR) {
                    if (PyErr_CheckSignals()) {
                        Py_DECREF(v);
                        return NULL;
                    }
                    /* We executed Python signal handlers and got no exception.
                     * Now back to reading the line where we left off. */
                    clearerr(fp);
                    continue;
                }
                PyErr_SetFromErrno(PyExc_IOError);
                clearerr(fp);
                Py_DECREF(v);
                return NULL;
            }
            /* EOF without a stream error: the line ends here. */
            clearerr(fp);
            if (PyErr_CheckSignals()) {
                Py_DECREF(v);
                return NULL;
            }
            break;
        }
        /* Must be because buf == end */
        if (n > 0)
            break;
        /* Unbounded read and the buffer is full: grow it by a quarter. */
        used_v_size = total_v_size;
        increment = total_v_size >> 2; /* mild exponential growth */
        total_v_size += increment;
        if (total_v_size > PY_SSIZE_T_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                "line is longer than a Python string can hold");
            Py_DECREF(v);
            return NULL;
        }
        if (_PyString_Resize(&v, total_v_size) < 0)
            return NULL;
        /* v may have moved; recompute both pointers from its new buffer. */
        buf = BUF(v) + used_v_size;
        end = BUF(v) + total_v_size;
    }

    /* Trim the string to the bytes actually stored. */
    used_v_size = buf - BUF(v);
    if (used_v_size != total_v_size && _PyString_Resize(&v, used_v_size))
        return NULL;
    return v;
}
1555 
1556 /* External C interface */
1557 
1558 PyObject *
PyFile_GetLine(PyObject * f,int n)1559 PyFile_GetLine(PyObject *f, int n)
1560 {
1561     PyObject *result;
1562 
1563     if (f == NULL) {
1564         PyErr_BadInternalCall();
1565         return NULL;
1566     }
1567 
1568     if (PyFile_Check(f)) {
1569         PyFileObject *fo = (PyFileObject *)f;
1570         if (fo->f_fp == NULL)
1571             return err_closed();
1572         if (!fo->readable)
1573             return err_mode("reading");
1574         /* refuse to mix with f.next() */
1575         if (fo->f_buf != NULL &&
1576             (fo->f_bufend - fo->f_bufptr) > 0 &&
1577             fo->f_buf[0] != '\0')
1578             return err_iterbuffered();
1579         result = get_line(fo, n);
1580     }
1581     else {
1582         PyObject *reader;
1583         PyObject *args;
1584 
1585         reader = PyObject_GetAttrString(f, "readline");
1586         if (reader == NULL)
1587             return NULL;
1588         if (n <= 0)
1589             args = PyTuple_New(0);
1590         else
1591             args = Py_BuildValue("(i)", n);
1592         if (args == NULL) {
1593             Py_DECREF(reader);
1594             return NULL;
1595         }
1596         result = PyEval_CallObject(reader, args);
1597         Py_DECREF(reader);
1598         Py_DECREF(args);
1599         if (result != NULL && !PyString_Check(result) &&
1600             !PyUnicode_Check(result)) {
1601             Py_DECREF(result);
1602             result = NULL;
1603             PyErr_SetString(PyExc_TypeError,
1604                        "object.readline() returned non-string");
1605         }
1606     }
1607 
1608     if (n < 0 && result != NULL && PyString_Check(result)) {
1609         char *s = PyString_AS_STRING(result);
1610         Py_ssize_t len = PyString_GET_SIZE(result);
1611         if (len == 0) {
1612             Py_DECREF(result);
1613             result = NULL;
1614             PyErr_SetString(PyExc_EOFError,
1615                             "EOF when reading a line");
1616         }
1617         else if (s[len-1] == '\n') {
1618             if (result->ob_refcnt == 1) {
1619                 if (_PyString_Resize(&result, len-1))
1620                     return NULL;
1621             }
1622             else {
1623                 PyObject *v;
1624                 v = PyString_FromStringAndSize(s, len-1);
1625                 Py_DECREF(result);
1626                 result = v;
1627             }
1628         }
1629     }
1630 #ifdef Py_USING_UNICODE
1631     if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1632         Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1633         Py_ssize_t len = PyUnicode_GET_SIZE(result);
1634         if (len == 0) {
1635             Py_DECREF(result);
1636             result = NULL;
1637             PyErr_SetString(PyExc_EOFError,
1638                             "EOF when reading a line");
1639         }
1640         else if (s[len-1] == '\n') {
1641             if (result->ob_refcnt == 1)
1642                 PyUnicode_Resize(&result, len-1);
1643             else {
1644                 PyObject *v;
1645                 v = PyUnicode_FromUnicode(s, len-1);
1646                 Py_DECREF(result);
1647                 result = v;
1648             }
1649         }
1650     }
1651 #endif
1652     return result;
1653 }
1654 
1655 /* Python method */
1656 
1657 static PyObject *
file_readline(PyFileObject * f,PyObject * args)1658 file_readline(PyFileObject *f, PyObject *args)
1659 {
1660     int n = -1;
1661 
1662     if (f->f_fp == NULL)
1663         return err_closed();
1664     if (!f->readable)
1665         return err_mode("reading");
1666     /* refuse to mix with f.next() */
1667     if (f->f_buf != NULL &&
1668         (f->f_bufend - f->f_bufptr) > 0 &&
1669         f->f_buf[0] != '\0')
1670         return err_iterbuffered();
1671     if (!PyArg_ParseTuple(args, "|i:readline", &n))
1672         return NULL;
1673     if (n == 0)
1674         return PyString_FromString("");
1675     if (n < 0)
1676         n = 0;
1677     return get_line(f, n);
1678 }
1679 
1680 static PyObject *
file_readlines(PyFileObject * f,PyObject * args)1681 file_readlines(PyFileObject *f, PyObject *args)
1682 {
1683     long sizehint = 0;
1684     PyObject *list = NULL;
1685     PyObject *line;
1686     char small_buffer[SMALLCHUNK];
1687     char *buffer = small_buffer;
1688     size_t buffersize = SMALLCHUNK;
1689     PyObject *big_buffer = NULL;
1690     size_t nfilled = 0;
1691     size_t nread;
1692     size_t totalread = 0;
1693     char *p, *q, *end;
1694     int err;
1695     int shortread = 0;  /* bool, did the previous read come up short? */
1696 
1697     if (f->f_fp == NULL)
1698         return err_closed();
1699     if (!f->readable)
1700         return err_mode("reading");
1701     /* refuse to mix with f.next() */
1702     if (f->f_buf != NULL &&
1703         (f->f_bufend - f->f_bufptr) > 0 &&
1704         f->f_buf[0] != '\0')
1705         return err_iterbuffered();
1706     if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1707         return NULL;
1708     if ((list = PyList_New(0)) == NULL)
1709         return NULL;
1710     for (;;) {
1711         if (shortread)
1712             nread = 0;
1713         else {
1714             FILE_BEGIN_ALLOW_THREADS(f)
1715             errno = 0;
1716             nread = Py_UniversalNewlineFread(buffer+nfilled,
1717                 buffersize-nfilled, f->f_fp, (PyObject *)f);
1718             FILE_END_ALLOW_THREADS(f)
1719             shortread = (nread < buffersize-nfilled);
1720         }
1721         if (nread == 0) {
1722             sizehint = 0;
1723             if (!ferror(f->f_fp))
1724                 break;
1725             if (errno == EINTR) {
1726                 if (PyErr_CheckSignals()) {
1727                     goto error;
1728                 }
1729                 clearerr(f->f_fp);
1730                 shortread = 0;
1731                 continue;
1732             }
1733             PyErr_SetFromErrno(PyExc_IOError);
1734             clearerr(f->f_fp);
1735             goto error;
1736         }
1737         totalread += nread;
1738         p = (char *)memchr(buffer+nfilled, '\n', nread);
1739         if (p == NULL) {
1740             /* Need a larger buffer to fit this line */
1741             nfilled += nread;
1742             buffersize *= 2;
1743             if (buffersize > PY_SSIZE_T_MAX) {
1744                 PyErr_SetString(PyExc_OverflowError,
1745                 "line is longer than a Python string can hold");
1746                 goto error;
1747             }
1748             if (big_buffer == NULL) {
1749                 /* Create the big buffer */
1750                 big_buffer = PyString_FromStringAndSize(
1751                     NULL, buffersize);
1752                 if (big_buffer == NULL)
1753                     goto error;
1754                 buffer = PyString_AS_STRING(big_buffer);
1755                 memcpy(buffer, small_buffer, nfilled);
1756             }
1757             else {
1758                 /* Grow the big buffer */
1759                 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1760                     goto error;
1761                 buffer = PyString_AS_STRING(big_buffer);
1762             }
1763             continue;
1764         }
1765         end = buffer+nfilled+nread;
1766         q = buffer;
1767         do {
1768             /* Process complete lines */
1769             p++;
1770             line = PyString_FromStringAndSize(q, p-q);
1771             if (line == NULL)
1772                 goto error;
1773             err = PyList_Append(list, line);
1774             Py_DECREF(line);
1775             if (err != 0)
1776                 goto error;
1777             q = p;
1778             p = (char *)memchr(q, '\n', end-q);
1779         } while (p != NULL);
1780         /* Move the remaining incomplete line to the start */
1781         nfilled = end-q;
1782         memmove(buffer, q, nfilled);
1783         if (sizehint > 0)
1784             if (totalread >= (size_t)sizehint)
1785                 break;
1786     }
1787     if (nfilled != 0) {
1788         /* Partial last line */
1789         line = PyString_FromStringAndSize(buffer, nfilled);
1790         if (line == NULL)
1791             goto error;
1792         if (sizehint > 0) {
1793             /* Need to complete the last line */
1794             PyObject *rest = get_line(f, 0);
1795             if (rest == NULL) {
1796                 Py_DECREF(line);
1797                 goto error;
1798             }
1799             PyString_Concat(&line, rest);
1800             Py_DECREF(rest);
1801             if (line == NULL)
1802                 goto error;
1803         }
1804         err = PyList_Append(list, line);
1805         Py_DECREF(line);
1806         if (err != 0)
1807             goto error;
1808     }
1809 
1810 cleanup:
1811     Py_XDECREF(big_buffer);
1812     return list;
1813 
1814 error:
1815     Py_CLEAR(list);
1816     goto cleanup;
1817 }
1818 
1819 static PyObject *
file_write(PyFileObject * f,PyObject * args)1820 file_write(PyFileObject *f, PyObject *args)
1821 {
1822     Py_buffer pbuf;
1823     const char *s;
1824     Py_ssize_t n, n2;
1825     PyObject *encoded = NULL;
1826     int err_flag = 0, err;
1827 
1828     if (f->f_fp == NULL)
1829         return err_closed();
1830     if (!f->writable)
1831         return err_mode("writing");
1832     if (f->f_binary) {
1833         if (!PyArg_ParseTuple(args, "s*", &pbuf))
1834             return NULL;
1835         s = pbuf.buf;
1836         n = pbuf.len;
1837     }
1838     else {
1839         PyObject *text;
1840         if (!PyArg_ParseTuple(args, "O", &text))
1841             return NULL;
1842 
1843         if (PyString_Check(text)) {
1844             s = PyString_AS_STRING(text);
1845             n = PyString_GET_SIZE(text);
1846 #ifdef Py_USING_UNICODE
1847         } else if (PyUnicode_Check(text)) {
1848             const char *encoding, *errors;
1849             if (f->f_encoding != Py_None)
1850                 encoding = PyString_AS_STRING(f->f_encoding);
1851             else
1852                 encoding = PyUnicode_GetDefaultEncoding();
1853             if (f->f_errors != Py_None)
1854                 errors = PyString_AS_STRING(f->f_errors);
1855             else
1856                 errors = "strict";
1857             encoded = PyUnicode_AsEncodedString(text, encoding, errors);
1858             if (encoded == NULL)
1859                 return NULL;
1860             s = PyString_AS_STRING(encoded);
1861             n = PyString_GET_SIZE(encoded);
1862 #endif
1863         } else {
1864             if (PyObject_AsCharBuffer(text, &s, &n))
1865                 return NULL;
1866         }
1867     }
1868     f->f_softspace = 0;
1869     FILE_BEGIN_ALLOW_THREADS(f)
1870     errno = 0;
1871     n2 = fwrite(s, 1, n, f->f_fp);
1872     if (n2 != n || ferror(f->f_fp)) {
1873         err_flag = 1;
1874         err = errno;
1875     }
1876     FILE_END_ALLOW_THREADS(f)
1877     Py_XDECREF(encoded);
1878     if (f->f_binary)
1879         PyBuffer_Release(&pbuf);
1880     if (err_flag) {
1881         errno = err;
1882         PyErr_SetFromErrno(PyExc_IOError);
1883         clearerr(f->f_fp);
1884         return NULL;
1885     }
1886     Py_INCREF(Py_None);
1887     return Py_None;
1888 }
1889 
1890 static PyObject *
file_writelines(PyFileObject * f,PyObject * seq)1891 file_writelines(PyFileObject *f, PyObject *seq)
1892 {
1893 #define CHUNKSIZE 1000
1894     PyObject *list, *line;
1895     PyObject *it;       /* iter(seq) */
1896     PyObject *result;
1897     int index, islist;
1898     Py_ssize_t i, j, nwritten, len;
1899 
1900     assert(seq != NULL);
1901     if (f->f_fp == NULL)
1902         return err_closed();
1903     if (!f->writable)
1904         return err_mode("writing");
1905 
1906     result = NULL;
1907     list = NULL;
1908     islist = PyList_Check(seq);
1909     if  (islist)
1910         it = NULL;
1911     else {
1912         it = PyObject_GetIter(seq);
1913         if (it == NULL) {
1914             PyErr_SetString(PyExc_TypeError,
1915                 "writelines() requires an iterable argument");
1916             return NULL;
1917         }
1918         /* From here on, fail by going to error, to reclaim "it". */
1919         list = PyList_New(CHUNKSIZE);
1920         if (list == NULL)
1921             goto error;
1922     }
1923 
1924     /* Strategy: slurp CHUNKSIZE lines into a private list,
1925        checking that they are all strings, then write that list
1926        without holding the interpreter lock, then come back for more. */
1927     for (index = 0; ; index += CHUNKSIZE) {
1928         if (islist) {
1929             Py_XDECREF(list);
1930             list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1931             if (list == NULL)
1932                 goto error;
1933             j = PyList_GET_SIZE(list);
1934         }
1935         else {
1936             for (j = 0; j < CHUNKSIZE; j++) {
1937                 line = PyIter_Next(it);
1938                 if (line == NULL) {
1939                     if (PyErr_Occurred())
1940                         goto error;
1941                     break;
1942                 }
1943                 PyList_SetItem(list, j, line);
1944             }
1945             /* The iterator might have closed the file on us. */
1946             if (f->f_fp == NULL) {
1947                 err_closed();
1948                 goto error;
1949             }
1950         }
1951         if (j == 0)
1952             break;
1953 
1954         /* Check that all entries are indeed strings. If not,
1955            apply the same rules as for file.write() and
1956            convert the results to strings. This is slow, but
1957            seems to be the only way since all conversion APIs
1958            could potentially execute Python code. */
1959         for (i = 0; i < j; i++) {
1960             PyObject *v = PyList_GET_ITEM(list, i);
1961             if (!PyString_Check(v)) {
1962                 const char *buffer;
1963                 int res;
1964                 if (f->f_binary) {
1965                     res = PyObject_AsReadBuffer(v, (const void**)&buffer, &len);
1966                 } else {
1967                     res = PyObject_AsCharBuffer(v, &buffer, &len);
1968                 }
1969                 if (res) {
1970                     PyErr_SetString(PyExc_TypeError,
1971             "writelines() argument must be a sequence of strings");
1972                             goto error;
1973                 }
1974                 line = PyString_FromStringAndSize(buffer,
1975                                                   len);
1976                 if (line == NULL)
1977                     goto error;
1978                 Py_DECREF(v);
1979                 PyList_SET_ITEM(list, i, line);
1980             }
1981         }
1982 
1983         /* Since we are releasing the global lock, the
1984            following code may *not* execute Python code. */
1985         f->f_softspace = 0;
1986         FILE_BEGIN_ALLOW_THREADS(f)
1987         errno = 0;
1988         for (i = 0; i < j; i++) {
1989             line = PyList_GET_ITEM(list, i);
1990             len = PyString_GET_SIZE(line);
1991             nwritten = fwrite(PyString_AS_STRING(line),
1992                               1, len, f->f_fp);
1993             if (nwritten != len) {
1994                 FILE_ABORT_ALLOW_THREADS(f)
1995                 PyErr_SetFromErrno(PyExc_IOError);
1996                 clearerr(f->f_fp);
1997                 goto error;
1998             }
1999         }
2000         FILE_END_ALLOW_THREADS(f)
2001 
2002         if (j < CHUNKSIZE)
2003             break;
2004     }
2005 
2006     Py_INCREF(Py_None);
2007     result = Py_None;
2008   error:
2009     Py_XDECREF(list);
2010     Py_XDECREF(it);
2011     return result;
2012 #undef CHUNKSIZE
2013 }
2014 
2015 static PyObject *
file_self(PyFileObject * f)2016 file_self(PyFileObject *f)
2017 {
2018     if (f->f_fp == NULL)
2019         return err_closed();
2020     Py_INCREF(f);
2021     return (PyObject *)f;
2022 }
2023 
2024 static PyObject *
file_xreadlines(PyFileObject * f)2025 file_xreadlines(PyFileObject *f)
2026 {
2027     if (PyErr_WarnPy3k("f.xreadlines() not supported in 3.x, "
2028                        "try 'for line in f' instead", 1) < 0)
2029            return NULL;
2030     return file_self(f);
2031 }
2032 
2033 static PyObject *
file_exit(PyObject * f,PyObject * args)2034 file_exit(PyObject *f, PyObject *args)
2035 {
2036     PyObject *ret = PyObject_CallMethod(f, "close", NULL);
2037     if (!ret)
2038         /* If error occurred, pass through */
2039         return NULL;
2040     Py_DECREF(ret);
2041     /* We cannot return the result of close since a true
2042      * value will be interpreted as "yes, swallow the
2043      * exception if one was raised inside the with block". */
2044     Py_RETURN_NONE;
2045 }
2046 
2047 PyDoc_STRVAR(readline_doc,
2048 "readline([size]) -> next line from the file, as a string.\n"
2049 "\n"
2050 "Retain newline.  A non-negative size argument limits the maximum\n"
2051 "number of bytes to return (an incomplete line may be returned then).\n"
2052 "Return an empty string at EOF.");
2053 
2054 PyDoc_STRVAR(read_doc,
2055 "read([size]) -> read at most size bytes, returned as a string.\n"
2056 "\n"
2057 "If the size argument is negative or omitted, read until EOF is reached.\n"
2058 "Notice that when in non-blocking mode, less data than what was requested\n"
2059 "may be returned, even if no size parameter was given.");
2060 
2061 PyDoc_STRVAR(write_doc,
2062 "write(str) -> None.  Write string str to file.\n"
2063 "\n"
2064 "Note that due to buffering, flush() or close() may be needed before\n"
2065 "the file on disk reflects the data written.");
2066 
2067 PyDoc_STRVAR(fileno_doc,
2068 "fileno() -> integer \"file descriptor\".\n"
2069 "\n"
2070 "This is needed for lower-level file interfaces, such os.read().");
2071 
2072 PyDoc_STRVAR(seek_doc,
2073 "seek(offset[, whence]) -> None.  Move to new file position.\n"
2074 "\n"
2075 "Argument offset is a byte count.  Optional argument whence defaults to\n"
2076 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
2077 "(move relative to current position, positive or negative), and 2 (move\n"
2078 "relative to end of file, usually negative, although many platforms allow\n"
2079 "seeking beyond the end of a file).  If the file is opened in text mode,\n"
2080 "only offsets returned by tell() are legal.  Use of other offsets causes\n"
2081 "undefined behavior."
2082 "\n"
2083 "Note that not all file objects are seekable.");
2084 
2085 #ifdef HAVE_FTRUNCATE
2086 PyDoc_STRVAR(truncate_doc,
2087 "truncate([size]) -> None.  Truncate the file to at most size bytes.\n"
2088 "\n"
2089 "Size defaults to the current file position, as returned by tell().");
2090 #endif
2091 
2092 PyDoc_STRVAR(tell_doc,
2093 "tell() -> current file position, an integer (may be a long integer).");
2094 
2095 PyDoc_STRVAR(readinto_doc,
2096 "readinto() -> Undocumented.  Don't use this; it may go away.");
2097 
2098 PyDoc_STRVAR(readlines_doc,
2099 "readlines([size]) -> list of strings, each a line from the file.\n"
2100 "\n"
2101 "Call readline() repeatedly and return a list of the lines so read.\n"
2102 "The optional size argument, if given, is an approximate bound on the\n"
2103 "total number of bytes in the lines returned.");
2104 
2105 PyDoc_STRVAR(xreadlines_doc,
2106 "xreadlines() -> returns self.\n"
2107 "\n"
2108 "For backward compatibility. File objects now include the performance\n"
2109 "optimizations previously implemented in the xreadlines module.");
2110 
2111 PyDoc_STRVAR(writelines_doc,
2112 "writelines(sequence_of_strings) -> None.  Write the strings to the file.\n"
2113 "\n"
2114 "Note that newlines are not added.  The sequence can be any iterable object\n"
2115 "producing strings. This is equivalent to calling write() for each string.");
2116 
2117 PyDoc_STRVAR(flush_doc,
2118 "flush() -> None.  Flush the internal I/O buffer.");
2119 
2120 PyDoc_STRVAR(close_doc,
2121 "close() -> None or (perhaps) an integer.  Close the file.\n"
2122 "\n"
2123 "Sets data attribute .closed to True.  A closed file cannot be used for\n"
2124 "further I/O operations.  close() may be called more than once without\n"
2125 "error.  Some kinds of file objects (for example, opened by popen())\n"
2126 "may return an exit status upon closing.");
2127 
2128 PyDoc_STRVAR(isatty_doc,
2129 "isatty() -> true or false.  True if the file is connected to a tty device.");
2130 
2131 PyDoc_STRVAR(enter_doc,
2132              "__enter__() -> self.");
2133 
2134 PyDoc_STRVAR(exit_doc,
2135              "__exit__(*excinfo) -> None.  Closes the file.");
2136 
2137 static PyMethodDef file_methods[] = {
2138     {"readline",  (PyCFunction)file_readline, METH_VARARGS, readline_doc},
2139     {"read",      (PyCFunction)file_read,     METH_VARARGS, read_doc},
2140     {"write",     (PyCFunction)file_write,    METH_VARARGS, write_doc},
2141     {"fileno",    (PyCFunction)file_fileno,   METH_NOARGS,  fileno_doc},
2142     {"seek",      (PyCFunction)file_seek,     METH_VARARGS, seek_doc},
2143 #ifdef HAVE_FTRUNCATE
2144     {"truncate",  (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
2145 #endif
2146     {"tell",      (PyCFunction)file_tell,     METH_NOARGS,  tell_doc},
2147     {"readinto",  (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
2148     {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc},
2149     {"xreadlines",(PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc},
2150     {"writelines",(PyCFunction)file_writelines, METH_O,     writelines_doc},
2151     {"flush",     (PyCFunction)file_flush,    METH_NOARGS,  flush_doc},
2152     {"close",     (PyCFunction)file_close,    METH_NOARGS,  close_doc},
2153     {"isatty",    (PyCFunction)file_isatty,   METH_NOARGS,  isatty_doc},
2154     {"__enter__", (PyCFunction)file_self,     METH_NOARGS,  enter_doc},
2155     {"__exit__",  (PyCFunction)file_exit,     METH_VARARGS, exit_doc},
2156     {NULL,            NULL}             /* sentinel */
2157 };
2158 
2159 #define OFF(x) offsetof(PyFileObject, x)
2160 
2161 static PyMemberDef file_memberlist[] = {
2162     {"mode",            T_OBJECT,       OFF(f_mode),    RO,
2163      "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
2164     {"name",            T_OBJECT,       OFF(f_name),    RO,
2165      "file name"},
2166     {"encoding",        T_OBJECT,       OFF(f_encoding),        RO,
2167      "file encoding"},
2168     {"errors",          T_OBJECT,       OFF(f_errors),  RO,
2169      "Unicode error handler"},
2170     /* getattr(f, "closed") is implemented without this table */
2171     {NULL}      /* Sentinel */
2172 };
2173 
2174 static PyObject *
get_closed(PyFileObject * f,void * closure)2175 get_closed(PyFileObject *f, void *closure)
2176 {
2177     return PyBool_FromLong((long)(f->f_fp == 0));
2178 }
2179 static PyObject *
get_newlines(PyFileObject * f,void * closure)2180 get_newlines(PyFileObject *f, void *closure)
2181 {
2182     switch (f->f_newlinetypes) {
2183     case NEWLINE_UNKNOWN:
2184         Py_INCREF(Py_None);
2185         return Py_None;
2186     case NEWLINE_CR:
2187         return PyString_FromString("\r");
2188     case NEWLINE_LF:
2189         return PyString_FromString("\n");
2190     case NEWLINE_CR|NEWLINE_LF:
2191         return Py_BuildValue("(ss)", "\r", "\n");
2192     case NEWLINE_CRLF:
2193         return PyString_FromString("\r\n");
2194     case NEWLINE_CR|NEWLINE_CRLF:
2195         return Py_BuildValue("(ss)", "\r", "\r\n");
2196     case NEWLINE_LF|NEWLINE_CRLF:
2197         return Py_BuildValue("(ss)", "\n", "\r\n");
2198     case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
2199         return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
2200     default:
2201         PyErr_Format(PyExc_SystemError,
2202                      "Unknown newlines value 0x%x\n",
2203                      f->f_newlinetypes);
2204         return NULL;
2205     }
2206 }
2207 
2208 static PyObject *
get_softspace(PyFileObject * f,void * closure)2209 get_softspace(PyFileObject *f, void *closure)
2210 {
2211     if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2212         return NULL;
2213     return PyInt_FromLong(f->f_softspace);
2214 }
2215 
2216 static int
set_softspace(PyFileObject * f,PyObject * value)2217 set_softspace(PyFileObject *f, PyObject *value)
2218 {
2219     int new;
2220     if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2221         return -1;
2222 
2223     if (value == NULL) {
2224         PyErr_SetString(PyExc_TypeError,
2225                         "can't delete softspace attribute");
2226         return -1;
2227     }
2228 
2229     new = PyInt_AsLong(value);
2230     if (new == -1 && PyErr_Occurred())
2231         return -1;
2232     f->f_softspace = new;
2233     return 0;
2234 }
2235 
2236 static PyGetSetDef file_getsetlist[] = {
2237     {"closed", (getter)get_closed, NULL, "True if the file is closed"},
2238     {"newlines", (getter)get_newlines, NULL,
2239      "end-of-line convention used in this file"},
2240     {"softspace", (getter)get_softspace, (setter)set_softspace,
2241      "flag indicating that a space needs to be printed; used by print"},
2242     {0},
2243 };
2244 
/* State of a readahead buffer, as filled in by readahead(). */
typedef struct {
    char *buf;          /* start of the allocated buffer, or NULL */
    char *bufptr;       /* next unread byte */
    char *bufend;       /* one past the last byte read into buf */
} readaheadbuffer;
2248 
2249 static void
drop_readaheadbuffer(readaheadbuffer * rab)2250 drop_readaheadbuffer(readaheadbuffer *rab)
2251 {
2252     if (rab->buf != NULL) {
2253         PyMem_FREE(rab->buf);
2254         rab->buf = NULL;
2255     }
2256 }
2257 
2258 /* Make sure that file has a readahead buffer with at least one byte
2259    (unless at EOF) and no more than bufsize.  Returns negative value on
2260    error, will set MemoryError if bufsize bytes cannot be allocated. */
2261 static int
readahead(PyFileObject * f,readaheadbuffer * rab,Py_ssize_t bufsize)2262 readahead(PyFileObject *f, readaheadbuffer *rab, Py_ssize_t bufsize)
2263 {
2264     Py_ssize_t chunksize;
2265 
2266     if (rab->buf != NULL) {
2267         if ((rab->bufend - rab->bufptr) >= 1)
2268             return 0;
2269         else
2270             drop_readaheadbuffer(rab);
2271     }
2272     if ((rab->buf = PyMem_MALLOC(bufsize)) == NULL) {
2273         PyErr_NoMemory();
2274         return -1;
2275     }
2276     FILE_BEGIN_ALLOW_THREADS(f)
2277     errno = 0;
2278     chunksize = Py_UniversalNewlineFread(rab->buf, bufsize, f->f_fp, (PyObject *)f);
2279     FILE_END_ALLOW_THREADS(f)
2280     if (chunksize == 0) {
2281         if (ferror(f->f_fp)) {
2282             PyErr_SetFromErrno(PyExc_IOError);
2283             clearerr(f->f_fp);
2284             drop_readaheadbuffer(rab);
2285             return -1;
2286         }
2287     }
2288     rab->bufptr = rab->buf;
2289     rab->bufend = rab->buf + chunksize;
2290     return 0;
2291 }
2292 
2293 /* Used by file_iternext.  The returned string will start with 'skip'
2294    uninitialized bytes followed by the remainder of the line. Don't be
2295    horrified by the recursive call: maximum recursion depth is limited by
2296    logarithmic buffer growth to about 50 even when reading a 1gb line. */
2297 
2298 static PyStringObject *
readahead_get_line_skip(PyFileObject * f,readaheadbuffer * rab,Py_ssize_t skip,Py_ssize_t bufsize)2299 readahead_get_line_skip(PyFileObject *f, readaheadbuffer *rab, Py_ssize_t skip, Py_ssize_t bufsize)
2300 {
2301     PyStringObject* s;
2302     char *bufptr;
2303     char *buf;
2304     Py_ssize_t len;
2305 
2306     if (rab->buf == NULL)
2307         if (readahead(f, rab, bufsize) < 0)
2308             return NULL;
2309 
2310     len = rab->bufend - rab->bufptr;
2311     if (len == 0)
2312         return (PyStringObject *)PyString_FromStringAndSize(NULL, skip);
2313     bufptr = (char *)memchr(rab->bufptr, '\n', len);
2314     if (bufptr != NULL) {
2315         bufptr++;                               /* Count the '\n' */
2316         len = bufptr - rab->bufptr;
2317         s = (PyStringObject *)PyString_FromStringAndSize(NULL, skip + len);
2318         if (s == NULL)
2319             return NULL;
2320         memcpy(PyString_AS_STRING(s) + skip, rab->bufptr, len);
2321         rab->bufptr = bufptr;
2322         if (bufptr == rab->bufend)
2323             drop_readaheadbuffer(rab);
2324     } else {
2325         bufptr = rab->bufptr;
2326         buf = rab->buf;
2327         rab->buf = NULL;                /* Force new readahead buffer */
2328         assert(len <= PY_SSIZE_T_MAX - skip);
2329         s = readahead_get_line_skip(f, rab, skip + len, bufsize + (bufsize>>2));
2330         if (s == NULL) {
2331             PyMem_FREE(buf);
2332             return NULL;
2333         }
2334         memcpy(PyString_AS_STRING(s) + skip, bufptr, len);
2335         PyMem_FREE(buf);
2336     }
2337     return s;
2338 }
2339 
2340 /* A larger buffer size may actually decrease performance. */
2341 #define READAHEAD_BUFSIZE 8192
2342 
2343 static PyObject *
file_iternext(PyFileObject * f)2344 file_iternext(PyFileObject *f)
2345 {
2346     PyStringObject* l;
2347 
2348     if (f->f_fp == NULL)
2349         return err_closed();
2350     if (!f->readable)
2351         return err_mode("reading");
2352 
2353     {
2354         /*
2355           Multiple threads can enter this method while the GIL is released
2356           during file read and wreak havoc on the file object's readahead
2357           buffer. To avoid dealing with cross-thread coordination issues, we
2358           cache the file buffer state locally and only set it back on the file
2359           object when we're done.
2360         */
2361         readaheadbuffer rab = {f->f_buf, f->f_bufptr, f->f_bufend};
2362         f->f_buf = NULL;
2363         l = readahead_get_line_skip(f, &rab, 0, READAHEAD_BUFSIZE);
2364         /*
2365           Make sure the file's internal read buffer is cleared out. This will
2366           only do anything if some other thread interleaved with us during
2367           readahead. We want to drop any changeling buffer, so we don't leak
2368           memory. We may lose data, but that's what you get for reading the same
2369           file object in multiple threads.
2370         */
2371         drop_file_readahead(f);
2372         f->f_buf = rab.buf;
2373         f->f_bufptr = rab.bufptr;
2374         f->f_bufend = rab.bufend;
2375     }
2376 
2377     if (l == NULL || PyString_GET_SIZE(l) == 0) {
2378         Py_XDECREF(l);
2379         return NULL;
2380     }
2381     return (PyObject *)l;
2382 }
2383 
2384 
2385 static PyObject *
file_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2386 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2387 {
2388     PyObject *self;
2389     static PyObject *not_yet_string;
2390 
2391     assert(type != NULL && type->tp_alloc != NULL);
2392 
2393     if (not_yet_string == NULL) {
2394         not_yet_string = PyString_InternFromString("<uninitialized file>");
2395         if (not_yet_string == NULL)
2396             return NULL;
2397     }
2398 
2399     self = type->tp_alloc(type, 0);
2400     if (self != NULL) {
2401         /* Always fill in the name and mode, so that nobody else
2402            needs to special-case NULLs there. */
2403         Py_INCREF(not_yet_string);
2404         ((PyFileObject *)self)->f_name = not_yet_string;
2405         Py_INCREF(not_yet_string);
2406         ((PyFileObject *)self)->f_mode = not_yet_string;
2407         Py_INCREF(Py_None);
2408         ((PyFileObject *)self)->f_encoding = Py_None;
2409         Py_INCREF(Py_None);
2410         ((PyFileObject *)self)->f_errors = Py_None;
2411         ((PyFileObject *)self)->weakreflist = NULL;
2412         ((PyFileObject *)self)->unlocked_count = 0;
2413     }
2414     return self;
2415 }
2416 
2417 static int
file_init(PyObject * self,PyObject * args,PyObject * kwds)2418 file_init(PyObject *self, PyObject *args, PyObject *kwds)
2419 {
2420     PyFileObject *foself = (PyFileObject *)self;
2421     int ret = 0;
2422     static char *kwlist[] = {"name", "mode", "buffering", 0};
2423     char *name = NULL;
2424     char *mode = "r";
2425     int bufsize = -1;
2426     int wideargument = 0;
2427 #ifdef MS_WINDOWS
2428     PyObject *po;
2429 #endif
2430 
2431     assert(PyFile_Check(self));
2432     if (foself->f_fp != NULL) {
2433         /* Have to close the existing file first. */
2434         PyObject *closeresult = file_close(foself);
2435         if (closeresult == NULL)
2436             return -1;
2437         Py_DECREF(closeresult);
2438     }
2439 
2440 #ifdef MS_WINDOWS
2441     if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
2442                                     kwlist, &po, &mode, &bufsize) &&
2443         wcslen(PyUnicode_AS_UNICODE(po)) == (size_t)PyUnicode_GET_SIZE(po)) {
2444         wideargument = 1;
2445         if (fill_file_fields(foself, NULL, po, mode,
2446                              fclose) == NULL)
2447             goto Error;
2448     } else {
2449         /* Drop the argument parsing error as narrow
2450            strings are also valid. */
2451         PyErr_Clear();
2452     }
2453 #endif
2454 
2455     if (!wideargument) {
2456         PyObject *o_name;
2457 
2458         if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
2459                                          Py_FileSystemDefaultEncoding,
2460                                          &name,
2461                                          &mode, &bufsize))
2462             return -1;
2463 
2464         /* We parse again to get the name as a PyObject */
2465         if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
2466                                          kwlist, &o_name, &mode,
2467                                          &bufsize))
2468             goto Error;
2469 
2470         if (fill_file_fields(foself, NULL, o_name, mode,
2471                              fclose) == NULL)
2472             goto Error;
2473     }
2474     if (open_the_file(foself, name, mode) == NULL)
2475         goto Error;
2476     foself->f_setbuf = NULL;
2477     PyFile_SetBufSize(self, bufsize);
2478     goto Done;
2479 
2480 Error:
2481     ret = -1;
2482     /* fall through */
2483 Done:
2484     PyMem_Free(name); /* free the encoded string */
2485     return ret;
2486 }
2487 
2488 PyDoc_VAR(file_doc) =
2489 PyDoc_STR(
2490 "file(name[, mode[, buffering]]) -> file object\n"
2491 "\n"
2492 "Open a file.  The mode can be 'r', 'w' or 'a' for reading (default),\n"
2493 "writing or appending.  The file will be created if it doesn't exist\n"
2494 "when opened for writing or appending; it will be truncated when\n"
2495 "opened for writing.  Add a 'b' to the mode for binary files.\n"
2496 "Add a '+' to the mode to allow simultaneous reading and writing.\n"
2497 "If the buffering argument is given, 0 means unbuffered, 1 means line\n"
2498 "buffered, and larger numbers specify the buffer size.  The preferred way\n"
2499 "to open a file is with the builtin open() function.\n"
2500 )
2501 PyDoc_STR(
2502 "Add a 'U' to mode to open the file for input with universal newline\n"
2503 "support.  Any line ending in the input file will be seen as a '\\n'\n"
2504 "in Python.  Also, a file so opened gains the attribute 'newlines';\n"
2505 "the value for this attribute is one of None (no newline read yet),\n"
2506 "'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
2507 "\n"
2508 "'U' cannot be combined with 'w' or '+' mode.\n"
2509 );
2510 
2511 PyTypeObject PyFile_Type = {
2512     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2513     "file",
2514     sizeof(PyFileObject),
2515     0,
2516     (destructor)file_dealloc,                   /* tp_dealloc */
2517     0,                                          /* tp_print */
2518     0,                                          /* tp_getattr */
2519     0,                                          /* tp_setattr */
2520     0,                                          /* tp_compare */
2521     (reprfunc)file_repr,                        /* tp_repr */
2522     0,                                          /* tp_as_number */
2523     0,                                          /* tp_as_sequence */
2524     0,                                          /* tp_as_mapping */
2525     0,                                          /* tp_hash */
2526     0,                                          /* tp_call */
2527     0,                                          /* tp_str */
2528     PyObject_GenericGetAttr,                    /* tp_getattro */
2529     /* softspace is writable:  we must supply tp_setattro */
2530     PyObject_GenericSetAttr,                    /* tp_setattro */
2531     0,                                          /* tp_as_buffer */
2532     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
2533     file_doc,                                   /* tp_doc */
2534     0,                                          /* tp_traverse */
2535     0,                                          /* tp_clear */
2536     0,                                          /* tp_richcompare */
2537     offsetof(PyFileObject, weakreflist),        /* tp_weaklistoffset */
2538     (getiterfunc)file_self,                     /* tp_iter */
2539     (iternextfunc)file_iternext,                /* tp_iternext */
2540     file_methods,                               /* tp_methods */
2541     file_memberlist,                            /* tp_members */
2542     file_getsetlist,                            /* tp_getset */
2543     0,                                          /* tp_base */
2544     0,                                          /* tp_dict */
2545     0,                                          /* tp_descr_get */
2546     0,                                          /* tp_descr_set */
2547     0,                                          /* tp_dictoffset */
2548     file_init,                                  /* tp_init */
2549     PyType_GenericAlloc,                        /* tp_alloc */
2550     file_new,                                   /* tp_new */
2551     PyObject_Del,                           /* tp_free */
2552 };
2553 
2554 /* Interface for the 'soft space' between print items. */
2555 
2556 int
PyFile_SoftSpace(PyObject * f,int newflag)2557 PyFile_SoftSpace(PyObject *f, int newflag)
2558 {
2559     long oldflag = 0;
2560     if (f == NULL) {
2561         /* Do nothing */
2562     }
2563     else if (PyFile_Check(f)) {
2564         oldflag = ((PyFileObject *)f)->f_softspace;
2565         ((PyFileObject *)f)->f_softspace = newflag;
2566     }
2567     else {
2568         PyObject *v;
2569         v = PyObject_GetAttrString(f, "softspace");
2570         if (v == NULL)
2571             PyErr_Clear();
2572         else {
2573             if (PyInt_Check(v))
2574                 oldflag = PyInt_AsLong(v);
2575             assert(oldflag < INT_MAX);
2576             Py_DECREF(v);
2577         }
2578         v = PyInt_FromLong((long)newflag);
2579         if (v == NULL)
2580             PyErr_Clear();
2581         else {
2582             if (PyObject_SetAttrString(f, "softspace", v) != 0)
2583                 PyErr_Clear();
2584             Py_DECREF(v);
2585         }
2586     }
2587     return (int)oldflag;
2588 }
2589 
2590 /* Interfaces to write objects/strings to file-like objects */
2591 
2592 int
PyFile_WriteObject(PyObject * v,PyObject * f,int flags)2593 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2594 {
2595     PyObject *writer, *value, *args, *result;
2596     if (f == NULL) {
2597         PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2598         return -1;
2599     }
2600     else if (PyFile_Check(f)) {
2601         PyFileObject *fobj = (PyFileObject *) f;
2602 #ifdef Py_USING_UNICODE
2603         PyObject *enc = fobj->f_encoding;
2604         int result;
2605 #endif
2606         if (fobj->f_fp == NULL) {
2607             err_closed();
2608             return -1;
2609         }
2610 #ifdef Py_USING_UNICODE
2611         if ((flags & Py_PRINT_RAW) &&
2612             PyUnicode_Check(v) && enc != Py_None) {
2613             char *cenc = PyString_AS_STRING(enc);
2614             char *errors = fobj->f_errors == Py_None ?
2615               "strict" : PyString_AS_STRING(fobj->f_errors);
2616             value = PyUnicode_AsEncodedString(v, cenc, errors);
2617             if (value == NULL)
2618                 return -1;
2619         } else {
2620             value = v;
2621             Py_INCREF(value);
2622         }
2623         result = file_PyObject_Print(value, fobj, flags);
2624         Py_DECREF(value);
2625         return result;
2626 #else
2627         return file_PyObject_Print(v, fobj, flags);
2628 #endif
2629     }
2630     writer = PyObject_GetAttrString(f, "write");
2631     if (writer == NULL)
2632         return -1;
2633     if (flags & Py_PRINT_RAW) {
2634         if (PyUnicode_Check(v)) {
2635             value = v;
2636             Py_INCREF(value);
2637         } else
2638             value = PyObject_Str(v);
2639     }
2640     else
2641         value = PyObject_Repr(v);
2642     if (value == NULL) {
2643         Py_DECREF(writer);
2644         return -1;
2645     }
2646     args = PyTuple_Pack(1, value);
2647     if (args == NULL) {
2648         Py_DECREF(value);
2649         Py_DECREF(writer);
2650         return -1;
2651     }
2652     result = PyEval_CallObject(writer, args);
2653     Py_DECREF(args);
2654     Py_DECREF(value);
2655     Py_DECREF(writer);
2656     if (result == NULL)
2657         return -1;
2658     Py_DECREF(result);
2659     return 0;
2660 }
2661 
2662 int
PyFile_WriteString(const char * s,PyObject * f)2663 PyFile_WriteString(const char *s, PyObject *f)
2664 {
2665 
2666     if (f == NULL) {
2667         /* Should be caused by a pre-existing error */
2668         if (!PyErr_Occurred())
2669             PyErr_SetString(PyExc_SystemError,
2670                             "null file for PyFile_WriteString");
2671         return -1;
2672     }
2673     else if (PyFile_Check(f)) {
2674         PyFileObject *fobj = (PyFileObject *) f;
2675         FILE *fp = PyFile_AsFile(f);
2676         if (fp == NULL) {
2677             err_closed();
2678             return -1;
2679         }
2680         FILE_BEGIN_ALLOW_THREADS(fobj)
2681         fputs(s, fp);
2682         FILE_END_ALLOW_THREADS(fobj)
2683         return 0;
2684     }
2685     else if (!PyErr_Occurred()) {
2686         PyObject *v = PyString_FromString(s);
2687         int err;
2688         if (v == NULL)
2689             return -1;
2690         err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2691         Py_DECREF(v);
2692         return err;
2693     }
2694     else
2695         return -1;
2696 }
2697 
2698 /* Try to get a file-descriptor from a Python object.  If the object
2699    is an integer or long integer, its value is returned.  If not, the
2700    object's fileno() method is called if it exists; the method must return
2701    an integer or long integer, which is returned as the file descriptor value.
2702    -1 is returned on failure.
2703 */
2704 
PyObject_AsFileDescriptor(PyObject * o)2705 int PyObject_AsFileDescriptor(PyObject *o)
2706 {
2707     int fd;
2708     PyObject *meth;
2709 
2710     if (PyInt_Check(o)) {
2711         fd = _PyInt_AsInt(o);
2712     }
2713     else if (PyLong_Check(o)) {
2714         fd = _PyLong_AsInt(o);
2715     }
2716     else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2717     {
2718         PyObject *fno = PyEval_CallObject(meth, NULL);
2719         Py_DECREF(meth);
2720         if (fno == NULL)
2721             return -1;
2722 
2723         if (PyInt_Check(fno)) {
2724             fd = _PyInt_AsInt(fno);
2725             Py_DECREF(fno);
2726         }
2727         else if (PyLong_Check(fno)) {
2728             fd = _PyLong_AsInt(fno);
2729             Py_DECREF(fno);
2730         }
2731         else {
2732             PyErr_SetString(PyExc_TypeError,
2733                             "fileno() returned a non-integer");
2734             Py_DECREF(fno);
2735             return -1;
2736         }
2737     }
2738     else {
2739         PyErr_SetString(PyExc_TypeError,
2740                         "argument must be an int, or have a fileno() method");
2741         return -1;
2742     }
2743 
2744     if (fd < 0) {
2745         PyErr_Format(PyExc_ValueError,
2746                      "file descriptor cannot be a negative integer (%i)",
2747                      fd);
2748         return -1;
2749     }
2750     return fd;
2751 }
2752 
2753 /* From here on we need access to the real fgets and fread */
2754 #undef fgets
2755 #undef fread
2756 
2757 /*
2758 ** Py_UniversalNewlineFgets is an fgets variation that understands
2759 ** all of \r, \n and \r\n conventions.
2760 ** The stream should be opened in binary mode.
2761 ** If fobj is NULL the routine always does newline conversion, and
2762 ** it may peek one char ahead to gobble the second char in \r\n.
2763 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2764 ** is no readahead but in stead a flag is used to skip a following
2765 ** \n on the next read. Also, if the file is open in binary mode
2766 ** the whole conversion is skipped. Finally, the routine keeps track of
2767 ** the different types of newlines seen.
2768 ** Note that we need no error handling: fgets() treats error and eof
2769 ** identically.
2770 */
2771 char *
Py_UniversalNewlineFgets(char * buf,int n,FILE * stream,PyObject * fobj)2772 Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2773 {
2774     char *p = buf;
2775     int c;
2776     int newlinetypes = 0;
2777     int skipnextlf = 0;
2778     int univ_newline = 1;
2779 
2780     if (fobj) {
2781         if (!PyFile_Check(fobj)) {
2782             errno = ENXIO;              /* What can you do... */
2783             return NULL;
2784         }
2785         univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2786         if ( !univ_newline )
2787             return fgets(buf, n, stream);
2788         newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2789         skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2790     }
2791     FLOCKFILE(stream);
2792     c = 'x'; /* Shut up gcc warning */
2793     while (--n > 0 && (c = GETC(stream)) != EOF ) {
2794         if (skipnextlf ) {
2795             skipnextlf = 0;
2796             if (c == '\n') {
2797                 /* Seeing a \n here with skipnextlf true
2798                 ** means we saw a \r before.
2799                 */
2800                 newlinetypes |= NEWLINE_CRLF;
2801                 c = GETC(stream);
2802                 if (c == EOF) break;
2803             } else {
2804                 /*
2805                 ** Note that c == EOF also brings us here,
2806                 ** so we're okay if the last char in the file
2807                 ** is a CR.
2808                 */
2809                 newlinetypes |= NEWLINE_CR;
2810             }
2811         }
2812         if (c == '\r') {
2813             /* A \r is translated into a \n, and we skip
2814             ** an adjacent \n, if any. We don't set the
2815             ** newlinetypes flag until we've seen the next char.
2816             */
2817             skipnextlf = 1;
2818             c = '\n';
2819         } else if ( c == '\n') {
2820             newlinetypes |= NEWLINE_LF;
2821         }
2822         *p++ = c;
2823         if (c == '\n') break;
2824     }
2825     if ( c == EOF && skipnextlf )
2826         newlinetypes |= NEWLINE_CR;
2827     FUNLOCKFILE(stream);
2828     *p = '\0';
2829     if (fobj) {
2830         ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2831         ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2832     } else if ( skipnextlf ) {
2833         /* If we have no file object we cannot save the
2834         ** skipnextlf flag. We have to readahead, which
2835         ** will cause a pause if we're reading from an
2836         ** interactive stream, but that is very unlikely
2837         ** unless we're doing something silly like
2838         ** execfile("/dev/tty").
2839         */
2840         c = GETC(stream);
2841         if ( c != '\n' )
2842             ungetc(c, stream);
2843     }
2844     if (p == buf)
2845         return NULL;
2846     return buf;
2847 }
2848 
2849 /*
2850 ** Py_UniversalNewlineFread is an fread variation that understands
2851 ** all of \r, \n and \r\n conventions.
2852 ** The stream should be opened in binary mode.
2853 ** fobj must be a PyFileObject. In this case there
2854 ** is no readahead but in stead a flag is used to skip a following
2855 ** \n on the next read. Also, if the file is open in binary mode
2856 ** the whole conversion is skipped. Finally, the routine keeps track of
2857 ** the different types of newlines seen.
2858 */
2859 size_t
Py_UniversalNewlineFread(char * buf,size_t n,FILE * stream,PyObject * fobj)2860 Py_UniversalNewlineFread(char *buf, size_t n,
2861                          FILE *stream, PyObject *fobj)
2862 {
2863     char *dst = buf;
2864     PyFileObject *f = (PyFileObject *)fobj;
2865     int newlinetypes, skipnextlf;
2866 
2867     assert(buf != NULL);
2868     assert(stream != NULL);
2869 
2870     if (!fobj || !PyFile_Check(fobj)) {
2871         errno = ENXIO;          /* What can you do... */
2872         return 0;
2873     }
2874     if (!f->f_univ_newline)
2875         return fread(buf, 1, n, stream);
2876     newlinetypes = f->f_newlinetypes;
2877     skipnextlf = f->f_skipnextlf;
2878     /* Invariant:  n is the number of bytes remaining to be filled
2879      * in the buffer.
2880      */
2881     while (n) {
2882         size_t nread;
2883         int shortread;
2884         char *src = dst;
2885 
2886         nread = fread(dst, 1, n, stream);
2887         assert(nread <= n);
2888         if (nread == 0)
2889             break;
2890 
2891         n -= nread; /* assuming 1 byte out for each in; will adjust */
2892         shortread = n != 0;             /* true iff EOF or error */
2893         while (nread--) {
2894             char c = *src++;
2895             if (c == '\r') {
2896                 /* Save as LF and set flag to skip next LF. */
2897                 *dst++ = '\n';
2898                 skipnextlf = 1;
2899             }
2900             else if (skipnextlf && c == '\n') {
2901                 /* Skip LF, and remember we saw CR LF. */
2902                 skipnextlf = 0;
2903                 newlinetypes |= NEWLINE_CRLF;
2904                 ++n;
2905             }
2906             else {
2907                 /* Normal char to be stored in buffer.  Also
2908                  * update the newlinetypes flag if either this
2909                  * is an LF or the previous char was a CR.
2910                  */
2911                 if (c == '\n')
2912                     newlinetypes |= NEWLINE_LF;
2913                 else if (skipnextlf)
2914                     newlinetypes |= NEWLINE_CR;
2915                 *dst++ = c;
2916                 skipnextlf = 0;
2917             }
2918         }
2919         if (shortread) {
2920             /* If this is EOF, update type flags. */
2921             if (skipnextlf && feof(stream))
2922                 newlinetypes |= NEWLINE_CR;
2923             break;
2924         }
2925     }
2926     f->f_newlinetypes = newlinetypes;
2927     f->f_skipnextlf = skipnextlf;
2928     return dst - buf;
2929 }
2930 
2931 #ifdef __cplusplus
2932 }
2933 #endif
2934