• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* File object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
6 
7 #ifdef HAVE_SYS_TYPES_H
8 #include <sys/types.h>
9 #endif /* HAVE_SYS_TYPES_H */
10 
11 #ifdef MS_WINDOWS
12 #define fileno _fileno
13 /* can simulate truncate with Win32 API functions; see file_truncate */
14 #define HAVE_FTRUNCATE
15 #define WIN32_LEAN_AND_MEAN
16 #include <windows.h>
17 #endif
18 
19 #if defined(PYOS_OS2) && defined(PYCC_GCC)
20 #include <io.h>
21 #endif
22 
23 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
24 
25 #ifdef HAVE_ERRNO_H
26 #include <errno.h>
27 #endif
28 
29 #ifdef HAVE_GETC_UNLOCKED
30 #define GETC(f) getc_unlocked(f)
31 #define FLOCKFILE(f) flockfile(f)
32 #define FUNLOCKFILE(f) funlockfile(f)
33 #else
34 #define GETC(f) getc(f)
35 #define FLOCKFILE(f)
36 #define FUNLOCKFILE(f)
37 #endif
38 
39 /* Bits in f_newlinetypes */
40 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
41 #define NEWLINE_CR 1            /* \r newline seen */
42 #define NEWLINE_LF 2            /* \n newline seen */
43 #define NEWLINE_CRLF 4          /* \r\n newline seen */
44 
/*
 * These macros release the GIL while preventing the file from being closed
 * in the interval between them.  For that purpose, a running total of the
 * number of currently running unlocked code sections is kept in the
 * unlocked_count field of the PyFileObject.  close_the_file() raises an
 * IOError if that field is non-zero.  See issue #815646, #595601.
 *
 * FILE_BEGIN_ALLOW_THREADS opens a brace scope which FILE_END_ALLOW_THREADS
 * closes, so the two must always be used as a matched pair inside a single
 * function.  FILE_ABORT_ALLOW_THREADS undoes the count (after
 * Py_BLOCK_THREADS) without closing that scope; it is meant for leaving the
 * bracketed region early.
 *
 * The argument is parenthesized at every use so that any pointer-valued
 * expression, not just a plain identifier, expands correctly.
 */

#define FILE_BEGIN_ALLOW_THREADS(fobj) \
{ \
    (fobj)->unlocked_count++; \
    Py_BEGIN_ALLOW_THREADS

#define FILE_END_ALLOW_THREADS(fobj) \
    Py_END_ALLOW_THREADS \
    (fobj)->unlocked_count--; \
    assert((fobj)->unlocked_count >= 0); \
}

#define FILE_ABORT_ALLOW_THREADS(fobj) \
    Py_BLOCK_THREADS \
    (fobj)->unlocked_count--; \
    assert((fobj)->unlocked_count >= 0);
68 
69 #ifdef __cplusplus
70 extern "C" {
71 #endif
72 
73 FILE *
PyFile_AsFile(PyObject * f)74 PyFile_AsFile(PyObject *f)
75 {
76     if (f == NULL || !PyFile_Check(f))
77         return NULL;
78     else
79         return ((PyFileObject *)f)->f_fp;
80 }
81 
/* Record that one more section of code is using fobj's stream without
   the protection of the FILE_*_ALLOW_THREADS macros.  While the count is
   positive, close_the_file() refuses to close the stream.  Every call
   must be balanced by PyFile_DecUseCount(). */
void
PyFile_IncUseCount(PyFileObject *fobj)
{
    fobj->unlocked_count += 1;
}
86 
/* Undo one PyFile_IncUseCount() on fobj.  In debug builds an assertion
   catches the count going negative, i.e. an unbalanced call. */
void
PyFile_DecUseCount(PyFileObject *fobj)
{
    fobj->unlocked_count -= 1;
    assert(fobj->unlocked_count >= 0);
}
92 
93 PyObject *
PyFile_Name(PyObject * f)94 PyFile_Name(PyObject *f)
95 {
96     if (f == NULL || !PyFile_Check(f))
97         return NULL;
98     else
99         return ((PyFileObject *)f)->f_name;
100 }
101 
102 /* This is a safe wrapper around PyObject_Print to print to the FILE
103    of a PyFileObject. PyObject_Print releases the GIL but knows nothing
104    about PyFileObject. */
105 static int
file_PyObject_Print(PyObject * op,PyFileObject * f,int flags)106 file_PyObject_Print(PyObject *op, PyFileObject *f, int flags)
107 {
108     int result;
109     PyFile_IncUseCount(f);
110     result = PyObject_Print(op, f->f_fp, flags);
111     PyFile_DecUseCount(f);
112     return result;
113 }
114 
115 /* On Unix, fopen will succeed for directories.
116    In Python, there should be no file objects referring to
117    directories, so we need a check.  */
118 
119 static PyFileObject*
dircheck(PyFileObject * f)120 dircheck(PyFileObject* f)
121 {
122 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
123     struct stat buf;
124     if (f->f_fp == NULL)
125         return f;
126     if (fstat(fileno(f->f_fp), &buf) == 0 &&
127         S_ISDIR(buf.st_mode)) {
128         char *msg = strerror(EISDIR);
129         PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(isO)",
130                                               EISDIR, msg, f->f_name);
131         PyErr_SetObject(PyExc_IOError, exc);
132         Py_XDECREF(exc);
133         return NULL;
134     }
135 #endif
136     return f;
137 }
138 
139 
140 static PyObject *
fill_file_fields(PyFileObject * f,FILE * fp,PyObject * name,char * mode,int (* close)(FILE *))141 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
142                  int (*close)(FILE *))
143 {
144     assert(name != NULL);
145     assert(f != NULL);
146     assert(PyFile_Check(f));
147     assert(f->f_fp == NULL);
148 
149     Py_DECREF(f->f_name);
150     Py_DECREF(f->f_mode);
151     Py_DECREF(f->f_encoding);
152     Py_DECREF(f->f_errors);
153 
154     Py_INCREF(name);
155     f->f_name = name;
156 
157     f->f_mode = PyString_FromString(mode);
158 
159     f->f_close = close;
160     f->f_softspace = 0;
161     f->f_binary = strchr(mode,'b') != NULL;
162     f->f_buf = NULL;
163     f->f_univ_newline = (strchr(mode, 'U') != NULL);
164     f->f_newlinetypes = NEWLINE_UNKNOWN;
165     f->f_skipnextlf = 0;
166     Py_INCREF(Py_None);
167     f->f_encoding = Py_None;
168     Py_INCREF(Py_None);
169     f->f_errors = Py_None;
170     f->readable = f->writable = 0;
171     if (strchr(mode, 'r') != NULL || f->f_univ_newline)
172         f->readable = 1;
173     if (strchr(mode, 'w') != NULL || strchr(mode, 'a') != NULL)
174         f->writable = 1;
175     if (strchr(mode, '+') != NULL)
176         f->readable = f->writable = 1;
177 
178     if (f->f_mode == NULL)
179         return NULL;
180     f->f_fp = fp;
181     f = dircheck(f);
182     return (PyObject *) f;
183 }
184 
185 #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
186 #define Py_VERIFY_WINNT
/* The CRT on Windows compiled with Visual Studio 2005 and higher may
 * assert if given invalid mode strings.  This is all fine and well
 * in static languages like C where the mode string is typically hard
 * coded.  But in Python, where we pass in the mode string from the user,
 * we need to verify it first manually.
 *
 * Accepted grammar, as implemented below:
 *   - optional leading spaces, then one of 'r', 'w', 'a';
 *   - any number of spaces and 'N' characters (ignored);
 *   - each of '+', 'T', 'D' at most once;
 *   - at most one letter from each of the pairs b/t, c/n, S/R;
 *   - optionally ", ccs = ENC" where ENC is UTF-8, UTF-16LE or UNICODE
 *     (spaces around the tokens are allowed);
 *   - optional trailing spaces.
 *
 * Returns 1 when mode is acceptable, 0 otherwise.  An empty or all-space
 * string is rejected.
 */
static int _PyVerify_Mode_WINNT(const char *mode)
{
    static const char *const encodings[] = {"UTF-8", "UTF-16LE", "UNICODE"};
    static const char single_flags[] = "+TD";    /* each at most once */
    static const char paired_flags[] = "btcnSR"; /* one letter per pair */
    const size_t n_encodings = sizeof(encodings) / sizeof(encodings[0]);
    unsigned int singles = 0;   /* bit i set: single_flags[i] was seen */
    unsigned int pairs = 0;     /* bit i set: pair number i was used */
    int has_encoding = 0;
    const char *hit;

    while (*mode == ' ') /* strip initial spaces */
        ++mode;
    /* Must start with one of these.  The explicit NUL test is needed
       because strchr() also matches the terminator of "rwa". */
    if (*mode == '\0' || strchr("rwa", *mode) == NULL)
        return 0;

    while (*++mode) {
        if (*mode == ' ' || *mode == 'N') /* ignore spaces and N */
            continue;

        hit = strchr(single_flags, *mode);
        if (hit != NULL) {
            unsigned int bit = 1u << (hit - single_flags);
            if (singles & bit)
                return 0;
            singles |= bit;
            continue;
        }

        hit = strchr(paired_flags, *mode);
        if (hit != NULL) {
            unsigned int bit = 1u << ((hit - paired_flags) / 2);
            if (pairs & bit)
                return 0;
            pairs |= bit;
            continue;
        }

        if (*mode == ',') {
            has_encoding = 1;
            ++mode;     /* step over the comma */
            break;
        }
        return 0; /* found an invalid char */
    }

    if (has_encoding) {
        size_t i;

        while (*mode == ' ')
            ++mode;
        /* find 'ccs =' */
        if (strncmp(mode, "ccs", 3) != 0)
            return 0;
        mode += 3;
        while (*mode == ' ')
            ++mode;
        if (*mode != '=')
            return 0;
        ++mode;         /* step over the '=' */
        while (*mode == ' ')
            ++mode;
        for (i = 0; i < n_encodings; ++i) {
            size_t len = strlen(encodings[i]);
            if (strncmp(mode, encodings[i], len) == 0) {
                mode += len; /* found a valid encoding */
                break;
            }
        }
        if (i == n_encodings)
            return 0;
    }

    /* skip trailing spaces */
    while (*mode == ' ')
        ++mode;

    return *mode == '\0'; /* must be at the end of the string */
}
264 #endif
265 
266 /* check for known incorrect mode strings - problem is, platforms are
267    free to accept any mode characters they like and are supposed to
268    ignore stuff they don't understand... write or append mode with
269    universal newline support is expressly forbidden by PEP 278.
270    Additionally, remove the 'U' from the mode string as platforms
271    won't know what it is. Non-zero return signals an exception */
272 int
_PyFile_SanitizeMode(char * mode)273 _PyFile_SanitizeMode(char *mode)
274 {
275     char *upos;
276     size_t len = strlen(mode);
277 
278     if (!len) {
279         PyErr_SetString(PyExc_ValueError, "empty mode string");
280         return -1;
281     }
282 
283     upos = strchr(mode, 'U');
284     if (upos) {
285         memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
286 
287         if (mode[0] == 'w' || mode[0] == 'a') {
288             PyErr_Format(PyExc_ValueError, "universal newline "
289                          "mode can only be used with modes "
290                          "starting with 'r'");
291             return -1;
292         }
293 
294         if (mode[0] != 'r') {
295             memmove(mode+1, mode, strlen(mode)+1);
296             mode[0] = 'r';
297         }
298 
299         if (!strchr(mode, 'b')) {
300             memmove(mode+2, mode+1, strlen(mode));
301             mode[1] = 'b';
302         }
303     } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
304         PyErr_Format(PyExc_ValueError, "mode string must begin with "
305                     "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
306         return -1;
307     }
308 #ifdef Py_VERIFY_WINNT
309     /* additional checks on NT with visual studio 2005 and higher */
310     if (!_PyVerify_Mode_WINNT(mode)) {
311         PyErr_Format(PyExc_ValueError, "Invalid mode ('%.50s')", mode);
312         return -1;
313     }
314 #endif
315     return 0;
316 }
317 
318 static PyObject *
open_the_file(PyFileObject * f,char * name,char * mode)319 open_the_file(PyFileObject *f, char *name, char *mode)
320 {
321     char *newmode;
322     assert(f != NULL);
323     assert(PyFile_Check(f));
324 #ifdef MS_WINDOWS
325     /* windows ignores the passed name in order to support Unicode */
326     assert(f->f_name != NULL);
327 #else
328     assert(name != NULL);
329 #endif
330     assert(mode != NULL);
331     assert(f->f_fp == NULL);
332 
333     /* probably need to replace 'U' by 'rb' */
334     newmode = PyMem_MALLOC(strlen(mode) + 3);
335     if (!newmode) {
336         PyErr_NoMemory();
337         return NULL;
338     }
339     strcpy(newmode, mode);
340 
341     if (_PyFile_SanitizeMode(newmode)) {
342         f = NULL;
343         goto cleanup;
344     }
345 
346     /* rexec.py can't stop a user from getting the file() constructor --
347        all they have to do is get *any* file object f, and then do
348        type(f).  Here we prevent them from doing damage with it. */
349     if (PyEval_GetRestricted()) {
350         PyErr_SetString(PyExc_IOError,
351         "file() constructor not accessible in restricted mode");
352         f = NULL;
353         goto cleanup;
354     }
355     errno = 0;
356 
357 #ifdef MS_WINDOWS
358     if (PyUnicode_Check(f->f_name)) {
359         PyObject *wmode;
360         wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
361         if (f->f_name && wmode) {
362             FILE_BEGIN_ALLOW_THREADS(f)
363             /* PyUnicode_AS_UNICODE OK without thread
364                lock as it is a simple dereference. */
365             f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
366                               PyUnicode_AS_UNICODE(wmode));
367             FILE_END_ALLOW_THREADS(f)
368         }
369         Py_XDECREF(wmode);
370     }
371 #endif
372     if (NULL == f->f_fp && NULL != name) {
373         FILE_BEGIN_ALLOW_THREADS(f)
374         f->f_fp = fopen(name, newmode);
375         FILE_END_ALLOW_THREADS(f)
376     }
377 
378     if (f->f_fp == NULL) {
379 #if defined  _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
380         /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
381          * across all Windows flavors.  When it sets EINVAL varies
382          * across Windows flavors, the exact conditions aren't
383          * documented, and the answer lies in the OS's implementation
384          * of Win32's CreateFile function (whose source is secret).
385          * Seems the best we can do is map EINVAL to ENOENT.
386          * Starting with Visual Studio .NET 2005, EINVAL is correctly
387          * set by our CRT error handler (set in exceptions.c.)
388          */
389         if (errno == 0)         /* bad mode string */
390             errno = EINVAL;
391         else if (errno == EINVAL) /* unknown, but not a mode string */
392             errno = ENOENT;
393 #endif
394         /* EINVAL is returned when an invalid filename or
395          * an invalid mode is supplied. */
396         if (errno == EINVAL) {
397             PyObject *v;
398             char message[100];
399             PyOS_snprintf(message, 100,
400                 "invalid mode ('%.50s') or filename", mode);
401             v = Py_BuildValue("(isO)", errno, message, f->f_name);
402             if (v != NULL) {
403                 PyErr_SetObject(PyExc_IOError, v);
404                 Py_DECREF(v);
405             }
406         }
407         else
408             PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
409         f = NULL;
410     }
411     if (f != NULL)
412         f = dircheck(f);
413 
414 cleanup:
415     PyMem_FREE(newmode);
416 
417     return (PyObject *)f;
418 }
419 
420 static PyObject *
close_the_file(PyFileObject * f)421 close_the_file(PyFileObject *f)
422 {
423     int sts = 0;
424     int (*local_close)(FILE *);
425     FILE *local_fp = f->f_fp;
426     char *local_setbuf = f->f_setbuf;
427     if (local_fp != NULL) {
428         local_close = f->f_close;
429         if (local_close != NULL && f->unlocked_count > 0) {
430             if (f->ob_refcnt > 0) {
431                 PyErr_SetString(PyExc_IOError,
432                     "close() called during concurrent "
433                     "operation on the same file object.");
434             } else {
435                 /* This should not happen unless someone is
436                  * carelessly playing with the PyFileObject
437                  * struct fields and/or its associated FILE
438                  * pointer. */
439                 PyErr_SetString(PyExc_SystemError,
440                     "PyFileObject locking error in "
441                     "destructor (refcnt <= 0 at close).");
442             }
443             return NULL;
444         }
445         /* NULL out the FILE pointer before releasing the GIL, because
446          * it will not be valid anymore after the close() function is
447          * called. */
448         f->f_fp = NULL;
449         if (local_close != NULL) {
450             /* Issue #9295: must temporarily reset f_setbuf so that another
451                thread doesn't free it when running file_close() concurrently.
452                Otherwise this close() will crash when flushing the buffer. */
453             f->f_setbuf = NULL;
454             Py_BEGIN_ALLOW_THREADS
455             errno = 0;
456             sts = (*local_close)(local_fp);
457             Py_END_ALLOW_THREADS
458             f->f_setbuf = local_setbuf;
459             if (sts == EOF)
460                 return PyErr_SetFromErrno(PyExc_IOError);
461             if (sts != 0)
462                 return PyInt_FromLong((long)sts);
463         }
464     }
465     Py_RETURN_NONE;
466 }
467 
468 PyObject *
PyFile_FromFile(FILE * fp,char * name,char * mode,int (* close)(FILE *))469 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
470 {
471     PyFileObject *f;
472     PyObject *o_name;
473 
474     f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type, NULL, NULL);
475     if (f == NULL)
476         return NULL;
477     o_name = PyString_FromString(name);
478     if (o_name == NULL) {
479         if (close != NULL && fp != NULL)
480             close(fp);
481         Py_DECREF(f);
482         return NULL;
483     }
484     if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
485         Py_DECREF(f);
486         Py_DECREF(o_name);
487         return NULL;
488     }
489     Py_DECREF(o_name);
490     return (PyObject *)f;
491 }
492 
493 PyObject *
PyFile_FromString(char * name,char * mode)494 PyFile_FromString(char *name, char *mode)
495 {
496     extern int fclose(FILE *);
497     PyFileObject *f;
498 
499     f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
500     if (f != NULL) {
501         if (open_the_file(f, name, mode) == NULL) {
502             Py_DECREF(f);
503             f = NULL;
504         }
505     }
506     return (PyObject *)f;
507 }
508 
509 void
PyFile_SetBufSize(PyObject * f,int bufsize)510 PyFile_SetBufSize(PyObject *f, int bufsize)
511 {
512     PyFileObject *file = (PyFileObject *)f;
513     if (bufsize >= 0) {
514         int type;
515         switch (bufsize) {
516         case 0:
517             type = _IONBF;
518             break;
519 #ifdef HAVE_SETVBUF
520         case 1:
521             type = _IOLBF;
522             bufsize = BUFSIZ;
523             break;
524 #endif
525         default:
526             type = _IOFBF;
527 #ifndef HAVE_SETVBUF
528             bufsize = BUFSIZ;
529 #endif
530             break;
531         }
532         fflush(file->f_fp);
533         if (type == _IONBF) {
534             PyMem_Free(file->f_setbuf);
535             file->f_setbuf = NULL;
536         } else {
537             file->f_setbuf = (char *)PyMem_Realloc(file->f_setbuf,
538                                                     bufsize);
539         }
540 #ifdef HAVE_SETVBUF
541         setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
542 #else /* !HAVE_SETVBUF */
543         setbuf(file->f_fp, file->f_setbuf);
544 #endif /* !HAVE_SETVBUF */
545     }
546 }
547 
548 /* Set the encoding used to output Unicode strings.
549    Return 1 on success, 0 on failure. */
550 
551 int
PyFile_SetEncoding(PyObject * f,const char * enc)552 PyFile_SetEncoding(PyObject *f, const char *enc)
553 {
554     return PyFile_SetEncodingAndErrors(f, enc, NULL);
555 }
556 
557 int
PyFile_SetEncodingAndErrors(PyObject * f,const char * enc,char * errors)558 PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
559 {
560     PyFileObject *file = (PyFileObject*)f;
561     PyObject *str, *oerrors;
562 
563     assert(PyFile_Check(f));
564     str = PyString_FromString(enc);
565     if (!str)
566         return 0;
567     if (errors) {
568         oerrors = PyString_FromString(errors);
569         if (!oerrors) {
570             Py_DECREF(str);
571             return 0;
572         }
573     } else {
574         oerrors = Py_None;
575         Py_INCREF(Py_None);
576     }
577     Py_DECREF(file->f_encoding);
578     file->f_encoding = str;
579     Py_DECREF(file->f_errors);
580     file->f_errors = oerrors;
581     return 1;
582 }
583 
584 static PyObject *
err_closed(void)585 err_closed(void)
586 {
587     PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
588     return NULL;
589 }
590 
591 static PyObject *
err_mode(char * action)592 err_mode(char *action)
593 {
594     PyErr_Format(PyExc_IOError, "File not open for %s", action);
595     return NULL;
596 }
597 
598 /* Refuse regular file I/O if there's data in the iteration-buffer.
599  * Mixing them would cause data to arrive out of order, as the read*
600  * methods don't use the iteration buffer. */
601 static PyObject *
err_iterbuffered(void)602 err_iterbuffered(void)
603 {
604     PyErr_SetString(PyExc_ValueError,
605         "Mixing iteration and read methods would lose data");
606     return NULL;
607 }
608 
609 static void drop_readahead(PyFileObject *);
610 
611 /* Methods */
612 
613 static void
file_dealloc(PyFileObject * f)614 file_dealloc(PyFileObject *f)
615 {
616     PyObject *ret;
617     if (f->weakreflist != NULL)
618         PyObject_ClearWeakRefs((PyObject *) f);
619     ret = close_the_file(f);
620     if (!ret) {
621         PySys_WriteStderr("close failed in file object destructor:\n");
622         PyErr_Print();
623     }
624     else {
625         Py_DECREF(ret);
626     }
627     PyMem_Free(f->f_setbuf);
628     Py_XDECREF(f->f_name);
629     Py_XDECREF(f->f_mode);
630     Py_XDECREF(f->f_encoding);
631     Py_XDECREF(f->f_errors);
632     drop_readahead(f);
633     Py_TYPE(f)->tp_free((PyObject *)f);
634 }
635 
636 static PyObject *
file_repr(PyFileObject * f)637 file_repr(PyFileObject *f)
638 {
639     PyObject *ret = NULL;
640     PyObject *name = NULL;
641     if (PyUnicode_Check(f->f_name)) {
642 #ifdef Py_USING_UNICODE
643         const char *name_str;
644         name = PyUnicode_AsUnicodeEscapeString(f->f_name);
645         name_str = name ? PyString_AsString(name) : "?";
646         ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
647                            f->f_fp == NULL ? "closed" : "open",
648                            name_str,
649                            PyString_AsString(f->f_mode),
650                            f);
651         Py_XDECREF(name);
652         return ret;
653 #endif
654     } else {
655         name = PyObject_Repr(f->f_name);
656         if (name == NULL)
657             return NULL;
658         ret = PyString_FromFormat("<%s file %s, mode '%s' at %p>",
659                            f->f_fp == NULL ? "closed" : "open",
660                            PyString_AsString(name),
661                            PyString_AsString(f->f_mode),
662                            f);
663         Py_XDECREF(name);
664         return ret;
665     }
666 }
667 
668 static PyObject *
file_close(PyFileObject * f)669 file_close(PyFileObject *f)
670 {
671     PyObject *sts = close_the_file(f);
672     if (sts) {
673         PyMem_Free(f->f_setbuf);
674         f->f_setbuf = NULL;
675     }
676     return sts;
677 }
678 
679 
680 /* Our very own off_t-like type, 64-bit if possible */
681 #if !defined(HAVE_LARGEFILE_SUPPORT)
682 typedef off_t Py_off_t;
683 #elif SIZEOF_OFF_T >= 8
684 typedef off_t Py_off_t;
685 #elif SIZEOF_FPOS_T >= 8
686 typedef fpos_t Py_off_t;
687 #else
688 #error "Large file support, but neither off_t nor fpos_t is large enough."
689 #endif
690 
691 
692 /* a portable fseek() function
693    return 0 on success, non-zero on failure (with errno set) */
694 static int
_portable_fseek(FILE * fp,Py_off_t offset,int whence)695 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
696 {
697 #if !defined(HAVE_LARGEFILE_SUPPORT)
698     return fseek(fp, offset, whence);
699 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
700     return fseeko(fp, offset, whence);
701 #elif defined(HAVE_FSEEK64)
702     return fseek64(fp, offset, whence);
703 #elif defined(__BEOS__)
704     return _fseek(fp, offset, whence);
705 #elif SIZEOF_FPOS_T >= 8
706     /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
707        and fgetpos() to implement fseek()*/
708     fpos_t pos;
709     switch (whence) {
710     case SEEK_END:
711 #ifdef MS_WINDOWS
712         fflush(fp);
713         if (_lseeki64(fileno(fp), 0, 2) == -1)
714             return -1;
715 #else
716         if (fseek(fp, 0, SEEK_END) != 0)
717             return -1;
718 #endif
719         /* fall through */
720     case SEEK_CUR:
721         if (fgetpos(fp, &pos) != 0)
722             return -1;
723         offset += pos;
724         break;
725     /* case SEEK_SET: break; */
726     }
727     return fsetpos(fp, &offset);
728 #else
729 #error "Large file support, but no way to fseek."
730 #endif
731 }
732 
733 
734 /* a portable ftell() function
735    Return -1 on failure with errno set appropriately, current file
736    position on success */
737 static Py_off_t
_portable_ftell(FILE * fp)738 _portable_ftell(FILE* fp)
739 {
740 #if !defined(HAVE_LARGEFILE_SUPPORT)
741     return ftell(fp);
742 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
743     return ftello(fp);
744 #elif defined(HAVE_FTELL64)
745     return ftell64(fp);
746 #elif SIZEOF_FPOS_T >= 8
747     fpos_t pos;
748     if (fgetpos(fp, &pos) != 0)
749         return -1;
750     return pos;
751 #else
752 #error "Large file support, but no way to ftell."
753 #endif
754 }
755 
756 
757 static PyObject *
file_seek(PyFileObject * f,PyObject * args)758 file_seek(PyFileObject *f, PyObject *args)
759 {
760     int whence;
761     int ret;
762     Py_off_t offset;
763     PyObject *offobj, *off_index;
764 
765     if (f->f_fp == NULL)
766         return err_closed();
767     drop_readahead(f);
768     whence = 0;
769     if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
770         return NULL;
771     off_index = PyNumber_Index(offobj);
772     if (!off_index) {
773         if (!PyFloat_Check(offobj))
774             return NULL;
775         /* Deprecated in 2.6 */
776         PyErr_Clear();
777         if (PyErr_WarnEx(PyExc_DeprecationWarning,
778                          "integer argument expected, got float",
779                          1) < 0)
780             return NULL;
781         off_index = offobj;
782         Py_INCREF(offobj);
783     }
784 #if !defined(HAVE_LARGEFILE_SUPPORT)
785     offset = PyInt_AsLong(off_index);
786 #else
787     offset = PyLong_Check(off_index) ?
788         PyLong_AsLongLong(off_index) : PyInt_AsLong(off_index);
789 #endif
790     Py_DECREF(off_index);
791     if (PyErr_Occurred())
792         return NULL;
793 
794     FILE_BEGIN_ALLOW_THREADS(f)
795     errno = 0;
796     ret = _portable_fseek(f->f_fp, offset, whence);
797     FILE_END_ALLOW_THREADS(f)
798 
799     if (ret != 0) {
800         PyErr_SetFromErrno(PyExc_IOError);
801         clearerr(f->f_fp);
802         return NULL;
803     }
804     f->f_skipnextlf = 0;
805     Py_INCREF(Py_None);
806     return Py_None;
807 }
808 
809 
810 #ifdef HAVE_FTRUNCATE
811 static PyObject *
file_truncate(PyFileObject * f,PyObject * args)812 file_truncate(PyFileObject *f, PyObject *args)
813 {
814     Py_off_t newsize;
815     PyObject *newsizeobj = NULL;
816     Py_off_t initialpos;
817     int ret;
818 
819     if (f->f_fp == NULL)
820         return err_closed();
821     if (!f->writable)
822         return err_mode("writing");
823     if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
824         return NULL;
825 
826     /* Get current file position.  If the file happens to be open for
827      * update and the last operation was an input operation, C doesn't
828      * define what the later fflush() will do, but we promise truncate()
829      * won't change the current position (and fflush() *does* change it
830      * then at least on Windows).  The easiest thing is to capture
831      * current pos now and seek back to it at the end.
832      */
833     FILE_BEGIN_ALLOW_THREADS(f)
834     errno = 0;
835     initialpos = _portable_ftell(f->f_fp);
836     FILE_END_ALLOW_THREADS(f)
837     if (initialpos == -1)
838         goto onioerror;
839 
840     /* Set newsize to current postion if newsizeobj NULL, else to the
841      * specified value.
842      */
843     if (newsizeobj != NULL) {
844 #if !defined(HAVE_LARGEFILE_SUPPORT)
845         newsize = PyInt_AsLong(newsizeobj);
846 #else
847         newsize = PyLong_Check(newsizeobj) ?
848                         PyLong_AsLongLong(newsizeobj) :
849                 PyInt_AsLong(newsizeobj);
850 #endif
851         if (PyErr_Occurred())
852             return NULL;
853     }
854     else /* default to current position */
855         newsize = initialpos;
856 
857     /* Flush the stream.  We're mixing stream-level I/O with lower-level
858      * I/O, and a flush may be necessary to synch both platform views
859      * of the current file state.
860      */
861     FILE_BEGIN_ALLOW_THREADS(f)
862     errno = 0;
863     ret = fflush(f->f_fp);
864     FILE_END_ALLOW_THREADS(f)
865     if (ret != 0)
866         goto onioerror;
867 
868 #ifdef MS_WINDOWS
869     /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
870        so don't even try using it. */
871     {
872         HANDLE hFile;
873 
874         /* Have to move current pos to desired endpoint on Windows. */
875         FILE_BEGIN_ALLOW_THREADS(f)
876         errno = 0;
877         ret = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
878         FILE_END_ALLOW_THREADS(f)
879         if (ret)
880             goto onioerror;
881 
882         /* Truncate.  Note that this may grow the file! */
883         FILE_BEGIN_ALLOW_THREADS(f)
884         errno = 0;
885         hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
886         ret = hFile == (HANDLE)-1;
887         if (ret == 0) {
888             ret = SetEndOfFile(hFile) == 0;
889             if (ret)
890                 errno = EACCES;
891         }
892         FILE_END_ALLOW_THREADS(f)
893         if (ret)
894             goto onioerror;
895     }
896 #else
897     FILE_BEGIN_ALLOW_THREADS(f)
898     errno = 0;
899     ret = ftruncate(fileno(f->f_fp), newsize);
900     FILE_END_ALLOW_THREADS(f)
901     if (ret != 0)
902         goto onioerror;
903 #endif /* !MS_WINDOWS */
904 
905     /* Restore original file position. */
906     FILE_BEGIN_ALLOW_THREADS(f)
907     errno = 0;
908     ret = _portable_fseek(f->f_fp, initialpos, SEEK_SET) != 0;
909     FILE_END_ALLOW_THREADS(f)
910     if (ret)
911         goto onioerror;
912 
913     Py_INCREF(Py_None);
914     return Py_None;
915 
916 onioerror:
917     PyErr_SetFromErrno(PyExc_IOError);
918     clearerr(f->f_fp);
919     return NULL;
920 }
921 #endif /* HAVE_FTRUNCATE */
922 
923 static PyObject *
file_tell(PyFileObject * f)924 file_tell(PyFileObject *f)
925 {
926     Py_off_t pos;
927 
928     if (f->f_fp == NULL)
929         return err_closed();
930     FILE_BEGIN_ALLOW_THREADS(f)
931     errno = 0;
932     pos = _portable_ftell(f->f_fp);
933     FILE_END_ALLOW_THREADS(f)
934 
935     if (pos == -1) {
936         PyErr_SetFromErrno(PyExc_IOError);
937         clearerr(f->f_fp);
938         return NULL;
939     }
940     if (f->f_skipnextlf) {
941         int c;
942         c = GETC(f->f_fp);
943         if (c == '\n') {
944             f->f_newlinetypes |= NEWLINE_CRLF;
945             pos++;
946             f->f_skipnextlf = 0;
947         } else if (c != EOF) ungetc(c, f->f_fp);
948     }
949 #if !defined(HAVE_LARGEFILE_SUPPORT)
950     return PyInt_FromLong(pos);
951 #else
952     return PyLong_FromLongLong(pos);
953 #endif
954 }
955 
956 static PyObject *
file_fileno(PyFileObject * f)957 file_fileno(PyFileObject *f)
958 {
959     if (f->f_fp == NULL)
960         return err_closed();
961     return PyInt_FromLong((long) fileno(f->f_fp));
962 }
963 
964 static PyObject *
file_flush(PyFileObject * f)965 file_flush(PyFileObject *f)
966 {
967     int res;
968 
969     if (f->f_fp == NULL)
970         return err_closed();
971     FILE_BEGIN_ALLOW_THREADS(f)
972     errno = 0;
973     res = fflush(f->f_fp);
974     FILE_END_ALLOW_THREADS(f)
975     if (res != 0) {
976         PyErr_SetFromErrno(PyExc_IOError);
977         clearerr(f->f_fp);
978         return NULL;
979     }
980     Py_INCREF(Py_None);
981     return Py_None;
982 }
983 
984 static PyObject *
file_isatty(PyFileObject * f)985 file_isatty(PyFileObject *f)
986 {
987     long res;
988     if (f->f_fp == NULL)
989         return err_closed();
990     FILE_BEGIN_ALLOW_THREADS(f)
991     res = isatty((int)fileno(f->f_fp));
992     FILE_END_ALLOW_THREADS(f)
993     return PyBool_FromLong(res);
994 }
995 
996 
/* SMALLCHUNK is the larger of BUFSIZ and 8192; file_readlines() uses it as
 * the size of its initial on-stack read buffer. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif
1002 
1003 static size_t
new_buffersize(PyFileObject * f,size_t currentsize)1004 new_buffersize(PyFileObject *f, size_t currentsize)
1005 {
1006 #ifdef HAVE_FSTAT
1007     off_t pos, end;
1008     struct stat st;
1009     if (fstat(fileno(f->f_fp), &st) == 0) {
1010         end = st.st_size;
1011         /* The following is not a bug: we really need to call lseek()
1012            *and* ftell().  The reason is that some stdio libraries
1013            mistakenly flush their buffer when ftell() is called and
1014            the lseek() call it makes fails, thereby throwing away
1015            data that cannot be recovered in any way.  To avoid this,
1016            we first test lseek(), and only call ftell() if lseek()
1017            works.  We can't use the lseek() value either, because we
1018            need to take the amount of buffered data into account.
1019            (Yet another reason why stdio stinks. :-) */
1020         pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
1021         if (pos >= 0) {
1022             pos = ftell(f->f_fp);
1023         }
1024         if (pos < 0)
1025             clearerr(f->f_fp);
1026         if (end > pos && pos >= 0)
1027             return currentsize + end - pos + 1;
1028         /* Add 1 so if the file were to grow we'd notice. */
1029     }
1030 #endif
1031     /* Expand the buffer by an amount proportional to the current size,
1032        giving us amortized linear-time behavior. Use a less-than-double
1033        growth factor to avoid excessive allocation. */
1034     return currentsize + (currentsize >> 3) + 6;
1035 }
1036 
/* BLOCKED_ERRNO(x) is true when x is EWOULDBLOCK or EAGAIN, using
 * whichever of the two the platform defines.  It expands to 0 when
 * neither is defined. */
#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
#elif defined(EWOULDBLOCK)
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
#elif defined(EAGAIN)
#define BLOCKED_ERRNO(x) ((x) == EAGAIN)
#else
#define BLOCKED_ERRNO(x) 0
#endif
1050 
1051 static PyObject *
file_read(PyFileObject * f,PyObject * args)1052 file_read(PyFileObject *f, PyObject *args)
1053 {
1054     long bytesrequested = -1;
1055     size_t bytesread, buffersize, chunksize;
1056     PyObject *v;
1057 
1058     if (f->f_fp == NULL)
1059         return err_closed();
1060     if (!f->readable)
1061         return err_mode("reading");
1062     /* refuse to mix with f.next() */
1063     if (f->f_buf != NULL &&
1064         (f->f_bufend - f->f_bufptr) > 0 &&
1065         f->f_buf[0] != '\0')
1066         return err_iterbuffered();
1067     if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
1068         return NULL;
1069     if (bytesrequested < 0)
1070         buffersize = new_buffersize(f, (size_t)0);
1071     else
1072         buffersize = bytesrequested;
1073     if (buffersize > PY_SSIZE_T_MAX) {
1074         PyErr_SetString(PyExc_OverflowError,
1075     "requested number of bytes is more than a Python string can hold");
1076         return NULL;
1077     }
1078     v = PyString_FromStringAndSize((char *)NULL, buffersize);
1079     if (v == NULL)
1080         return NULL;
1081     bytesread = 0;
1082     for (;;) {
1083         int interrupted;
1084         FILE_BEGIN_ALLOW_THREADS(f)
1085         errno = 0;
1086         chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
1087                   buffersize - bytesread, f->f_fp, (PyObject *)f);
1088         interrupted = ferror(f->f_fp) && errno == EINTR;
1089         FILE_END_ALLOW_THREADS(f)
1090         if (interrupted) {
1091             clearerr(f->f_fp);
1092             if (PyErr_CheckSignals()) {
1093                 Py_DECREF(v);
1094                 return NULL;
1095             }
1096         }
1097         if (chunksize == 0) {
1098             if (interrupted)
1099                 continue;
1100             if (!ferror(f->f_fp))
1101                 break;
1102             clearerr(f->f_fp);
1103             /* When in non-blocking mode, data shouldn't
1104              * be discarded if a blocking signal was
1105              * received. That will also happen if
1106              * chunksize != 0, but bytesread < buffersize. */
1107             if (bytesread > 0 && BLOCKED_ERRNO(errno))
1108                 break;
1109             PyErr_SetFromErrno(PyExc_IOError);
1110             Py_DECREF(v);
1111             return NULL;
1112         }
1113         bytesread += chunksize;
1114         if (bytesread < buffersize && !interrupted) {
1115             clearerr(f->f_fp);
1116             break;
1117         }
1118         if (bytesrequested < 0) {
1119             buffersize = new_buffersize(f, buffersize);
1120             if (_PyString_Resize(&v, buffersize) < 0)
1121                 return NULL;
1122         } else {
1123             /* Got what was requested. */
1124             break;
1125         }
1126     }
1127     if (bytesread != buffersize && _PyString_Resize(&v, bytesread))
1128         return NULL;
1129     return v;
1130 }
1131 
1132 static PyObject *
file_readinto(PyFileObject * f,PyObject * args)1133 file_readinto(PyFileObject *f, PyObject *args)
1134 {
1135     char *ptr;
1136     Py_ssize_t ntodo;
1137     Py_ssize_t ndone, nnow;
1138     Py_buffer pbuf;
1139 
1140     if (f->f_fp == NULL)
1141         return err_closed();
1142     if (!f->readable)
1143         return err_mode("reading");
1144     /* refuse to mix with f.next() */
1145     if (f->f_buf != NULL &&
1146         (f->f_bufend - f->f_bufptr) > 0 &&
1147         f->f_buf[0] != '\0')
1148         return err_iterbuffered();
1149     if (!PyArg_ParseTuple(args, "w*", &pbuf))
1150         return NULL;
1151     ptr = pbuf.buf;
1152     ntodo = pbuf.len;
1153     ndone = 0;
1154     while (ntodo > 0) {
1155         int interrupted;
1156         FILE_BEGIN_ALLOW_THREADS(f)
1157         errno = 0;
1158         nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
1159                                         (PyObject *)f);
1160         interrupted = ferror(f->f_fp) && errno == EINTR;
1161         FILE_END_ALLOW_THREADS(f)
1162         if (interrupted) {
1163             clearerr(f->f_fp);
1164             if (PyErr_CheckSignals()) {
1165                 PyBuffer_Release(&pbuf);
1166                 return NULL;
1167             }
1168         }
1169         if (nnow == 0) {
1170             if (interrupted)
1171                 continue;
1172             if (!ferror(f->f_fp))
1173                 break;
1174             PyErr_SetFromErrno(PyExc_IOError);
1175             clearerr(f->f_fp);
1176             PyBuffer_Release(&pbuf);
1177             return NULL;
1178         }
1179         ndone += nnow;
1180         ntodo -= nnow;
1181     }
1182     PyBuffer_Release(&pbuf);
1183     return PyInt_FromSsize_t(ndone);
1184 }
1185 
1186 /**************************************************************************
1187 Routine to get next line using platform fgets().
1188 
1189 Under MSVC 6:
1190 
1191 + MS threadsafe getc is very slow (multiple layers of function calls before+
1192   after each character, to lock+unlock the stream).
1193 + The stream-locking functions are MS-internal -- can't access them from user
1194   code.
1195 + There's nothing Tim could find in the MS C or platform SDK libraries that
1196   can worm around this.
1197 + MS fgets locks/unlocks only once per line; it's the only hook we have.
1198 
1199 So we use fgets for speed(!), despite that it's painful.
1200 
1201 MS realloc is also slow.
1202 
1203 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
1204 have):
1205     Linux               a wash
1206     Solaris             a wash
1207     Tru64 Unix          getline_via_fgets significantly faster
1208 
1209 CAUTION:  The C std isn't clear about this:  in those cases where fgets
1210 writes something into the buffer, can it write into any position beyond the
1211 required trailing null byte?  MSVC 6 fgets does not, and no platform is (yet)
1212 known on which it does; and it would be a strange way to code fgets. Still,
1213 getline_via_fgets may not work correctly if it does.  The std test
1214 test_bufio.py should fail if platform fgets() routinely writes beyond the
1215 trailing null byte.  #define DONT_USE_FGETS_IN_GETLINE to disable this code.
1216 **************************************************************************/
1217 
/* Use this routine if told to, or by default on non-getc_unlocked()
 * platforms unless told not to.  Yikes!  Let's spell that out:
 * On a platform with getc_unlocked():
 *     By default, use getc_unlocked().
 *     If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
 * On a platform without getc_unlocked():
 *     By default, use fgets().
 *     If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
 */
/* Resolve USE_FGETS_IN_GETLINE: DONT_USE_FGETS_IN_GETLINE always wins;
 * otherwise fgets() is selected when there is no getc_unlocked() (or when
 * USE_FGETS_IN_GETLINE was already defined by the builder). */
#if defined(DONT_USE_FGETS_IN_GETLINE)
#undef USE_FGETS_IN_GETLINE
#elif !defined(HAVE_GETC_UNLOCKED) && !defined(USE_FGETS_IN_GETLINE)
#define USE_FGETS_IN_GETLINE
#endif
1234 
#ifdef USE_FGETS_IN_GETLINE
/* Read one line from fp using fgets() and return it as a new string.
 *
 * f  -- the file object (used for the GIL-release bookkeeping macros).
 * fp -- the stream to read from.
 *
 * The line includes its trailing '\n' when one was read.  When fgets()
 * returns NULL the stream's error/EOF indicator is cleared and whatever
 * was accumulated so far (possibly the empty string) is returned.
 * Returns NULL with an exception set when a signal handler raises, when
 * memory runs out, or when the line cannot fit in a Python string.
 *
 * Technique: before each fgets() call the free part of the buffer is
 * filled with '\n'.  Afterwards, a '\n' followed by '\0' must have been
 * written by fgets() (end of line); a '\n' preceded by '\0' is one of our
 * fill bytes (fgets() stopped early: last, newline-free line); no '\n' at
 * all means fgets() filled the whole region and the line continues.
 */
static PyObject *
getline_via_fgets(PyFileObject *f, FILE *fp)
{
/* INITBUFSIZE is the maximum line length that lets us get away with the fast
 * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
 * to fill this much of the buffer with a known value in order to figure out
 * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
 * than "most" lines, we waste time filling unused buffer slots.  100 is
 * surely adequate for most peoples' email archives, chewing over source code,
 * etc -- "regular old text files".
 * MAXBUFSIZE is the maximum line length that lets us get away with the less
 * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
 * cautions about boosting that.  300 was chosen because the worst real-life
 * text-crunching job reported on Python-Dev was a mail-log crawler where over
 * half the lines were 254 chars.
 */
#define INITBUFSIZE 100
#define MAXBUFSIZE 300
    char *p;                /* temp */
    char buf[MAXBUFSIZE];
    PyObject *v;            /* the string object result */
    char *pvfree;           /* address of next free slot */
    char *pvend;            /* address one beyond last free slot */
    size_t nfree;           /* # of free buffer slots; pvend-pvfree */
    size_t total_v_size;    /* total # of slots in buffer */
    size_t increment;       /* amount to increment the buffer */
    size_t prev_v_size;

    /* Optimize for normal case:  avoid _PyString_Resize if at all
     * possible via first reading into stack buffer "buf".
     */
    total_v_size = INITBUFSIZE;         /* start small and pray */
    pvfree = buf;
    for (;;) {
        FILE_BEGIN_ALLOW_THREADS(f)
        pvend = buf + total_v_size;
        nfree = pvend - pvfree;
        memset(pvfree, '\n', nfree);
        assert(nfree < INT_MAX); /* Should be at most MAXBUFSIZE */
        p = fgets(pvfree, (int)nfree, fp);
        FILE_END_ALLOW_THREADS(f)

        if (p == NULL) {
            clearerr(fp);
            if (PyErr_CheckSignals())
                return NULL;
            v = PyString_FromStringAndSize(buf, pvfree - buf);
            return v;
        }
        /* fgets read *something* */
        p = memchr(pvfree, '\n', nfree);
        if (p != NULL) {
            /* Did the \n come from fgets or from us?
             * Since fgets stops at the first \n, and then writes
             * \0, if it's from fgets a \0 must be next.  But if
             * that's so, it could not have come from us, since
             * the \n's we filled the buffer with have only more
             * \n's to the right.
             */
            if (p+1 < pvend && *(p+1) == '\0') {
                /* It's from fgets:  we win!  In particular,
                 * we haven't done any mallocs yet, and can
                 * build the final result on the first try.
                 */
                ++p;                    /* include \n from fgets */
            }
            else {
                /* Must be from us:  fgets didn't fill the
                 * buffer and didn't find a newline, so it
                 * must be the last and newline-free line of
                 * the file.
                 */
                assert(p > pvfree && *(p-1) == '\0');
                --p;                    /* don't include \0 from fgets */
            }
            v = PyString_FromStringAndSize(buf, p - buf);
            return v;
        }
        /* yuck:  fgets overwrote all the newlines, i.e. the entire
         * buffer.  So this line isn't over yet, or maybe it is but
         * we're exactly at EOF.  If we haven't already, try using the
         * rest of the stack buffer.
         */
        assert(*(pvend-1) == '\0');
        if (pvfree == buf) {
            pvfree = pvend - 1;                 /* overwrite trailing null */
            total_v_size = MAXBUFSIZE;
        }
        else
            break;
    }

    /* The stack buffer isn't big enough; malloc a string object and read
     * into its buffer.
     */
    total_v_size = MAXBUFSIZE << 1;
    /* Sizes are passed as Py_ssize_t, the type the string API takes; a
     * cast to int would truncate sizes above INT_MAX on 64-bit builds. */
    v = PyString_FromStringAndSize((char *)NULL, (Py_ssize_t)total_v_size);
    if (v == NULL)
        return v;
    /* copy over everything except the last null byte */
    memcpy(BUF(v), buf, MAXBUFSIZE-1);
    pvfree = BUF(v) + MAXBUFSIZE - 1;

    /* Keep reading stuff into v; if it ever ends successfully, break
     * after setting p one beyond the end of the line.  The code here is
     * very much like the code above, except reads into v's buffer; see
     * the code above for detailed comments about the logic.
     */
    for (;;) {
        FILE_BEGIN_ALLOW_THREADS(f)
        pvend = BUF(v) + total_v_size;
        nfree = pvend - pvfree;
        memset(pvfree, '\n', nfree);
        assert(nfree < INT_MAX);
        p = fgets(pvfree, (int)nfree, fp);
        FILE_END_ALLOW_THREADS(f)

        if (p == NULL) {
            clearerr(fp);
            if (PyErr_CheckSignals()) {
                Py_DECREF(v);
                return NULL;
            }
            p = pvfree;
            break;
        }
        p = memchr(pvfree, '\n', nfree);
        if (p != NULL) {
            if (p+1 < pvend && *(p+1) == '\0') {
                /* \n came from fgets */
                ++p;
                break;
            }
            /* \n came from us; last line of file, no newline */
            assert(p > pvfree && *(p-1) == '\0');
            --p;
            break;
        }
        /* expand buffer and try again */
        assert(*(pvend-1) == '\0');
        increment = total_v_size >> 2;          /* mild exponential growth */
        /* The next fgets() region is increment+1 bytes and its length
         * is passed as an int, so keep it strictly below INT_MAX. */
        if (increment > (size_t)INT_MAX - 2)
            increment = (size_t)INT_MAX - 2;
        prev_v_size = total_v_size;
        total_v_size += increment;
        /* check for overflow */
        if (total_v_size <= prev_v_size ||
            total_v_size > PY_SSIZE_T_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                "line is longer than a Python string can hold");
            Py_DECREF(v);
            return NULL;
        }
        if (_PyString_Resize(&v, (Py_ssize_t)total_v_size) < 0)
            return NULL;
        /* overwrite the trailing null byte */
        pvfree = BUF(v) + (prev_v_size - 1);
    }
    if (BUF(v) + total_v_size != p && _PyString_Resize(&v, p - BUF(v)))
        return NULL;
    return v;
#undef INITBUFSIZE
#undef MAXBUFSIZE
}
#endif  /* ifdef USE_FGETS_IN_GETLINE */
1399 
1400 /* Internal routine to get a line.
1401    Size argument interpretation:
1402    > 0: max length;
1403    <= 0: read arbitrary line
1404 */
1405 
1406 static PyObject *
get_line(PyFileObject * f,int n)1407 get_line(PyFileObject *f, int n)
1408 {
1409     FILE *fp = f->f_fp;
1410     int c;
1411     char *buf, *end;
1412     size_t total_v_size;        /* total # of slots in buffer */
1413     size_t used_v_size;         /* # used slots in buffer */
1414     size_t increment;       /* amount to increment the buffer */
1415     PyObject *v;
1416     int newlinetypes = f->f_newlinetypes;
1417     int skipnextlf = f->f_skipnextlf;
1418     int univ_newline = f->f_univ_newline;
1419 
1420 #if defined(USE_FGETS_IN_GETLINE)
1421     if (n <= 0 && !univ_newline )
1422         return getline_via_fgets(f, fp);
1423 #endif
1424     total_v_size = n > 0 ? n : 100;
1425     v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1426     if (v == NULL)
1427         return NULL;
1428     buf = BUF(v);
1429     end = buf + total_v_size;
1430 
1431     for (;;) {
1432         FILE_BEGIN_ALLOW_THREADS(f)
1433         FLOCKFILE(fp);
1434         if (univ_newline) {
1435             c = 'x'; /* Shut up gcc warning */
1436             while ( buf != end && (c = GETC(fp)) != EOF ) {
1437                 if (skipnextlf ) {
1438                     skipnextlf = 0;
1439                     if (c == '\n') {
1440                         /* Seeing a \n here with
1441                          * skipnextlf true means we
1442                          * saw a \r before.
1443                          */
1444                         newlinetypes |= NEWLINE_CRLF;
1445                         c = GETC(fp);
1446                         if (c == EOF) break;
1447                     } else {
1448                         newlinetypes |= NEWLINE_CR;
1449                     }
1450                 }
1451                 if (c == '\r') {
1452                     skipnextlf = 1;
1453                     c = '\n';
1454                 } else if ( c == '\n')
1455                     newlinetypes |= NEWLINE_LF;
1456                 *buf++ = c;
1457                 if (c == '\n') break;
1458             }
1459             if (c == EOF) {
1460                 if (ferror(fp) && errno == EINTR) {
1461                     FUNLOCKFILE(fp);
1462                     FILE_ABORT_ALLOW_THREADS(f)
1463                     f->f_newlinetypes = newlinetypes;
1464                     f->f_skipnextlf = skipnextlf;
1465 
1466                     if (PyErr_CheckSignals()) {
1467                         Py_DECREF(v);
1468                         return NULL;
1469                     }
1470                     /* We executed Python signal handlers and got no exception.
1471                      * Now back to reading the line where we left off. */
1472                     clearerr(fp);
1473                     continue;
1474                 }
1475                 if (skipnextlf)
1476                     newlinetypes |= NEWLINE_CR;
1477             }
1478         } else /* If not universal newlines use the normal loop */
1479         while ((c = GETC(fp)) != EOF &&
1480                (*buf++ = c) != '\n' &&
1481             buf != end)
1482             ;
1483         FUNLOCKFILE(fp);
1484         FILE_END_ALLOW_THREADS(f)
1485         f->f_newlinetypes = newlinetypes;
1486         f->f_skipnextlf = skipnextlf;
1487         if (c == '\n')
1488             break;
1489         if (c == EOF) {
1490             if (ferror(fp)) {
1491                 if (errno == EINTR) {
1492                     if (PyErr_CheckSignals()) {
1493                         Py_DECREF(v);
1494                         return NULL;
1495                     }
1496                     /* We executed Python signal handlers and got no exception.
1497                      * Now back to reading the line where we left off. */
1498                     clearerr(fp);
1499                     continue;
1500                 }
1501                 PyErr_SetFromErrno(PyExc_IOError);
1502                 clearerr(fp);
1503                 Py_DECREF(v);
1504                 return NULL;
1505             }
1506             clearerr(fp);
1507             if (PyErr_CheckSignals()) {
1508                 Py_DECREF(v);
1509                 return NULL;
1510             }
1511             break;
1512         }
1513         /* Must be because buf == end */
1514         if (n > 0)
1515             break;
1516         used_v_size = total_v_size;
1517         increment = total_v_size >> 2; /* mild exponential growth */
1518         total_v_size += increment;
1519         if (total_v_size > PY_SSIZE_T_MAX) {
1520             PyErr_SetString(PyExc_OverflowError,
1521                 "line is longer than a Python string can hold");
1522             Py_DECREF(v);
1523             return NULL;
1524         }
1525         if (_PyString_Resize(&v, total_v_size) < 0)
1526             return NULL;
1527         buf = BUF(v) + used_v_size;
1528         end = BUF(v) + total_v_size;
1529     }
1530 
1531     used_v_size = buf - BUF(v);
1532     if (used_v_size != total_v_size && _PyString_Resize(&v, used_v_size))
1533         return NULL;
1534     return v;
1535 }
1536 
1537 /* External C interface */
1538 
1539 PyObject *
PyFile_GetLine(PyObject * f,int n)1540 PyFile_GetLine(PyObject *f, int n)
1541 {
1542     PyObject *result;
1543 
1544     if (f == NULL) {
1545         PyErr_BadInternalCall();
1546         return NULL;
1547     }
1548 
1549     if (PyFile_Check(f)) {
1550         PyFileObject *fo = (PyFileObject *)f;
1551         if (fo->f_fp == NULL)
1552             return err_closed();
1553         if (!fo->readable)
1554             return err_mode("reading");
1555         /* refuse to mix with f.next() */
1556         if (fo->f_buf != NULL &&
1557             (fo->f_bufend - fo->f_bufptr) > 0 &&
1558             fo->f_buf[0] != '\0')
1559             return err_iterbuffered();
1560         result = get_line(fo, n);
1561     }
1562     else {
1563         PyObject *reader;
1564         PyObject *args;
1565 
1566         reader = PyObject_GetAttrString(f, "readline");
1567         if (reader == NULL)
1568             return NULL;
1569         if (n <= 0)
1570             args = PyTuple_New(0);
1571         else
1572             args = Py_BuildValue("(i)", n);
1573         if (args == NULL) {
1574             Py_DECREF(reader);
1575             return NULL;
1576         }
1577         result = PyEval_CallObject(reader, args);
1578         Py_DECREF(reader);
1579         Py_DECREF(args);
1580         if (result != NULL && !PyString_Check(result) &&
1581             !PyUnicode_Check(result)) {
1582             Py_DECREF(result);
1583             result = NULL;
1584             PyErr_SetString(PyExc_TypeError,
1585                        "object.readline() returned non-string");
1586         }
1587     }
1588 
1589     if (n < 0 && result != NULL && PyString_Check(result)) {
1590         char *s = PyString_AS_STRING(result);
1591         Py_ssize_t len = PyString_GET_SIZE(result);
1592         if (len == 0) {
1593             Py_DECREF(result);
1594             result = NULL;
1595             PyErr_SetString(PyExc_EOFError,
1596                             "EOF when reading a line");
1597         }
1598         else if (s[len-1] == '\n') {
1599             if (result->ob_refcnt == 1) {
1600                 if (_PyString_Resize(&result, len-1))
1601                     return NULL;
1602             }
1603             else {
1604                 PyObject *v;
1605                 v = PyString_FromStringAndSize(s, len-1);
1606                 Py_DECREF(result);
1607                 result = v;
1608             }
1609         }
1610     }
1611 #ifdef Py_USING_UNICODE
1612     if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1613         Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1614         Py_ssize_t len = PyUnicode_GET_SIZE(result);
1615         if (len == 0) {
1616             Py_DECREF(result);
1617             result = NULL;
1618             PyErr_SetString(PyExc_EOFError,
1619                             "EOF when reading a line");
1620         }
1621         else if (s[len-1] == '\n') {
1622             if (result->ob_refcnt == 1)
1623                 PyUnicode_Resize(&result, len-1);
1624             else {
1625                 PyObject *v;
1626                 v = PyUnicode_FromUnicode(s, len-1);
1627                 Py_DECREF(result);
1628                 result = v;
1629             }
1630         }
1631     }
1632 #endif
1633     return result;
1634 }
1635 
1636 /* Python method */
1637 
1638 static PyObject *
file_readline(PyFileObject * f,PyObject * args)1639 file_readline(PyFileObject *f, PyObject *args)
1640 {
1641     int n = -1;
1642 
1643     if (f->f_fp == NULL)
1644         return err_closed();
1645     if (!f->readable)
1646         return err_mode("reading");
1647     /* refuse to mix with f.next() */
1648     if (f->f_buf != NULL &&
1649         (f->f_bufend - f->f_bufptr) > 0 &&
1650         f->f_buf[0] != '\0')
1651         return err_iterbuffered();
1652     if (!PyArg_ParseTuple(args, "|i:readline", &n))
1653         return NULL;
1654     if (n == 0)
1655         return PyString_FromString("");
1656     if (n < 0)
1657         n = 0;
1658     return get_line(f, n);
1659 }
1660 
1661 static PyObject *
file_readlines(PyFileObject * f,PyObject * args)1662 file_readlines(PyFileObject *f, PyObject *args)
1663 {
1664     long sizehint = 0;
1665     PyObject *list = NULL;
1666     PyObject *line;
1667     char small_buffer[SMALLCHUNK];
1668     char *buffer = small_buffer;
1669     size_t buffersize = SMALLCHUNK;
1670     PyObject *big_buffer = NULL;
1671     size_t nfilled = 0;
1672     size_t nread;
1673     size_t totalread = 0;
1674     char *p, *q, *end;
1675     int err;
1676     int shortread = 0;  /* bool, did the previous read come up short? */
1677 
1678     if (f->f_fp == NULL)
1679         return err_closed();
1680     if (!f->readable)
1681         return err_mode("reading");
1682     /* refuse to mix with f.next() */
1683     if (f->f_buf != NULL &&
1684         (f->f_bufend - f->f_bufptr) > 0 &&
1685         f->f_buf[0] != '\0')
1686         return err_iterbuffered();
1687     if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1688         return NULL;
1689     if ((list = PyList_New(0)) == NULL)
1690         return NULL;
1691     for (;;) {
1692         if (shortread)
1693             nread = 0;
1694         else {
1695             FILE_BEGIN_ALLOW_THREADS(f)
1696             errno = 0;
1697             nread = Py_UniversalNewlineFread(buffer+nfilled,
1698                 buffersize-nfilled, f->f_fp, (PyObject *)f);
1699             FILE_END_ALLOW_THREADS(f)
1700             shortread = (nread < buffersize-nfilled);
1701         }
1702         if (nread == 0) {
1703             sizehint = 0;
1704             if (!ferror(f->f_fp))
1705                 break;
1706             if (errno == EINTR) {
1707                 if (PyErr_CheckSignals()) {
1708                     goto error;
1709                 }
1710                 clearerr(f->f_fp);
1711                 shortread = 0;
1712                 continue;
1713             }
1714             PyErr_SetFromErrno(PyExc_IOError);
1715             clearerr(f->f_fp);
1716             goto error;
1717         }
1718         totalread += nread;
1719         p = (char *)memchr(buffer+nfilled, '\n', nread);
1720         if (p == NULL) {
1721             /* Need a larger buffer to fit this line */
1722             nfilled += nread;
1723             buffersize *= 2;
1724             if (buffersize > PY_SSIZE_T_MAX) {
1725                 PyErr_SetString(PyExc_OverflowError,
1726                 "line is longer than a Python string can hold");
1727                 goto error;
1728             }
1729             if (big_buffer == NULL) {
1730                 /* Create the big buffer */
1731                 big_buffer = PyString_FromStringAndSize(
1732                     NULL, buffersize);
1733                 if (big_buffer == NULL)
1734                     goto error;
1735                 buffer = PyString_AS_STRING(big_buffer);
1736                 memcpy(buffer, small_buffer, nfilled);
1737             }
1738             else {
1739                 /* Grow the big buffer */
1740                 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1741                     goto error;
1742                 buffer = PyString_AS_STRING(big_buffer);
1743             }
1744             continue;
1745         }
1746         end = buffer+nfilled+nread;
1747         q = buffer;
1748         do {
1749             /* Process complete lines */
1750             p++;
1751             line = PyString_FromStringAndSize(q, p-q);
1752             if (line == NULL)
1753                 goto error;
1754             err = PyList_Append(list, line);
1755             Py_DECREF(line);
1756             if (err != 0)
1757                 goto error;
1758             q = p;
1759             p = (char *)memchr(q, '\n', end-q);
1760         } while (p != NULL);
1761         /* Move the remaining incomplete line to the start */
1762         nfilled = end-q;
1763         memmove(buffer, q, nfilled);
1764         if (sizehint > 0)
1765             if (totalread >= (size_t)sizehint)
1766                 break;
1767     }
1768     if (nfilled != 0) {
1769         /* Partial last line */
1770         line = PyString_FromStringAndSize(buffer, nfilled);
1771         if (line == NULL)
1772             goto error;
1773         if (sizehint > 0) {
1774             /* Need to complete the last line */
1775             PyObject *rest = get_line(f, 0);
1776             if (rest == NULL) {
1777                 Py_DECREF(line);
1778                 goto error;
1779             }
1780             PyString_Concat(&line, rest);
1781             Py_DECREF(rest);
1782             if (line == NULL)
1783                 goto error;
1784         }
1785         err = PyList_Append(list, line);
1786         Py_DECREF(line);
1787         if (err != 0)
1788             goto error;
1789     }
1790 
1791 cleanup:
1792     Py_XDECREF(big_buffer);
1793     return list;
1794 
1795 error:
1796     Py_CLEAR(list);
1797     goto cleanup;
1798 }
1799 
1800 static PyObject *
file_write(PyFileObject * f,PyObject * args)1801 file_write(PyFileObject *f, PyObject *args)
1802 {
1803     Py_buffer pbuf;
1804     const char *s;
1805     Py_ssize_t n, n2;
1806     PyObject *encoded = NULL;
1807     int err_flag = 0, err;
1808 
1809     if (f->f_fp == NULL)
1810         return err_closed();
1811     if (!f->writable)
1812         return err_mode("writing");
1813     if (f->f_binary) {
1814         if (!PyArg_ParseTuple(args, "s*", &pbuf))
1815             return NULL;
1816         s = pbuf.buf;
1817         n = pbuf.len;
1818     }
1819     else {
1820         PyObject *text;
1821         if (!PyArg_ParseTuple(args, "O", &text))
1822             return NULL;
1823 
1824         if (PyString_Check(text)) {
1825             s = PyString_AS_STRING(text);
1826             n = PyString_GET_SIZE(text);
1827 #ifdef Py_USING_UNICODE
1828         } else if (PyUnicode_Check(text)) {
1829             const char *encoding, *errors;
1830             if (f->f_encoding != Py_None)
1831                 encoding = PyString_AS_STRING(f->f_encoding);
1832             else
1833                 encoding = PyUnicode_GetDefaultEncoding();
1834             if (f->f_errors != Py_None)
1835                 errors = PyString_AS_STRING(f->f_errors);
1836             else
1837                 errors = "strict";
1838             encoded = PyUnicode_AsEncodedString(text, encoding, errors);
1839             if (encoded == NULL)
1840                 return NULL;
1841             s = PyString_AS_STRING(encoded);
1842             n = PyString_GET_SIZE(encoded);
1843 #endif
1844         } else {
1845             if (PyObject_AsCharBuffer(text, &s, &n))
1846                 return NULL;
1847         }
1848     }
1849     f->f_softspace = 0;
1850     FILE_BEGIN_ALLOW_THREADS(f)
1851     errno = 0;
1852     n2 = fwrite(s, 1, n, f->f_fp);
1853     if (n2 != n || ferror(f->f_fp)) {
1854         err_flag = 1;
1855         err = errno;
1856     }
1857     FILE_END_ALLOW_THREADS(f)
1858     Py_XDECREF(encoded);
1859     if (f->f_binary)
1860         PyBuffer_Release(&pbuf);
1861     if (err_flag) {
1862         errno = err;
1863         PyErr_SetFromErrno(PyExc_IOError);
1864         clearerr(f->f_fp);
1865         return NULL;
1866     }
1867     Py_INCREF(Py_None);
1868     return Py_None;
1869 }
1870 
1871 static PyObject *
file_writelines(PyFileObject * f,PyObject * seq)1872 file_writelines(PyFileObject *f, PyObject *seq)
1873 {
1874 #define CHUNKSIZE 1000
1875     PyObject *list, *line;
1876     PyObject *it;       /* iter(seq) */
1877     PyObject *result;
1878     int index, islist;
1879     Py_ssize_t i, j, nwritten, len;
1880 
1881     assert(seq != NULL);
1882     if (f->f_fp == NULL)
1883         return err_closed();
1884     if (!f->writable)
1885         return err_mode("writing");
1886 
1887     result = NULL;
1888     list = NULL;
1889     islist = PyList_Check(seq);
1890     if  (islist)
1891         it = NULL;
1892     else {
1893         it = PyObject_GetIter(seq);
1894         if (it == NULL) {
1895             PyErr_SetString(PyExc_TypeError,
1896                 "writelines() requires an iterable argument");
1897             return NULL;
1898         }
1899         /* From here on, fail by going to error, to reclaim "it". */
1900         list = PyList_New(CHUNKSIZE);
1901         if (list == NULL)
1902             goto error;
1903     }
1904 
1905     /* Strategy: slurp CHUNKSIZE lines into a private list,
1906        checking that they are all strings, then write that list
1907        without holding the interpreter lock, then come back for more. */
1908     for (index = 0; ; index += CHUNKSIZE) {
1909         if (islist) {
1910             Py_XDECREF(list);
1911             list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1912             if (list == NULL)
1913                 goto error;
1914             j = PyList_GET_SIZE(list);
1915         }
1916         else {
1917             for (j = 0; j < CHUNKSIZE; j++) {
1918                 line = PyIter_Next(it);
1919                 if (line == NULL) {
1920                     if (PyErr_Occurred())
1921                         goto error;
1922                     break;
1923                 }
1924                 PyList_SetItem(list, j, line);
1925             }
1926             /* The iterator might have closed the file on us. */
1927             if (f->f_fp == NULL) {
1928                 err_closed();
1929                 goto error;
1930             }
1931         }
1932         if (j == 0)
1933             break;
1934 
1935         /* Check that all entries are indeed strings. If not,
1936            apply the same rules as for file.write() and
1937            convert the results to strings. This is slow, but
1938            seems to be the only way since all conversion APIs
1939            could potentially execute Python code. */
1940         for (i = 0; i < j; i++) {
1941             PyObject *v = PyList_GET_ITEM(list, i);
1942             if (!PyString_Check(v)) {
1943                 const char *buffer;
1944                 int res;
1945                 if (f->f_binary) {
1946                     res = PyObject_AsReadBuffer(v, (const void**)&buffer, &len);
1947                 } else {
1948                     res = PyObject_AsCharBuffer(v, &buffer, &len);
1949                 }
1950                 if (res) {
1951                     PyErr_SetString(PyExc_TypeError,
1952             "writelines() argument must be a sequence of strings");
1953                             goto error;
1954                 }
1955                 line = PyString_FromStringAndSize(buffer,
1956                                                   len);
1957                 if (line == NULL)
1958                     goto error;
1959                 Py_DECREF(v);
1960                 PyList_SET_ITEM(list, i, line);
1961             }
1962         }
1963 
1964         /* Since we are releasing the global lock, the
1965            following code may *not* execute Python code. */
1966         f->f_softspace = 0;
1967         FILE_BEGIN_ALLOW_THREADS(f)
1968         errno = 0;
1969         for (i = 0; i < j; i++) {
1970             line = PyList_GET_ITEM(list, i);
1971             len = PyString_GET_SIZE(line);
1972             nwritten = fwrite(PyString_AS_STRING(line),
1973                               1, len, f->f_fp);
1974             if (nwritten != len) {
1975                 FILE_ABORT_ALLOW_THREADS(f)
1976                 PyErr_SetFromErrno(PyExc_IOError);
1977                 clearerr(f->f_fp);
1978                 goto error;
1979             }
1980         }
1981         FILE_END_ALLOW_THREADS(f)
1982 
1983         if (j < CHUNKSIZE)
1984             break;
1985     }
1986 
1987     Py_INCREF(Py_None);
1988     result = Py_None;
1989   error:
1990     Py_XDECREF(list);
1991     Py_XDECREF(it);
1992     return result;
1993 #undef CHUNKSIZE
1994 }
1995 
1996 static PyObject *
file_self(PyFileObject * f)1997 file_self(PyFileObject *f)
1998 {
1999     if (f->f_fp == NULL)
2000         return err_closed();
2001     Py_INCREF(f);
2002     return (PyObject *)f;
2003 }
2004 
2005 static PyObject *
file_xreadlines(PyFileObject * f)2006 file_xreadlines(PyFileObject *f)
2007 {
2008     if (PyErr_WarnPy3k("f.xreadlines() not supported in 3.x, "
2009                        "try 'for line in f' instead", 1) < 0)
2010            return NULL;
2011     return file_self(f);
2012 }
2013 
2014 static PyObject *
file_exit(PyObject * f,PyObject * args)2015 file_exit(PyObject *f, PyObject *args)
2016 {
2017     PyObject *ret = PyObject_CallMethod(f, "close", NULL);
2018     if (!ret)
2019         /* If error occurred, pass through */
2020         return NULL;
2021     Py_DECREF(ret);
2022     /* We cannot return the result of close since a true
2023      * value will be interpreted as "yes, swallow the
2024      * exception if one was raised inside the with block". */
2025     Py_RETURN_NONE;
2026 }
2027 
/* Docstrings for readline(), read() and write(); each *_doc string is
   attached to the method of the same name in file_methods[]. */
PyDoc_STRVAR(readline_doc,
"readline([size]) -> next line from the file, as a string.\n"
"\n"
"Retain newline.  A non-negative size argument limits the maximum\n"
"number of bytes to return (an incomplete line may be returned then).\n"
"Return an empty string at EOF.");

PyDoc_STRVAR(read_doc,
"read([size]) -> read at most size bytes, returned as a string.\n"
"\n"
"If the size argument is negative or omitted, read until EOF is reached.\n"
"Notice that when in non-blocking mode, less data than what was requested\n"
"may be returned, even if no size parameter was given.");

PyDoc_STRVAR(write_doc,
"write(str) -> None.  Write string str to file.\n"
"\n"
"Note that due to buffering, flush() or close() may be needed before\n"
"the file on disk reflects the data written.");
2047 
2048 PyDoc_STRVAR(fileno_doc,
2049 "fileno() -> integer \"file descriptor\".\n"
2050 "\n"
2051 "This is needed for lower-level file interfaces, such os.read().");
2052 
2053 PyDoc_STRVAR(seek_doc,
2054 "seek(offset[, whence]) -> None.  Move to new file position.\n"
2055 "\n"
2056 "Argument offset is a byte count.  Optional argument whence defaults to\n"
2057 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
2058 "(move relative to current position, positive or negative), and 2 (move\n"
2059 "relative to end of file, usually negative, although many platforms allow\n"
2060 "seeking beyond the end of a file).  If the file is opened in text mode,\n"
2061 "only offsets returned by tell() are legal.  Use of other offsets causes\n"
2062 "undefined behavior."
2063 "\n"
2064 "Note that not all file objects are seekable.");
2065 
/* Docstrings for the remaining file methods; each *_doc string is attached
   to its method in file_methods[].  truncate_doc only exists when
   HAVE_FTRUNCATE is defined, mirroring the conditional "truncate" entry
   in that table. */
#ifdef HAVE_FTRUNCATE
PyDoc_STRVAR(truncate_doc,
"truncate([size]) -> None.  Truncate the file to at most size bytes.\n"
"\n"
"Size defaults to the current file position, as returned by tell().");
#endif

PyDoc_STRVAR(tell_doc,
"tell() -> current file position, an integer (may be a long integer).");

PyDoc_STRVAR(readinto_doc,
"readinto() -> Undocumented.  Don't use this; it may go away.");

PyDoc_STRVAR(readlines_doc,
"readlines([size]) -> list of strings, each a line from the file.\n"
"\n"
"Call readline() repeatedly and return a list of the lines so read.\n"
"The optional size argument, if given, is an approximate bound on the\n"
"total number of bytes in the lines returned.");

PyDoc_STRVAR(xreadlines_doc,
"xreadlines() -> returns self.\n"
"\n"
"For backward compatibility. File objects now include the performance\n"
"optimizations previously implemented in the xreadlines module.");

PyDoc_STRVAR(writelines_doc,
"writelines(sequence_of_strings) -> None.  Write the strings to the file.\n"
"\n"
"Note that newlines are not added.  The sequence can be any iterable object\n"
"producing strings. This is equivalent to calling write() for each string.");

PyDoc_STRVAR(flush_doc,
"flush() -> None.  Flush the internal I/O buffer.");

PyDoc_STRVAR(close_doc,
"close() -> None or (perhaps) an integer.  Close the file.\n"
"\n"
"Sets data attribute .closed to True.  A closed file cannot be used for\n"
"further I/O operations.  close() may be called more than once without\n"
"error.  Some kinds of file objects (for example, opened by popen())\n"
"may return an exit status upon closing.");

PyDoc_STRVAR(isatty_doc,
"isatty() -> true or false.  True if the file is connected to a tty device.");

/* Context-manager protocol: __enter__ is served by file_self and __exit__
   by file_exit (see file_methods[]). */
PyDoc_STRVAR(enter_doc,
             "__enter__() -> self.");

PyDoc_STRVAR(exit_doc,
             "__exit__(*excinfo) -> None.  Closes the file.");
2117 
/* Method table for the file type (installed as tp_methods in PyFile_Type).
   Each row is {python name, C implementation, calling convention,
   docstring}.  "truncate" is only present when HAVE_FTRUNCATE is defined,
   and "__enter__" reuses file_self. */
static PyMethodDef file_methods[] = {
    {"readline",  (PyCFunction)file_readline, METH_VARARGS, readline_doc},
    {"read",      (PyCFunction)file_read,     METH_VARARGS, read_doc},
    {"write",     (PyCFunction)file_write,    METH_VARARGS, write_doc},
    {"fileno",    (PyCFunction)file_fileno,   METH_NOARGS,  fileno_doc},
    {"seek",      (PyCFunction)file_seek,     METH_VARARGS, seek_doc},
#ifdef HAVE_FTRUNCATE
    {"truncate",  (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
#endif
    {"tell",      (PyCFunction)file_tell,     METH_NOARGS,  tell_doc},
    {"readinto",  (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
    {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc},
    {"xreadlines",(PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc},
    {"writelines",(PyCFunction)file_writelines, METH_O,     writelines_doc},
    {"flush",     (PyCFunction)file_flush,    METH_NOARGS,  flush_doc},
    {"close",     (PyCFunction)file_close,    METH_NOARGS,  close_doc},
    {"isatty",    (PyCFunction)file_isatty,   METH_NOARGS,  isatty_doc},
    {"__enter__", (PyCFunction)file_self,     METH_NOARGS,  enter_doc},
    {"__exit__",  (PyCFunction)file_exit,     METH_VARARGS, exit_doc},
    {NULL,            NULL}             /* sentinel */
};
2139 
2140 #define OFF(x) offsetof(PyFileObject, x)
2141 
/* Read-only attributes exposed straight from PyFileObject fields
   (installed as tp_members in PyFile_Type).  Each row gives the attribute
   name, its member type, the field offset computed with OFF(), the RO
   flag and a docstring. */
static PyMemberDef file_memberlist[] = {
    {"mode",            T_OBJECT,       OFF(f_mode),    RO,
     "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
    {"name",            T_OBJECT,       OFF(f_name),    RO,
     "file name"},
    {"encoding",        T_OBJECT,       OFF(f_encoding),        RO,
     "file encoding"},
    {"errors",          T_OBJECT,       OFF(f_errors),  RO,
     "Unicode error handler"},
    /* getattr(f, "closed") is implemented without this table */
    {NULL}      /* Sentinel */
};
2154 
2155 static PyObject *
get_closed(PyFileObject * f,void * closure)2156 get_closed(PyFileObject *f, void *closure)
2157 {
2158     return PyBool_FromLong((long)(f->f_fp == 0));
2159 }
2160 static PyObject *
get_newlines(PyFileObject * f,void * closure)2161 get_newlines(PyFileObject *f, void *closure)
2162 {
2163     switch (f->f_newlinetypes) {
2164     case NEWLINE_UNKNOWN:
2165         Py_INCREF(Py_None);
2166         return Py_None;
2167     case NEWLINE_CR:
2168         return PyString_FromString("\r");
2169     case NEWLINE_LF:
2170         return PyString_FromString("\n");
2171     case NEWLINE_CR|NEWLINE_LF:
2172         return Py_BuildValue("(ss)", "\r", "\n");
2173     case NEWLINE_CRLF:
2174         return PyString_FromString("\r\n");
2175     case NEWLINE_CR|NEWLINE_CRLF:
2176         return Py_BuildValue("(ss)", "\r", "\r\n");
2177     case NEWLINE_LF|NEWLINE_CRLF:
2178         return Py_BuildValue("(ss)", "\n", "\r\n");
2179     case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
2180         return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
2181     default:
2182         PyErr_Format(PyExc_SystemError,
2183                      "Unknown newlines value 0x%x\n",
2184                      f->f_newlinetypes);
2185         return NULL;
2186     }
2187 }
2188 
2189 static PyObject *
get_softspace(PyFileObject * f,void * closure)2190 get_softspace(PyFileObject *f, void *closure)
2191 {
2192     if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2193         return NULL;
2194     return PyInt_FromLong(f->f_softspace);
2195 }
2196 
2197 static int
set_softspace(PyFileObject * f,PyObject * value)2198 set_softspace(PyFileObject *f, PyObject *value)
2199 {
2200     int new;
2201     if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2202         return -1;
2203 
2204     if (value == NULL) {
2205         PyErr_SetString(PyExc_TypeError,
2206                         "can't delete softspace attribute");
2207         return -1;
2208     }
2209 
2210     new = PyInt_AsLong(value);
2211     if (new == -1 && PyErr_Occurred())
2212         return -1;
2213     f->f_softspace = new;
2214     return 0;
2215 }
2216 
/* Computed attributes of the file type (installed as tp_getset in
   PyFile_Type).  "closed" and "newlines" have a getter only; "softspace"
   has both a getter and a setter. */
static PyGetSetDef file_getsetlist[] = {
    {"closed", (getter)get_closed, NULL, "True if the file is closed"},
    {"newlines", (getter)get_newlines, NULL,
     "end-of-line convention used in this file"},
    {"softspace", (getter)get_softspace, (setter)set_softspace,
     "flag indicating that a space needs to be printed; used by print"},
    {0},        /* sentinel */
};
2225 
2226 static void
drop_readahead(PyFileObject * f)2227 drop_readahead(PyFileObject *f)
2228 {
2229     if (f->f_buf != NULL) {
2230         PyMem_Free(f->f_buf);
2231         f->f_buf = NULL;
2232     }
2233 }
2234 
2235 /* Make sure that file has a readahead buffer with at least one byte
2236    (unless at EOF) and no more than bufsize.  Returns negative value on
2237    error, will set MemoryError if bufsize bytes cannot be allocated. */
2238 static int
readahead(PyFileObject * f,Py_ssize_t bufsize)2239 readahead(PyFileObject *f, Py_ssize_t bufsize)
2240 {
2241     Py_ssize_t chunksize;
2242 
2243     if (f->f_buf != NULL) {
2244         if( (f->f_bufend - f->f_bufptr) >= 1)
2245             return 0;
2246         else
2247             drop_readahead(f);
2248     }
2249     if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) {
2250         PyErr_NoMemory();
2251         return -1;
2252     }
2253     FILE_BEGIN_ALLOW_THREADS(f)
2254     errno = 0;
2255     chunksize = Py_UniversalNewlineFread(
2256         f->f_buf, bufsize, f->f_fp, (PyObject *)f);
2257     FILE_END_ALLOW_THREADS(f)
2258     if (chunksize == 0) {
2259         if (ferror(f->f_fp)) {
2260             PyErr_SetFromErrno(PyExc_IOError);
2261             clearerr(f->f_fp);
2262             drop_readahead(f);
2263             return -1;
2264         }
2265     }
2266     f->f_bufptr = f->f_buf;
2267     f->f_bufend = f->f_buf + chunksize;
2268     return 0;
2269 }
2270 
2271 /* Used by file_iternext.  The returned string will start with 'skip'
2272    uninitialized bytes followed by the remainder of the line. Don't be
2273    horrified by the recursive call: maximum recursion depth is limited by
2274    logarithmic buffer growth to about 50 even when reading a 1gb line. */
2275 
2276 static PyStringObject *
readahead_get_line_skip(PyFileObject * f,Py_ssize_t skip,Py_ssize_t bufsize)2277 readahead_get_line_skip(PyFileObject *f, Py_ssize_t skip, Py_ssize_t bufsize)
2278 {
2279     PyStringObject* s;
2280     char *bufptr;
2281     char *buf;
2282     Py_ssize_t len;
2283 
2284     if (f->f_buf == NULL)
2285         if (readahead(f, bufsize) < 0)
2286             return NULL;
2287 
2288     len = f->f_bufend - f->f_bufptr;
2289     if (len == 0)
2290         return (PyStringObject *)
2291             PyString_FromStringAndSize(NULL, skip);
2292     bufptr = (char *)memchr(f->f_bufptr, '\n', len);
2293     if (bufptr != NULL) {
2294         bufptr++;                               /* Count the '\n' */
2295         len = bufptr - f->f_bufptr;
2296         s = (PyStringObject *)
2297             PyString_FromStringAndSize(NULL, skip + len);
2298         if (s == NULL)
2299             return NULL;
2300         memcpy(PyString_AS_STRING(s) + skip, f->f_bufptr, len);
2301         f->f_bufptr = bufptr;
2302         if (bufptr == f->f_bufend)
2303             drop_readahead(f);
2304     } else {
2305         bufptr = f->f_bufptr;
2306         buf = f->f_buf;
2307         f->f_buf = NULL;                /* Force new readahead buffer */
2308         assert(len <= PY_SSIZE_T_MAX - skip);
2309         s = readahead_get_line_skip(f, skip + len, bufsize + (bufsize>>2));
2310         if (s == NULL) {
2311             PyMem_Free(buf);
2312             return NULL;
2313         }
2314         memcpy(PyString_AS_STRING(s) + skip, bufptr, len);
2315         PyMem_Free(buf);
2316     }
2317     return s;
2318 }
2319 
2320 /* A larger buffer size may actually decrease performance. */
2321 #define READAHEAD_BUFSIZE 8192
2322 
2323 static PyObject *
file_iternext(PyFileObject * f)2324 file_iternext(PyFileObject *f)
2325 {
2326     PyStringObject* l;
2327 
2328     if (f->f_fp == NULL)
2329         return err_closed();
2330     if (!f->readable)
2331         return err_mode("reading");
2332 
2333     l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
2334     if (l == NULL || PyString_GET_SIZE(l) == 0) {
2335         Py_XDECREF(l);
2336         return NULL;
2337     }
2338     return (PyObject *)l;
2339 }
2340 
2341 
2342 static PyObject *
file_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2343 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2344 {
2345     PyObject *self;
2346     static PyObject *not_yet_string;
2347 
2348     assert(type != NULL && type->tp_alloc != NULL);
2349 
2350     if (not_yet_string == NULL) {
2351         not_yet_string = PyString_InternFromString("<uninitialized file>");
2352         if (not_yet_string == NULL)
2353             return NULL;
2354     }
2355 
2356     self = type->tp_alloc(type, 0);
2357     if (self != NULL) {
2358         /* Always fill in the name and mode, so that nobody else
2359            needs to special-case NULLs there. */
2360         Py_INCREF(not_yet_string);
2361         ((PyFileObject *)self)->f_name = not_yet_string;
2362         Py_INCREF(not_yet_string);
2363         ((PyFileObject *)self)->f_mode = not_yet_string;
2364         Py_INCREF(Py_None);
2365         ((PyFileObject *)self)->f_encoding = Py_None;
2366         Py_INCREF(Py_None);
2367         ((PyFileObject *)self)->f_errors = Py_None;
2368         ((PyFileObject *)self)->weakreflist = NULL;
2369         ((PyFileObject *)self)->unlocked_count = 0;
2370     }
2371     return self;
2372 }
2373 
2374 static int
file_init(PyObject * self,PyObject * args,PyObject * kwds)2375 file_init(PyObject *self, PyObject *args, PyObject *kwds)
2376 {
2377     PyFileObject *foself = (PyFileObject *)self;
2378     int ret = 0;
2379     static char *kwlist[] = {"name", "mode", "buffering", 0};
2380     char *name = NULL;
2381     char *mode = "r";
2382     int bufsize = -1;
2383     int wideargument = 0;
2384 #ifdef MS_WINDOWS
2385     PyObject *po;
2386 #endif
2387 
2388     assert(PyFile_Check(self));
2389     if (foself->f_fp != NULL) {
2390         /* Have to close the existing file first. */
2391         PyObject *closeresult = file_close(foself);
2392         if (closeresult == NULL)
2393             return -1;
2394         Py_DECREF(closeresult);
2395     }
2396 
2397 #ifdef MS_WINDOWS
2398     if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
2399                                     kwlist, &po, &mode, &bufsize)) {
2400         wideargument = 1;
2401         if (fill_file_fields(foself, NULL, po, mode,
2402                              fclose) == NULL)
2403             goto Error;
2404     } else {
2405         /* Drop the argument parsing error as narrow
2406            strings are also valid. */
2407         PyErr_Clear();
2408     }
2409 #endif
2410 
2411     if (!wideargument) {
2412         PyObject *o_name;
2413 
2414         if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
2415                                          Py_FileSystemDefaultEncoding,
2416                                          &name,
2417                                          &mode, &bufsize))
2418             return -1;
2419 
2420         /* We parse again to get the name as a PyObject */
2421         if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
2422                                          kwlist, &o_name, &mode,
2423                                          &bufsize))
2424             goto Error;
2425 
2426         if (fill_file_fields(foself, NULL, o_name, mode,
2427                              fclose) == NULL)
2428             goto Error;
2429     }
2430     if (open_the_file(foself, name, mode) == NULL)
2431         goto Error;
2432     foself->f_setbuf = NULL;
2433     PyFile_SetBufSize(self, bufsize);
2434     goto Done;
2435 
2436 Error:
2437     ret = -1;
2438     /* fall through */
2439 Done:
2440     PyMem_Free(name); /* free the encoded string */
2441     return ret;
2442 }
2443 
/* Docstring of the file type itself (installed as tp_doc in PyFile_Type).
   It is written as two adjacent PyDoc_STR() pieces: the first covers the
   basic modes and buffering, the second the 'U' universal-newline mode.
   NOTE(review): the split is presumably there to keep each literal under
   a compiler string-length limit -- confirm. */
PyDoc_VAR(file_doc) =
PyDoc_STR(
"file(name[, mode[, buffering]]) -> file object\n"
"\n"
"Open a file.  The mode can be 'r', 'w' or 'a' for reading (default),\n"
"writing or appending.  The file will be created if it doesn't exist\n"
"when opened for writing or appending; it will be truncated when\n"
"opened for writing.  Add a 'b' to the mode for binary files.\n"
"Add a '+' to the mode to allow simultaneous reading and writing.\n"
"If the buffering argument is given, 0 means unbuffered, 1 means line\n"
"buffered, and larger numbers specify the buffer size.  The preferred way\n"
"to open a file is with the builtin open() function.\n"
)
PyDoc_STR(
"Add a 'U' to mode to open the file for input with universal newline\n"
"support.  Any line ending in the input file will be seen as a '\\n'\n"
"in Python.  Also, a file so opened gains the attribute 'newlines';\n"
"the value for this attribute is one of None (no newline read yet),\n"
"'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
"\n"
"'U' cannot be combined with 'w' or '+' mode.\n"
);
2466 
/* The type object for 'file'.  Slots are positional, so every entry is
   labelled with the slot it fills.  The type is subclassable
   (Py_TPFLAGS_BASETYPE), supports weak references through the
   weakreflist field, and is its own iterator: tp_iter returns the file
   itself and tp_iternext yields lines. */
PyTypeObject PyFile_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "file",                                     /* tp_name */
    sizeof(PyFileObject),                       /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)file_dealloc,                   /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)file_repr,                        /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    /* softspace is writable:  we must supply tp_setattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
    file_doc,                                   /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    offsetof(PyFileObject, weakreflist),        /* tp_weaklistoffset */
    (getiterfunc)file_self,                     /* tp_iter */
    (iternextfunc)file_iternext,                /* tp_iternext */
    file_methods,                               /* tp_methods */
    file_memberlist,                            /* tp_members */
    file_getsetlist,                            /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    file_init,                                  /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    file_new,                                   /* tp_new */
    PyObject_Del,                           /* tp_free */
};
2509 
2510 /* Interface for the 'soft space' between print items. */
2511 
2512 int
PyFile_SoftSpace(PyObject * f,int newflag)2513 PyFile_SoftSpace(PyObject *f, int newflag)
2514 {
2515     long oldflag = 0;
2516     if (f == NULL) {
2517         /* Do nothing */
2518     }
2519     else if (PyFile_Check(f)) {
2520         oldflag = ((PyFileObject *)f)->f_softspace;
2521         ((PyFileObject *)f)->f_softspace = newflag;
2522     }
2523     else {
2524         PyObject *v;
2525         v = PyObject_GetAttrString(f, "softspace");
2526         if (v == NULL)
2527             PyErr_Clear();
2528         else {
2529             if (PyInt_Check(v))
2530                 oldflag = PyInt_AsLong(v);
2531             assert(oldflag < INT_MAX);
2532             Py_DECREF(v);
2533         }
2534         v = PyInt_FromLong((long)newflag);
2535         if (v == NULL)
2536             PyErr_Clear();
2537         else {
2538             if (PyObject_SetAttrString(f, "softspace", v) != 0)
2539                 PyErr_Clear();
2540             Py_DECREF(v);
2541         }
2542     }
2543     return (int)oldflag;
2544 }
2545 
2546 /* Interfaces to write objects/strings to file-like objects */
2547 
2548 int
PyFile_WriteObject(PyObject * v,PyObject * f,int flags)2549 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2550 {
2551     PyObject *writer, *value, *args, *result;
2552     if (f == NULL) {
2553         PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2554         return -1;
2555     }
2556     else if (PyFile_Check(f)) {
2557         PyFileObject *fobj = (PyFileObject *) f;
2558 #ifdef Py_USING_UNICODE
2559         PyObject *enc = fobj->f_encoding;
2560         int result;
2561 #endif
2562         if (fobj->f_fp == NULL) {
2563             err_closed();
2564             return -1;
2565         }
2566 #ifdef Py_USING_UNICODE
2567         if ((flags & Py_PRINT_RAW) &&
2568             PyUnicode_Check(v) && enc != Py_None) {
2569             char *cenc = PyString_AS_STRING(enc);
2570             char *errors = fobj->f_errors == Py_None ?
2571               "strict" : PyString_AS_STRING(fobj->f_errors);
2572             value = PyUnicode_AsEncodedString(v, cenc, errors);
2573             if (value == NULL)
2574                 return -1;
2575         } else {
2576             value = v;
2577             Py_INCREF(value);
2578         }
2579         result = file_PyObject_Print(value, fobj, flags);
2580         Py_DECREF(value);
2581         return result;
2582 #else
2583         return file_PyObject_Print(v, fobj, flags);
2584 #endif
2585     }
2586     writer = PyObject_GetAttrString(f, "write");
2587     if (writer == NULL)
2588         return -1;
2589     if (flags & Py_PRINT_RAW) {
2590         if (PyUnicode_Check(v)) {
2591             value = v;
2592             Py_INCREF(value);
2593         } else
2594             value = PyObject_Str(v);
2595     }
2596     else
2597         value = PyObject_Repr(v);
2598     if (value == NULL) {
2599         Py_DECREF(writer);
2600         return -1;
2601     }
2602     args = PyTuple_Pack(1, value);
2603     if (args == NULL) {
2604         Py_DECREF(value);
2605         Py_DECREF(writer);
2606         return -1;
2607     }
2608     result = PyEval_CallObject(writer, args);
2609     Py_DECREF(args);
2610     Py_DECREF(value);
2611     Py_DECREF(writer);
2612     if (result == NULL)
2613         return -1;
2614     Py_DECREF(result);
2615     return 0;
2616 }
2617 
2618 int
PyFile_WriteString(const char * s,PyObject * f)2619 PyFile_WriteString(const char *s, PyObject *f)
2620 {
2621 
2622     if (f == NULL) {
2623         /* Should be caused by a pre-existing error */
2624         if (!PyErr_Occurred())
2625             PyErr_SetString(PyExc_SystemError,
2626                             "null file for PyFile_WriteString");
2627         return -1;
2628     }
2629     else if (PyFile_Check(f)) {
2630         PyFileObject *fobj = (PyFileObject *) f;
2631         FILE *fp = PyFile_AsFile(f);
2632         if (fp == NULL) {
2633             err_closed();
2634             return -1;
2635         }
2636         FILE_BEGIN_ALLOW_THREADS(fobj)
2637         fputs(s, fp);
2638         FILE_END_ALLOW_THREADS(fobj)
2639         return 0;
2640     }
2641     else if (!PyErr_Occurred()) {
2642         PyObject *v = PyString_FromString(s);
2643         int err;
2644         if (v == NULL)
2645             return -1;
2646         err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2647         Py_DECREF(v);
2648         return err;
2649     }
2650     else
2651         return -1;
2652 }
2653 
2654 /* Try to get a file-descriptor from a Python object.  If the object
2655    is an integer or long integer, its value is returned.  If not, the
2656    object's fileno() method is called if it exists; the method must return
2657    an integer or long integer, which is returned as the file descriptor value.
2658    -1 is returned on failure.
2659 */
2660 
PyObject_AsFileDescriptor(PyObject * o)2661 int PyObject_AsFileDescriptor(PyObject *o)
2662 {
2663     int fd;
2664     PyObject *meth;
2665 
2666     if (PyInt_Check(o)) {
2667         fd = _PyInt_AsInt(o);
2668     }
2669     else if (PyLong_Check(o)) {
2670         fd = _PyLong_AsInt(o);
2671     }
2672     else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2673     {
2674         PyObject *fno = PyEval_CallObject(meth, NULL);
2675         Py_DECREF(meth);
2676         if (fno == NULL)
2677             return -1;
2678 
2679         if (PyInt_Check(fno)) {
2680             fd = _PyInt_AsInt(fno);
2681             Py_DECREF(fno);
2682         }
2683         else if (PyLong_Check(fno)) {
2684             fd = _PyLong_AsInt(fno);
2685             Py_DECREF(fno);
2686         }
2687         else {
2688             PyErr_SetString(PyExc_TypeError,
2689                             "fileno() returned a non-integer");
2690             Py_DECREF(fno);
2691             return -1;
2692         }
2693     }
2694     else {
2695         PyErr_SetString(PyExc_TypeError,
2696                         "argument must be an int, or have a fileno() method.");
2697         return -1;
2698     }
2699 
2700     if (fd < 0) {
2701         PyErr_Format(PyExc_ValueError,
2702                      "file descriptor cannot be a negative integer (%i)",
2703                      fd);
2704         return -1;
2705     }
2706     return fd;
2707 }
2708 
2709 /* From here on we need access to the real fgets and fread */
2710 #undef fgets
2711 #undef fread
2712 
2713 /*
2714 ** Py_UniversalNewlineFgets is an fgets variation that understands
2715 ** all of \r, \n and \r\n conventions.
2716 ** The stream should be opened in binary mode.
2717 ** If fobj is NULL the routine always does newline conversion, and
2718 ** it may peek one char ahead to gobble the second char in \r\n.
2719 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2720 ** is no readahead but in stead a flag is used to skip a following
2721 ** \n on the next read. Also, if the file is open in binary mode
2722 ** the whole conversion is skipped. Finally, the routine keeps track of
2723 ** the different types of newlines seen.
2724 ** Note that we need no error handling: fgets() treats error and eof
2725 ** identically.
2726 */
2727 char *
Py_UniversalNewlineFgets(char * buf,int n,FILE * stream,PyObject * fobj)2728 Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2729 {
2730     char *p = buf;
2731     int c;
2732     int newlinetypes = 0;
2733     int skipnextlf = 0;
2734     int univ_newline = 1;
2735 
2736     if (fobj) {
2737         if (!PyFile_Check(fobj)) {
2738             errno = ENXIO;              /* What can you do... */
2739             return NULL;
2740         }
2741         univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2742         if ( !univ_newline )
2743             return fgets(buf, n, stream);
2744         newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2745         skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2746     }
2747     FLOCKFILE(stream);
2748     c = 'x'; /* Shut up gcc warning */
2749     while (--n > 0 && (c = GETC(stream)) != EOF ) {
2750         if (skipnextlf ) {
2751             skipnextlf = 0;
2752             if (c == '\n') {
2753                 /* Seeing a \n here with skipnextlf true
2754                 ** means we saw a \r before.
2755                 */
2756                 newlinetypes |= NEWLINE_CRLF;
2757                 c = GETC(stream);
2758                 if (c == EOF) break;
2759             } else {
2760                 /*
2761                 ** Note that c == EOF also brings us here,
2762                 ** so we're okay if the last char in the file
2763                 ** is a CR.
2764                 */
2765                 newlinetypes |= NEWLINE_CR;
2766             }
2767         }
2768         if (c == '\r') {
2769             /* A \r is translated into a \n, and we skip
2770             ** an adjacent \n, if any. We don't set the
2771             ** newlinetypes flag until we've seen the next char.
2772             */
2773             skipnextlf = 1;
2774             c = '\n';
2775         } else if ( c == '\n') {
2776             newlinetypes |= NEWLINE_LF;
2777         }
2778         *p++ = c;
2779         if (c == '\n') break;
2780     }
2781     if ( c == EOF && skipnextlf )
2782         newlinetypes |= NEWLINE_CR;
2783     FUNLOCKFILE(stream);
2784     *p = '\0';
2785     if (fobj) {
2786         ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2787         ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2788     } else if ( skipnextlf ) {
2789         /* If we have no file object we cannot save the
2790         ** skipnextlf flag. We have to readahead, which
2791         ** will cause a pause if we're reading from an
2792         ** interactive stream, but that is very unlikely
2793         ** unless we're doing something silly like
2794         ** execfile("/dev/tty").
2795         */
2796         c = GETC(stream);
2797         if ( c != '\n' )
2798             ungetc(c, stream);
2799     }
2800     if (p == buf)
2801         return NULL;
2802     return buf;
2803 }
2804 
2805 /*
2806 ** Py_UniversalNewlineFread is an fread variation that understands
2807 ** all of \r, \n and \r\n conventions.
2808 ** The stream should be opened in binary mode.
2809 ** fobj must be a PyFileObject. In this case there
2810 ** is no readahead but in stead a flag is used to skip a following
2811 ** \n on the next read. Also, if the file is open in binary mode
2812 ** the whole conversion is skipped. Finally, the routine keeps track of
2813 ** the different types of newlines seen.
2814 */
2815 size_t
Py_UniversalNewlineFread(char * buf,size_t n,FILE * stream,PyObject * fobj)2816 Py_UniversalNewlineFread(char *buf, size_t n,
2817                          FILE *stream, PyObject *fobj)
2818 {
2819     char *dst = buf;
2820     PyFileObject *f = (PyFileObject *)fobj;
2821     int newlinetypes, skipnextlf;
2822 
2823     assert(buf != NULL);
2824     assert(stream != NULL);
2825 
2826     if (!fobj || !PyFile_Check(fobj)) {
2827         errno = ENXIO;          /* What can you do... */
2828         return 0;
2829     }
2830     if (!f->f_univ_newline)
2831         return fread(buf, 1, n, stream);
2832     newlinetypes = f->f_newlinetypes;
2833     skipnextlf = f->f_skipnextlf;
2834     /* Invariant:  n is the number of bytes remaining to be filled
2835      * in the buffer.
2836      */
2837     while (n) {
2838         size_t nread;
2839         int shortread;
2840         char *src = dst;
2841 
2842         nread = fread(dst, 1, n, stream);
2843         assert(nread <= n);
2844         if (nread == 0)
2845             break;
2846 
2847         n -= nread; /* assuming 1 byte out for each in; will adjust */
2848         shortread = n != 0;             /* true iff EOF or error */
2849         while (nread--) {
2850             char c = *src++;
2851             if (c == '\r') {
2852                 /* Save as LF and set flag to skip next LF. */
2853                 *dst++ = '\n';
2854                 skipnextlf = 1;
2855             }
2856             else if (skipnextlf && c == '\n') {
2857                 /* Skip LF, and remember we saw CR LF. */
2858                 skipnextlf = 0;
2859                 newlinetypes |= NEWLINE_CRLF;
2860                 ++n;
2861             }
2862             else {
2863                 /* Normal char to be stored in buffer.  Also
2864                  * update the newlinetypes flag if either this
2865                  * is an LF or the previous char was a CR.
2866                  */
2867                 if (c == '\n')
2868                     newlinetypes |= NEWLINE_LF;
2869                 else if (skipnextlf)
2870                     newlinetypes |= NEWLINE_CR;
2871                 *dst++ = c;
2872                 skipnextlf = 0;
2873             }
2874         }
2875         if (shortread) {
2876             /* If this is EOF, update type flags. */
2877             if (skipnextlf && feof(stream))
2878                 newlinetypes |= NEWLINE_CR;
2879             break;
2880         }
2881     }
2882     f->f_newlinetypes = newlinetypes;
2883     f->f_skipnextlf = skipnextlf;
2884     return dst - buf;
2885 }
2886 
2887 #ifdef __cplusplus
2888 }
2889 #endif
2890