• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* File object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
6 
7 #ifdef HAVE_SYS_TYPES_H
8 #include <sys/types.h>
9 #endif /* HAVE_SYS_TYPES_H */
10 
11 #ifdef MS_WINDOWS
12 #define fileno _fileno
13 /* can simulate truncate with Win32 API functions; see file_truncate */
14 #define HAVE_FTRUNCATE
15 #define WIN32_LEAN_AND_MEAN
16 #include <windows.h>
17 #endif
18 
19 #if defined(PYOS_OS2) && defined(PYCC_GCC)
20 #include <io.h>
21 #endif
22 
/* Apply PyString_AS_STRING to v viewed as a PyStringObject pointer.
   The argument is parenthesized so that an expression argument such as
   `p + 1` is cast as a whole rather than only its first operand. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))
24 
25 #ifdef HAVE_ERRNO_H
26 #include <errno.h>
27 #endif
28 
/* Character read and stream locking primitives.  Without
   getc_unlocked() plain getc() is used and the lock macros expand
   to nothing. */
#ifndef HAVE_GETC_UNLOCKED
#define GETC(f) getc(f)
#define FLOCKFILE(f)
#define FUNLOCKFILE(f)
#else
#define GETC(f) getc_unlocked(f)
#define FLOCKFILE(f) flockfile(f)
#define FUNLOCKFILE(f) funlockfile(f)
#endif
38 
/* Bit flags stored in f_newlinetypes; each newline kind seen so far
   sets its own bit, and zero means none has been seen yet. */
#define NEWLINE_UNKNOWN 0x0     /* No newline seen, yet */
#define NEWLINE_CR      0x1     /* \r newline seen */
#define NEWLINE_LF      0x2     /* \n newline seen */
#define NEWLINE_CRLF    0x4     /* \r\n newline seen */
44 
/*
 * These macros release the GIL while preventing the f_close() function being
 * called in the interval between them.  For that purpose, a running total of
 * the number of currently running unlocked code sections is kept in
 * the unlocked_count field of the PyFileObject. The close() method raises
 * an IOError if that field is non-zero.  See issue #815646, #595601.
 *
 * FILE_BEGIN_ALLOW_THREADS opens a brace block that FILE_END_ALLOW_THREADS
 * closes, so the two must be used as a pair in the same scope.
 * FILE_ABORT_ALLOW_THREADS undoes the count and blocks threads again
 * without closing that block.
 *
 * The fobj argument is parenthesized in every expansion so that a cast
 * or any other non-primary pointer expression can be passed safely.
 */

#define FILE_BEGIN_ALLOW_THREADS(fobj) \
{ \
    (fobj)->unlocked_count++; \
    Py_BEGIN_ALLOW_THREADS

#define FILE_END_ALLOW_THREADS(fobj) \
    Py_END_ALLOW_THREADS \
    (fobj)->unlocked_count--; \
    assert((fobj)->unlocked_count >= 0); \
}

#define FILE_ABORT_ALLOW_THREADS(fobj) \
    Py_BLOCK_THREADS \
    (fobj)->unlocked_count--; \
    assert((fobj)->unlocked_count >= 0);
68 
69 #ifdef __cplusplus
70 extern "C" {
71 #endif
72 
73 FILE *
PyFile_AsFile(PyObject * f)74 PyFile_AsFile(PyObject *f)
75 {
76     if (f == NULL || !PyFile_Check(f))
77         return NULL;
78     else
79         return ((PyFileObject *)f)->f_fp;
80 }
81 
/* Record one more in-progress use of fobj by raising its
   unlocked_count; close_the_file() refuses to close while it is > 0. */
void PyFile_IncUseCount(PyFileObject *fobj)
{
    fobj->unlocked_count += 1;
}
86 
/* Drop one in-progress use of fobj by lowering its unlocked_count.
   The count must never go negative (checked in debug builds). */
void PyFile_DecUseCount(PyFileObject *fobj)
{
    fobj->unlocked_count -= 1;
    assert(fobj->unlocked_count >= 0);
}
92 
93 PyObject *
PyFile_Name(PyObject * f)94 PyFile_Name(PyObject *f)
95 {
96     if (f == NULL || !PyFile_Check(f))
97         return NULL;
98     else
99         return ((PyFileObject *)f)->f_name;
100 }
101 
102 /* This is a safe wrapper around PyObject_Print to print to the FILE
103    of a PyFileObject. PyObject_Print releases the GIL but knows nothing
104    about PyFileObject. */
105 static int
file_PyObject_Print(PyObject * op,PyFileObject * f,int flags)106 file_PyObject_Print(PyObject *op, PyFileObject *f, int flags)
107 {
108     int result;
109     PyFile_IncUseCount(f);
110     result = PyObject_Print(op, f->f_fp, flags);
111     PyFile_DecUseCount(f);
112     return result;
113 }
114 
115 /* On Unix, fopen will succeed for directories.
116    In Python, there should be no file objects referring to
117    directories, so we need a check.  */
118 
119 static PyFileObject*
dircheck(PyFileObject * f)120 dircheck(PyFileObject* f)
121 {
122 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
123     struct stat buf;
124     if (f->f_fp == NULL)
125         return f;
126     if (fstat(fileno(f->f_fp), &buf) == 0 &&
127         S_ISDIR(buf.st_mode)) {
128         char *msg = strerror(EISDIR);
129         PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(isO)",
130                                               EISDIR, msg, f->f_name);
131         PyErr_SetObject(PyExc_IOError, exc);
132         Py_XDECREF(exc);
133         return NULL;
134     }
135 #endif
136     return f;
137 }
138 
139 
140 static PyObject *
fill_file_fields(PyFileObject * f,FILE * fp,PyObject * name,char * mode,int (* close)(FILE *))141 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
142                  int (*close)(FILE *))
143 {
144     assert(name != NULL);
145     assert(f != NULL);
146     assert(PyFile_Check(f));
147     assert(f->f_fp == NULL);
148 
149     Py_DECREF(f->f_name);
150     Py_DECREF(f->f_mode);
151     Py_DECREF(f->f_encoding);
152     Py_DECREF(f->f_errors);
153 
154     Py_INCREF(name);
155     f->f_name = name;
156 
157     f->f_mode = PyString_FromString(mode);
158 
159     f->f_close = close;
160     f->f_softspace = 0;
161     f->f_binary = strchr(mode,'b') != NULL;
162     f->f_buf = NULL;
163     f->f_univ_newline = (strchr(mode, 'U') != NULL);
164     f->f_newlinetypes = NEWLINE_UNKNOWN;
165     f->f_skipnextlf = 0;
166     Py_INCREF(Py_None);
167     f->f_encoding = Py_None;
168     Py_INCREF(Py_None);
169     f->f_errors = Py_None;
170     f->readable = f->writable = 0;
171     if (strchr(mode, 'r') != NULL || f->f_univ_newline)
172         f->readable = 1;
173     if (strchr(mode, 'w') != NULL || strchr(mode, 'a') != NULL)
174         f->writable = 1;
175     if (strchr(mode, '+') != NULL)
176         f->readable = f->writable = 1;
177 
178     if (f->f_mode == NULL)
179         return NULL;
180     f->f_fp = fp;
181     f = dircheck(f);
182     return (PyObject *) f;
183 }
184 
185 #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
186 #define Py_VERIFY_WINNT
/* The CRT on windows compiled with Visual Studio 2005 and higher may
 * assert if given invalid mode strings.  This is all fine and well
 * in static languages like C where the mode string is typically hard
 * coded.  But in Python, where we pass in the mode string from the user,
 * we need to verify it first manually.
 *
 * Returns 1 when mode is acceptable and 0 otherwise.  Accepted form:
 * optional leading spaces, one of 'r', 'w', 'a', then any mix of spaces,
 * 'N', at most one each of '+', 'T', 'D', and at most one letter from
 * each of the pairs b/t, c/n, S/R; optionally followed by
 * ", ccs = <encoding>" where the encoding is UTF-8, UTF-16LE or UNICODE;
 * then optional trailing spaces.
 */
static int _PyVerify_Mode_WINNT(const char *mode)
{
    static const char *const encodings[] = {"UTF-8", "UTF-16LE", "UNICODE"};
    const size_t n_encodings = sizeof(encodings) / sizeof(encodings[0]);
    const char *set, *hit;
    int singles = 0;        /* bit i set: i-th char of "+TD" seen */
    int pairs = 0;          /* bit i set: i-th pair of "btcnSR" seen */
    int has_encoding = 0;
    size_t i;

    while (*mode == ' ') /* strip initial spaces */
        ++mode;
    /* must start with one of these; the explicit NUL test is needed
       because strchr() also matches the terminator of "rwa" */
    if (*mode == '\0' || !strchr("rwa", *mode))
        return 0;
    while (*++mode) {
        if (*mode == ' ' || *mode == 'N') /* ignore spaces and N */
            continue;
        set = "+TD"; /* each of these can appear only once */
        hit = strchr(set, *mode);
        if (hit) {
            int bit = 1 << (int)(hit - set);
            if (singles & bit)
                return 0;
            singles |= bit;
            continue;
        }
        set = "btcnSR"; /* only one of each letter in the pairs allowed */
        hit = strchr(set, *mode);
        if (hit) {
            int bit = 1 << (int)((hit - set) / 2);
            if (pairs & bit)
                return 0;
            pairs |= bit;
            continue;
        }
        if (*mode == ',') {
            has_encoding = 1;
            ++mode; /* step over the comma */
            break;
        }
        return 0; /* found an invalid char */
    }

    if (has_encoding) {
        while (*mode == ' ')
            ++mode;
        /* find 'ccs =' */
        if (strncmp(mode, "ccs", 3) != 0)
            return 0;
        mode += 3;
        while (*mode == ' ')
            ++mode;
        if (*mode != '=')
            return 0;
        ++mode; /* step over the '=' */
        while (*mode == ' ')
            ++mode;
        for (i = 0; i < n_encodings; ++i) {
            size_t len = strlen(encodings[i]);
            if (strncmp(mode, encodings[i], len) == 0) {
                mode += len; /* found a valid encoding */
                break;
            }
        }
        if (i == n_encodings)
            return 0;
    }
    /* skip trailing spaces */
    while (*mode == ' ')
        ++mode;

    return *mode == '\0'; /* must be at the end of the string */
}
264 #endif
265 
266 /* check for known incorrect mode strings - problem is, platforms are
267    free to accept any mode characters they like and are supposed to
268    ignore stuff they don't understand... write or append mode with
269    universal newline support is expressly forbidden by PEP 278.
270    Additionally, remove the 'U' from the mode string as platforms
271    won't know what it is. Non-zero return signals an exception */
272 int
_PyFile_SanitizeMode(char * mode)273 _PyFile_SanitizeMode(char *mode)
274 {
275     char *upos;
276     size_t len = strlen(mode);
277 
278     if (!len) {
279         PyErr_SetString(PyExc_ValueError, "empty mode string");
280         return -1;
281     }
282 
283     upos = strchr(mode, 'U');
284     if (upos) {
285         memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
286 
287         if (mode[0] == 'w' || mode[0] == 'a') {
288             PyErr_Format(PyExc_ValueError, "universal newline "
289                          "mode can only be used with modes "
290                          "starting with 'r'");
291             return -1;
292         }
293 
294         if (mode[0] != 'r') {
295             memmove(mode+1, mode, strlen(mode)+1);
296             mode[0] = 'r';
297         }
298 
299         if (!strchr(mode, 'b')) {
300             memmove(mode+2, mode+1, strlen(mode));
301             mode[1] = 'b';
302         }
303     } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
304         PyErr_Format(PyExc_ValueError, "mode string must begin with "
305                     "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
306         return -1;
307     }
308 #ifdef Py_VERIFY_WINNT
309     /* additional checks on NT with visual studio 2005 and higher */
310     if (!_PyVerify_Mode_WINNT(mode)) {
311         PyErr_Format(PyExc_ValueError, "Invalid mode ('%.50s')", mode);
312         return -1;
313     }
314 #endif
315     return 0;
316 }
317 
318 static PyObject *
open_the_file(PyFileObject * f,char * name,char * mode)319 open_the_file(PyFileObject *f, char *name, char *mode)
320 {
321     char *newmode;
322     assert(f != NULL);
323     assert(PyFile_Check(f));
324 #ifdef MS_WINDOWS
325     /* windows ignores the passed name in order to support Unicode */
326     assert(f->f_name != NULL);
327 #else
328     assert(name != NULL);
329 #endif
330     assert(mode != NULL);
331     assert(f->f_fp == NULL);
332 
333     /* probably need to replace 'U' by 'rb' */
334     newmode = PyMem_MALLOC(strlen(mode) + 3);
335     if (!newmode) {
336         PyErr_NoMemory();
337         return NULL;
338     }
339     strcpy(newmode, mode);
340 
341     if (_PyFile_SanitizeMode(newmode)) {
342         f = NULL;
343         goto cleanup;
344     }
345 
346     /* rexec.py can't stop a user from getting the file() constructor --
347        all they have to do is get *any* file object f, and then do
348        type(f).  Here we prevent them from doing damage with it. */
349     if (PyEval_GetRestricted()) {
350         PyErr_SetString(PyExc_IOError,
351         "file() constructor not accessible in restricted mode");
352         f = NULL;
353         goto cleanup;
354     }
355     errno = 0;
356 
357 #ifdef MS_WINDOWS
358     if (PyUnicode_Check(f->f_name)) {
359         PyObject *wmode;
360         wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
361         if (f->f_name && wmode) {
362             FILE_BEGIN_ALLOW_THREADS(f)
363             /* PyUnicode_AS_UNICODE OK without thread
364                lock as it is a simple dereference. */
365             f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
366                               PyUnicode_AS_UNICODE(wmode));
367             FILE_END_ALLOW_THREADS(f)
368         }
369         Py_XDECREF(wmode);
370     }
371 #endif
372     if (NULL == f->f_fp && NULL != name) {
373         FILE_BEGIN_ALLOW_THREADS(f)
374         f->f_fp = fopen(name, newmode);
375         FILE_END_ALLOW_THREADS(f)
376     }
377 
378     if (f->f_fp == NULL) {
379 #if defined  _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
380         /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
381          * across all Windows flavors.  When it sets EINVAL varies
382          * across Windows flavors, the exact conditions aren't
383          * documented, and the answer lies in the OS's implementation
384          * of Win32's CreateFile function (whose source is secret).
385          * Seems the best we can do is map EINVAL to ENOENT.
386          * Starting with Visual Studio .NET 2005, EINVAL is correctly
387          * set by our CRT error handler (set in exceptions.c.)
388          */
389         if (errno == 0)         /* bad mode string */
390             errno = EINVAL;
391         else if (errno == EINVAL) /* unknown, but not a mode string */
392             errno = ENOENT;
393 #endif
394         /* EINVAL is returned when an invalid filename or
395          * an invalid mode is supplied. */
396         if (errno == EINVAL) {
397             PyObject *v;
398             char message[100];
399             PyOS_snprintf(message, 100,
400                 "invalid mode ('%.50s') or filename", mode);
401             v = Py_BuildValue("(isO)", errno, message, f->f_name);
402             if (v != NULL) {
403                 PyErr_SetObject(PyExc_IOError, v);
404                 Py_DECREF(v);
405             }
406         }
407         else
408             PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
409         f = NULL;
410     }
411     if (f != NULL)
412         f = dircheck(f);
413 
414 cleanup:
415     PyMem_FREE(newmode);
416 
417     return (PyObject *)f;
418 }
419 
420 static PyObject *
close_the_file(PyFileObject * f)421 close_the_file(PyFileObject *f)
422 {
423     int sts = 0;
424     int (*local_close)(FILE *);
425     FILE *local_fp = f->f_fp;
426     char *local_setbuf = f->f_setbuf;
427     if (local_fp != NULL) {
428         local_close = f->f_close;
429         if (local_close != NULL && f->unlocked_count > 0) {
430             if (f->ob_refcnt > 0) {
431                 PyErr_SetString(PyExc_IOError,
432                     "close() called during concurrent "
433                     "operation on the same file object.");
434             } else {
435                 /* This should not happen unless someone is
436                  * carelessly playing with the PyFileObject
437                  * struct fields and/or its associated FILE
438                  * pointer. */
439                 PyErr_SetString(PyExc_SystemError,
440                     "PyFileObject locking error in "
441                     "destructor (refcnt <= 0 at close).");
442             }
443             return NULL;
444         }
445         /* NULL out the FILE pointer before releasing the GIL, because
446          * it will not be valid anymore after the close() function is
447          * called. */
448         f->f_fp = NULL;
449         if (local_close != NULL) {
450             /* Issue #9295: must temporarily reset f_setbuf so that another
451                thread doesn't free it when running file_close() concurrently.
452                Otherwise this close() will crash when flushing the buffer. */
453             f->f_setbuf = NULL;
454             Py_BEGIN_ALLOW_THREADS
455             errno = 0;
456             sts = (*local_close)(local_fp);
457             Py_END_ALLOW_THREADS
458             f->f_setbuf = local_setbuf;
459             if (sts == EOF)
460                 return PyErr_SetFromErrno(PyExc_IOError);
461             if (sts != 0)
462                 return PyInt_FromLong((long)sts);
463         }
464     }
465     Py_RETURN_NONE;
466 }
467 
468 PyObject *
PyFile_FromFile(FILE * fp,char * name,char * mode,int (* close)(FILE *))469 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
470 {
471     PyFileObject *f;
472     PyObject *o_name;
473 
474     f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type, NULL, NULL);
475     if (f == NULL)
476         return NULL;
477     o_name = PyString_FromString(name);
478     if (o_name == NULL) {
479         if (close != NULL && fp != NULL)
480             close(fp);
481         Py_DECREF(f);
482         return NULL;
483     }
484     if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
485         Py_DECREF(f);
486         Py_DECREF(o_name);
487         return NULL;
488     }
489     Py_DECREF(o_name);
490     return (PyObject *)f;
491 }
492 
493 PyObject *
PyFile_FromString(char * name,char * mode)494 PyFile_FromString(char *name, char *mode)
495 {
496     extern int fclose(FILE *);
497     PyFileObject *f;
498 
499     f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
500     if (f != NULL) {
501         if (open_the_file(f, name, mode) == NULL) {
502             Py_DECREF(f);
503             f = NULL;
504         }
505     }
506     return (PyObject *)f;
507 }
508 
509 void
PyFile_SetBufSize(PyObject * f,int bufsize)510 PyFile_SetBufSize(PyObject *f, int bufsize)
511 {
512     PyFileObject *file = (PyFileObject *)f;
513     if (bufsize >= 0) {
514         int type;
515         switch (bufsize) {
516         case 0:
517             type = _IONBF;
518             break;
519 #ifdef HAVE_SETVBUF
520         case 1:
521             type = _IOLBF;
522             bufsize = BUFSIZ;
523             break;
524 #endif
525         default:
526             type = _IOFBF;
527 #ifndef HAVE_SETVBUF
528             bufsize = BUFSIZ;
529 #endif
530             break;
531         }
532         fflush(file->f_fp);
533         if (type == _IONBF) {
534             PyMem_Free(file->f_setbuf);
535             file->f_setbuf = NULL;
536         } else {
537             file->f_setbuf = (char *)PyMem_Realloc(file->f_setbuf,
538                                                     bufsize);
539         }
540 #ifdef HAVE_SETVBUF
541         setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
542 #else /* !HAVE_SETVBUF */
543         setbuf(file->f_fp, file->f_setbuf);
544 #endif /* !HAVE_SETVBUF */
545     }
546 }
547 
548 /* Set the encoding used to output Unicode strings.
549    Return 1 on success, 0 on failure. */
550 
551 int
PyFile_SetEncoding(PyObject * f,const char * enc)552 PyFile_SetEncoding(PyObject *f, const char *enc)
553 {
554     return PyFile_SetEncodingAndErrors(f, enc, NULL);
555 }
556 
557 int
PyFile_SetEncodingAndErrors(PyObject * f,const char * enc,char * errors)558 PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
559 {
560     PyFileObject *file = (PyFileObject*)f;
561     PyObject *str, *oerrors;
562 
563     assert(PyFile_Check(f));
564     str = PyString_FromString(enc);
565     if (!str)
566         return 0;
567     if (errors) {
568         oerrors = PyString_FromString(errors);
569         if (!oerrors) {
570             Py_DECREF(str);
571             return 0;
572         }
573     } else {
574         oerrors = Py_None;
575         Py_INCREF(Py_None);
576     }
577     Py_SETREF(file->f_encoding, str);
578     Py_SETREF(file->f_errors, oerrors);
579     return 1;
580 }
581 
582 static PyObject *
err_closed(void)583 err_closed(void)
584 {
585     PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
586     return NULL;
587 }
588 
589 static PyObject *
err_mode(char * action)590 err_mode(char *action)
591 {
592     PyErr_Format(PyExc_IOError, "File not open for %s", action);
593     return NULL;
594 }
595 
596 /* Refuse regular file I/O if there's data in the iteration-buffer.
597  * Mixing them would cause data to arrive out of order, as the read*
598  * methods don't use the iteration buffer. */
599 static PyObject *
err_iterbuffered(void)600 err_iterbuffered(void)
601 {
602     PyErr_SetString(PyExc_ValueError,
603         "Mixing iteration and read methods would lose data");
604     return NULL;
605 }
606 
607 static void drop_readahead(PyFileObject *);
608 
609 /* Methods */
610 
611 static void
file_dealloc(PyFileObject * f)612 file_dealloc(PyFileObject *f)
613 {
614     PyObject *ret;
615     if (f->weakreflist != NULL)
616         PyObject_ClearWeakRefs((PyObject *) f);
617     ret = close_the_file(f);
618     if (!ret) {
619         PySys_WriteStderr("close failed in file object destructor:\n");
620         PyErr_Print();
621     }
622     else {
623         Py_DECREF(ret);
624     }
625     PyMem_Free(f->f_setbuf);
626     Py_XDECREF(f->f_name);
627     Py_XDECREF(f->f_mode);
628     Py_XDECREF(f->f_encoding);
629     Py_XDECREF(f->f_errors);
630     drop_readahead(f);
631     Py_TYPE(f)->tp_free((PyObject *)f);
632 }
633 
634 static PyObject *
file_repr(PyFileObject * f)635 file_repr(PyFileObject *f)
636 {
637     PyObject *ret = NULL;
638     PyObject *name = NULL;
639     if (PyUnicode_Check(f->f_name)) {
640 #ifdef Py_USING_UNICODE
641         const char *name_str;
642         name = PyUnicode_AsUnicodeEscapeString(f->f_name);
643         name_str = name ? PyString_AsString(name) : "?";
644         ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
645                            f->f_fp == NULL ? "closed" : "open",
646                            name_str,
647                            PyString_AsString(f->f_mode),
648                            f);
649         Py_XDECREF(name);
650         return ret;
651 #endif
652     } else {
653         name = PyObject_Repr(f->f_name);
654         if (name == NULL)
655             return NULL;
656         ret = PyString_FromFormat("<%s file %s, mode '%s' at %p>",
657                            f->f_fp == NULL ? "closed" : "open",
658                            PyString_AsString(name),
659                            PyString_AsString(f->f_mode),
660                            f);
661         Py_XDECREF(name);
662         return ret;
663     }
664 }
665 
666 static PyObject *
file_close(PyFileObject * f)667 file_close(PyFileObject *f)
668 {
669     PyObject *sts = close_the_file(f);
670     if (sts) {
671         PyMem_Free(f->f_setbuf);
672         f->f_setbuf = NULL;
673     }
674     return sts;
675 }
676 
677 
/* Our very own off_t-like type, 64-bit if possible: plain off_t without
   large file support or when off_t is already 8 bytes, otherwise fpos_t
   when that is 8 bytes. */
#ifndef HAVE_LARGEFILE_SUPPORT
typedef off_t Py_off_t;
#elif SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
688 
689 
690 /* a portable fseek() function
691    return 0 on success, non-zero on failure (with errno set) */
692 static int
_portable_fseek(FILE * fp,Py_off_t offset,int whence)693 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
694 {
695 #if !defined(HAVE_LARGEFILE_SUPPORT)
696     return fseek(fp, offset, whence);
697 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
698     return fseeko(fp, offset, whence);
699 #elif defined(HAVE_FSEEK64)
700     return fseek64(fp, offset, whence);
701 #elif defined(__BEOS__)
702     return _fseek(fp, offset, whence);
703 #elif SIZEOF_FPOS_T >= 8
704     /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
705        and fgetpos() to implement fseek()*/
706     fpos_t pos;
707     switch (whence) {
708     case SEEK_END:
709 #ifdef MS_WINDOWS
710         fflush(fp);
711         if (_lseeki64(fileno(fp), 0, 2) == -1)
712             return -1;
713 #else
714         if (fseek(fp, 0, SEEK_END) != 0)
715             return -1;
716 #endif
717         /* fall through */
718     case SEEK_CUR:
719         if (fgetpos(fp, &pos) != 0)
720             return -1;
721         offset += pos;
722         break;
723     /* case SEEK_SET: break; */
724     }
725     return fsetpos(fp, &offset);
726 #else
727 #error "Large file support, but no way to fseek."
728 #endif
729 }
730 
731 
732 /* a portable ftell() function
733    Return -1 on failure with errno set appropriately, current file
734    position on success */
735 static Py_off_t
_portable_ftell(FILE * fp)736 _portable_ftell(FILE* fp)
737 {
738 #if !defined(HAVE_LARGEFILE_SUPPORT)
739     return ftell(fp);
740 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
741     return ftello(fp);
742 #elif defined(HAVE_FTELL64)
743     return ftell64(fp);
744 #elif SIZEOF_FPOS_T >= 8
745     fpos_t pos;
746     if (fgetpos(fp, &pos) != 0)
747         return -1;
748     return pos;
749 #else
750 #error "Large file support, but no way to ftell."
751 #endif
752 }
753 
754 
755 static PyObject *
file_seek(PyFileObject * f,PyObject * args)756 file_seek(PyFileObject *f, PyObject *args)
757 {
758     int whence;
759     int ret;
760     Py_off_t offset;
761     PyObject *offobj, *off_index;
762 
763     if (f->f_fp == NULL)
764         return err_closed();
765     drop_readahead(f);
766     whence = 0;
767     if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
768         return NULL;
769     off_index = PyNumber_Index(offobj);
770     if (!off_index) {
771         if (!PyFloat_Check(offobj))
772             return NULL;
773         /* Deprecated in 2.6 */
774         PyErr_Clear();
775         if (PyErr_WarnEx(PyExc_DeprecationWarning,
776                          "integer argument expected, got float",
777                          1) < 0)
778             return NULL;
779         off_index = offobj;
780         Py_INCREF(offobj);
781     }
782 #if !defined(HAVE_LARGEFILE_SUPPORT)
783     offset = PyInt_AsLong(off_index);
784 #else
785     offset = PyLong_Check(off_index) ?
786         PyLong_AsLongLong(off_index) : PyInt_AsLong(off_index);
787 #endif
788     Py_DECREF(off_index);
789     if (PyErr_Occurred())
790         return NULL;
791 
792     FILE_BEGIN_ALLOW_THREADS(f)
793     errno = 0;
794     ret = _portable_fseek(f->f_fp, offset, whence);
795     FILE_END_ALLOW_THREADS(f)
796 
797     if (ret != 0) {
798         PyErr_SetFromErrno(PyExc_IOError);
799         clearerr(f->f_fp);
800         return NULL;
801     }
802     f->f_skipnextlf = 0;
803     Py_INCREF(Py_None);
804     return Py_None;
805 }
806 
807 
#ifdef HAVE_FTRUNCATE
/* truncate([size]) method.
 *
 * Resizes the file to size, or to the current position when size is
 * omitted, and leaves the file position where it was before the call.
 * Returns None on success; NULL with an exception set when the file is
 * closed, is not open for writing, the argument is invalid, or any of
 * the underlying stream operations fails (IOError from errno).
 */
static PyObject *
file_truncate(PyFileObject *f, PyObject *args)
{
    PyObject *size_arg = NULL;
    Py_off_t target;
    Py_off_t saved_pos;
    int status;

    if (f->f_fp == NULL)
        return err_closed();
    if (!f->writable)
        return err_mode("writing");
    if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &size_arg))
        return NULL;

    /* Get current file position.  If the file happens to be open for
     * update and the last operation was an input operation, C doesn't
     * define what the later fflush() will do, but we promise truncate()
     * won't change the current position (and fflush() *does* change it
     * then at least on Windows).  The easiest thing is to capture
     * current pos now and seek back to it at the end.
     */
    FILE_BEGIN_ALLOW_THREADS(f)
    errno = 0;
    saved_pos = _portable_ftell(f->f_fp);
    FILE_END_ALLOW_THREADS(f)
    if (saved_pos == -1)
        goto onioerror;

    /* The target size is the current position unless the caller gave
     * an explicit value.
     */
    if (size_arg == NULL) {
        target = saved_pos;
    }
    else {
#ifndef HAVE_LARGEFILE_SUPPORT
        target = PyInt_AsLong(size_arg);
#else
        if (PyLong_Check(size_arg))
            target = PyLong_AsLongLong(size_arg);
        else
            target = PyInt_AsLong(size_arg);
#endif
        if (PyErr_Occurred())
            return NULL;
    }

    /* Flush the stream.  We're mixing stream-level I/O with lower-level
     * I/O, and a flush may be necessary to synch both platform views
     * of the current file state.
     */
    FILE_BEGIN_ALLOW_THREADS(f)
    errno = 0;
    status = fflush(f->f_fp);
    FILE_END_ALLOW_THREADS(f)
    if (status != 0)
        goto onioerror;

#ifdef MS_WINDOWS
    /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
       so don't even try using it. */
    {
        HANDLE hFile;

        /* Have to move current pos to desired endpoint on Windows. */
        FILE_BEGIN_ALLOW_THREADS(f)
        errno = 0;
        status = _portable_fseek(f->f_fp, target, SEEK_SET) != 0;
        FILE_END_ALLOW_THREADS(f)
        if (status)
            goto onioerror;

        /* Truncate.  Note that this may grow the file! */
        FILE_BEGIN_ALLOW_THREADS(f)
        errno = 0;
        hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
        status = hFile == (HANDLE)-1;
        if (status == 0) {
            status = SetEndOfFile(hFile) == 0;
            if (status)
                errno = EACCES;
        }
        FILE_END_ALLOW_THREADS(f)
        if (status)
            goto onioerror;
    }
#else
    FILE_BEGIN_ALLOW_THREADS(f)
    errno = 0;
    status = ftruncate(fileno(f->f_fp), target);
    FILE_END_ALLOW_THREADS(f)
    if (status != 0)
        goto onioerror;
#endif /* !MS_WINDOWS */

    /* Restore original file position. */
    FILE_BEGIN_ALLOW_THREADS(f)
    errno = 0;
    status = _portable_fseek(f->f_fp, saved_pos, SEEK_SET) != 0;
    FILE_END_ALLOW_THREADS(f)
    if (status)
        goto onioerror;

    Py_RETURN_NONE;

onioerror:
    PyErr_SetFromErrno(PyExc_IOError);
    clearerr(f->f_fp);
    return NULL;
}
#endif /* HAVE_FTRUNCATE */
920 
921 static PyObject *
file_tell(PyFileObject * f)922 file_tell(PyFileObject *f)
923 {
924     Py_off_t pos;
925 
926     if (f->f_fp == NULL)
927         return err_closed();
928     FILE_BEGIN_ALLOW_THREADS(f)
929     errno = 0;
930     pos = _portable_ftell(f->f_fp);
931     FILE_END_ALLOW_THREADS(f)
932 
933     if (pos == -1) {
934         PyErr_SetFromErrno(PyExc_IOError);
935         clearerr(f->f_fp);
936         return NULL;
937     }
938     if (f->f_skipnextlf) {
939         int c;
940         c = GETC(f->f_fp);
941         if (c == '\n') {
942             f->f_newlinetypes |= NEWLINE_CRLF;
943             pos++;
944             f->f_skipnextlf = 0;
945         } else if (c != EOF) ungetc(c, f->f_fp);
946     }
947 #if !defined(HAVE_LARGEFILE_SUPPORT)
948     return PyInt_FromLong(pos);
949 #else
950     return PyLong_FromLongLong(pos);
951 #endif
952 }
953 
954 static PyObject *
file_fileno(PyFileObject * f)955 file_fileno(PyFileObject *f)
956 {
957     if (f->f_fp == NULL)
958         return err_closed();
959     return PyInt_FromLong((long) fileno(f->f_fp));
960 }
961 
962 static PyObject *
file_flush(PyFileObject * f)963 file_flush(PyFileObject *f)
964 {
965     int res;
966 
967     if (f->f_fp == NULL)
968         return err_closed();
969     FILE_BEGIN_ALLOW_THREADS(f)
970     errno = 0;
971     res = fflush(f->f_fp);
972     FILE_END_ALLOW_THREADS(f)
973     if (res != 0) {
974         PyErr_SetFromErrno(PyExc_IOError);
975         clearerr(f->f_fp);
976         return NULL;
977     }
978     Py_INCREF(Py_None);
979     return Py_None;
980 }
981 
982 static PyObject *
file_isatty(PyFileObject * f)983 file_isatty(PyFileObject *f)
984 {
985     long res;
986     if (f->f_fp == NULL)
987         return err_closed();
988     FILE_BEGIN_ALLOW_THREADS(f)
989     res = isatty((int)fileno(f->f_fp));
990     FILE_END_ALLOW_THREADS(f)
991     return PyBool_FromLong(res);
992 }
993 
994 
/* SMALLCHUNK is the larger of BUFSIZ and 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif
1000 
1001 static size_t
new_buffersize(PyFileObject * f,size_t currentsize)1002 new_buffersize(PyFileObject *f, size_t currentsize)
1003 {
1004 #ifdef HAVE_FSTAT
1005     off_t pos, end;
1006     struct stat st;
1007     if (fstat(fileno(f->f_fp), &st) == 0) {
1008         end = st.st_size;
1009         /* The following is not a bug: we really need to call lseek()
1010            *and* ftell().  The reason is that some stdio libraries
1011            mistakenly flush their buffer when ftell() is called and
1012            the lseek() call it makes fails, thereby throwing away
1013            data that cannot be recovered in any way.  To avoid this,
1014            we first test lseek(), and only call ftell() if lseek()
1015            works.  We can't use the lseek() value either, because we
1016            need to take the amount of buffered data into account.
1017            (Yet another reason why stdio stinks. :-) */
1018         pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
1019         if (pos >= 0) {
1020             pos = ftell(f->f_fp);
1021         }
1022         if (pos < 0)
1023             clearerr(f->f_fp);
1024         if (end > pos && pos >= 0)
1025             return currentsize + end - pos + 1;
1026         /* Add 1 so if the file were to grow we'd notice. */
1027     }
1028 #endif
1029     /* Expand the buffer by an amount proportional to the current size,
1030        giving us amortized linear-time behavior. Use a less-than-double
1031        growth factor to avoid excessive allocation. */
1032     return currentsize + (currentsize >> 3) + 6;
1033 }
1034 
/* BLOCKED_ERRNO(x) is true when x is one of the "operation would block"
 * errno values this platform defines (EWOULDBLOCK and/or EAGAIN), and is
 * the constant 0 when neither is defined. */
#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
#elif defined(EWOULDBLOCK)
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
#elif defined(EAGAIN)
#define BLOCKED_ERRNO(x) ((x) == EAGAIN)
#else
#define BLOCKED_ERRNO(x) 0
#endif
1048 
1049 static PyObject *
file_read(PyFileObject * f,PyObject * args)1050 file_read(PyFileObject *f, PyObject *args)
1051 {
1052     long bytesrequested = -1;
1053     size_t bytesread, buffersize, chunksize;
1054     PyObject *v;
1055 
1056     if (f->f_fp == NULL)
1057         return err_closed();
1058     if (!f->readable)
1059         return err_mode("reading");
1060     /* refuse to mix with f.next() */
1061     if (f->f_buf != NULL &&
1062         (f->f_bufend - f->f_bufptr) > 0 &&
1063         f->f_buf[0] != '\0')
1064         return err_iterbuffered();
1065     if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
1066         return NULL;
1067     if (bytesrequested < 0)
1068         buffersize = new_buffersize(f, (size_t)0);
1069     else
1070         buffersize = bytesrequested;
1071     if (buffersize > PY_SSIZE_T_MAX) {
1072         PyErr_SetString(PyExc_OverflowError,
1073     "requested number of bytes is more than a Python string can hold");
1074         return NULL;
1075     }
1076     v = PyString_FromStringAndSize((char *)NULL, buffersize);
1077     if (v == NULL)
1078         return NULL;
1079     bytesread = 0;
1080     for (;;) {
1081         int interrupted;
1082         FILE_BEGIN_ALLOW_THREADS(f)
1083         errno = 0;
1084         chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
1085                   buffersize - bytesread, f->f_fp, (PyObject *)f);
1086         interrupted = ferror(f->f_fp) && errno == EINTR;
1087         FILE_END_ALLOW_THREADS(f)
1088         if (interrupted) {
1089             clearerr(f->f_fp);
1090             if (PyErr_CheckSignals()) {
1091                 Py_DECREF(v);
1092                 return NULL;
1093             }
1094         }
1095         if (chunksize == 0) {
1096             if (interrupted)
1097                 continue;
1098             if (!ferror(f->f_fp))
1099                 break;
1100             clearerr(f->f_fp);
1101             /* When in non-blocking mode, data shouldn't
1102              * be discarded if a blocking signal was
1103              * received. That will also happen if
1104              * chunksize != 0, but bytesread < buffersize. */
1105             if (bytesread > 0 && BLOCKED_ERRNO(errno))
1106                 break;
1107             PyErr_SetFromErrno(PyExc_IOError);
1108             Py_DECREF(v);
1109             return NULL;
1110         }
1111         bytesread += chunksize;
1112         if (bytesread < buffersize && !interrupted) {
1113             clearerr(f->f_fp);
1114             break;
1115         }
1116         if (bytesrequested < 0) {
1117             buffersize = new_buffersize(f, buffersize);
1118             if (_PyString_Resize(&v, buffersize) < 0)
1119                 return NULL;
1120         } else {
1121             /* Got what was requested. */
1122             break;
1123         }
1124     }
1125     if (bytesread != buffersize && _PyString_Resize(&v, bytesread))
1126         return NULL;
1127     return v;
1128 }
1129 
1130 static PyObject *
file_readinto(PyFileObject * f,PyObject * args)1131 file_readinto(PyFileObject *f, PyObject *args)
1132 {
1133     char *ptr;
1134     Py_ssize_t ntodo;
1135     Py_ssize_t ndone, nnow;
1136     Py_buffer pbuf;
1137 
1138     if (f->f_fp == NULL)
1139         return err_closed();
1140     if (!f->readable)
1141         return err_mode("reading");
1142     /* refuse to mix with f.next() */
1143     if (f->f_buf != NULL &&
1144         (f->f_bufend - f->f_bufptr) > 0 &&
1145         f->f_buf[0] != '\0')
1146         return err_iterbuffered();
1147     if (!PyArg_ParseTuple(args, "w*", &pbuf))
1148         return NULL;
1149     ptr = pbuf.buf;
1150     ntodo = pbuf.len;
1151     ndone = 0;
1152     while (ntodo > 0) {
1153         int interrupted;
1154         FILE_BEGIN_ALLOW_THREADS(f)
1155         errno = 0;
1156         nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
1157                                         (PyObject *)f);
1158         interrupted = ferror(f->f_fp) && errno == EINTR;
1159         FILE_END_ALLOW_THREADS(f)
1160         if (interrupted) {
1161             clearerr(f->f_fp);
1162             if (PyErr_CheckSignals()) {
1163                 PyBuffer_Release(&pbuf);
1164                 return NULL;
1165             }
1166         }
1167         if (nnow == 0) {
1168             if (interrupted)
1169                 continue;
1170             if (!ferror(f->f_fp))
1171                 break;
1172             PyErr_SetFromErrno(PyExc_IOError);
1173             clearerr(f->f_fp);
1174             PyBuffer_Release(&pbuf);
1175             return NULL;
1176         }
1177         ndone += nnow;
1178         ntodo -= nnow;
1179     }
1180     PyBuffer_Release(&pbuf);
1181     return PyInt_FromSsize_t(ndone);
1182 }
1183 
1184 /**************************************************************************
1185 Routine to get next line using platform fgets().
1186 
1187 Under MSVC 6:
1188 
1189 + MS threadsafe getc is very slow (multiple layers of function calls before+
1190   after each character, to lock+unlock the stream).
1191 + The stream-locking functions are MS-internal -- can't access them from user
1192   code.
1193 + There's nothing Tim could find in the MS C or platform SDK libraries that
1194   can worm around this.
1195 + MS fgets locks/unlocks only once per line; it's the only hook we have.
1196 
1197 So we use fgets for speed(!), despite that it's painful.
1198 
1199 MS realloc is also slow.
1200 
1201 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
1202 have):
1203     Linux               a wash
1204     Solaris             a wash
1205     Tru64 Unix          getline_via_fgets significantly faster
1206 
1207 CAUTION:  The C std isn't clear about this:  in those cases where fgets
1208 writes something into the buffer, can it write into any position beyond the
1209 required trailing null byte?  MSVC 6 fgets does not, and no platform is (yet)
1210 known on which it does; and it would be a strange way to code fgets. Still,
1211 getline_via_fgets may not work correctly if it does.  The std test
1212 test_bufio.py should fail if platform fgets() routinely writes beyond the
1213 trailing null byte.  #define DONT_USE_FGETS_IN_GETLINE to disable this code.
1214 **************************************************************************/
1215 
/* Use this routine if told to, or by default on non-getc_unlocked()
 * platforms unless told not to.  Yikes!  Let's spell that out:
 * On a platform with getc_unlocked():
 *     By default, use getc_unlocked().
 *     If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
 * On a platform without getc_unlocked():
 *     By default, use fgets().
 *     If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
 */
/* DONT_USE_FGETS_IN_GETLINE always wins; otherwise fgets() is selected by
 * default only when getc_unlocked() is unavailable. */
#if defined(DONT_USE_FGETS_IN_GETLINE)
#undef USE_FGETS_IN_GETLINE
#elif !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
#define USE_FGETS_IN_GETLINE
#endif
1232 
1233 #ifdef USE_FGETS_IN_GETLINE
1234 static PyObject*
getline_via_fgets(PyFileObject * f,FILE * fp)1235 getline_via_fgets(PyFileObject *f, FILE *fp)
1236 {
1237 /* INITBUFSIZE is the maximum line length that lets us get away with the fast
1238  * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
1239  * to fill this much of the buffer with a known value in order to figure out
1240  * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
1241  * than "most" lines, we waste time filling unused buffer slots.  100 is
1242  * surely adequate for most peoples' email archives, chewing over source code,
1243  * etc -- "regular old text files".
1244  * MAXBUFSIZE is the maximum line length that lets us get away with the less
1245  * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
1246  * cautions about boosting that.  300 was chosen because the worst real-life
1247  * text-crunching job reported on Python-Dev was a mail-log crawler where over
1248  * half the lines were 254 chars.
1249  */
1250 #define INITBUFSIZE 100
1251 #define MAXBUFSIZE 300
1252     char* p;            /* temp */
1253     char buf[MAXBUFSIZE];
1254     PyObject* v;        /* the string object result */
1255     char* pvfree;       /* address of next free slot */
1256     char* pvend;    /* address one beyond last free slot */
1257     size_t nfree;       /* # of free buffer slots; pvend-pvfree */
1258     size_t total_v_size;  /* total # of slots in buffer */
1259     size_t increment;           /* amount to increment the buffer */
1260     size_t prev_v_size;
1261 
1262     /* Optimize for normal case:  avoid _PyString_Resize if at all
1263      * possible via first reading into stack buffer "buf".
1264      */
1265     total_v_size = INITBUFSIZE;         /* start small and pray */
1266     pvfree = buf;
1267     for (;;) {
1268         FILE_BEGIN_ALLOW_THREADS(f)
1269         pvend = buf + total_v_size;
1270         nfree = pvend - pvfree;
1271         memset(pvfree, '\n', nfree);
1272         assert(nfree < INT_MAX); /* Should be atmost MAXBUFSIZE */
1273         p = fgets(pvfree, (int)nfree, fp);
1274         FILE_END_ALLOW_THREADS(f)
1275 
1276         if (p == NULL) {
1277             clearerr(fp);
1278             if (PyErr_CheckSignals())
1279                 return NULL;
1280             v = PyString_FromStringAndSize(buf, pvfree - buf);
1281             return v;
1282         }
1283         /* fgets read *something* */
1284         p = memchr(pvfree, '\n', nfree);
1285         if (p != NULL) {
1286             /* Did the \n come from fgets or from us?
1287              * Since fgets stops at the first \n, and then writes
1288              * \0, if it's from fgets a \0 must be next.  But if
1289              * that's so, it could not have come from us, since
1290              * the \n's we filled the buffer with have only more
1291              * \n's to the right.
1292              */
1293             if (p+1 < pvend && *(p+1) == '\0') {
1294                 /* It's from fgets:  we win!  In particular,
1295                  * we haven't done any mallocs yet, and can
1296                  * build the final result on the first try.
1297                  */
1298                 ++p;                    /* include \n from fgets */
1299             }
1300             else {
1301                 /* Must be from us:  fgets didn't fill the
1302                  * buffer and didn't find a newline, so it
1303                  * must be the last and newline-free line of
1304                  * the file.
1305                  */
1306                 assert(p > pvfree && *(p-1) == '\0');
1307                 --p;                    /* don't include \0 from fgets */
1308             }
1309             v = PyString_FromStringAndSize(buf, p - buf);
1310             return v;
1311         }
1312         /* yuck:  fgets overwrote all the newlines, i.e. the entire
1313          * buffer.  So this line isn't over yet, or maybe it is but
1314          * we're exactly at EOF.  If we haven't already, try using the
1315          * rest of the stack buffer.
1316          */
1317         assert(*(pvend-1) == '\0');
1318         if (pvfree == buf) {
1319             pvfree = pvend - 1;                 /* overwrite trailing null */
1320             total_v_size = MAXBUFSIZE;
1321         }
1322         else
1323             break;
1324     }
1325 
1326     /* The stack buffer isn't big enough; malloc a string object and read
1327      * into its buffer.
1328      */
1329     total_v_size = MAXBUFSIZE << 1;
1330     v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
1331     if (v == NULL)
1332         return v;
1333     /* copy over everything except the last null byte */
1334     memcpy(BUF(v), buf, MAXBUFSIZE-1);
1335     pvfree = BUF(v) + MAXBUFSIZE - 1;
1336 
1337     /* Keep reading stuff into v; if it ever ends successfully, break
1338      * after setting p one beyond the end of the line.  The code here is
1339      * very much like the code above, except reads into v's buffer; see
1340      * the code above for detailed comments about the logic.
1341      */
1342     for (;;) {
1343         FILE_BEGIN_ALLOW_THREADS(f)
1344         pvend = BUF(v) + total_v_size;
1345         nfree = pvend - pvfree;
1346         memset(pvfree, '\n', nfree);
1347         assert(nfree < INT_MAX);
1348         p = fgets(pvfree, (int)nfree, fp);
1349         FILE_END_ALLOW_THREADS(f)
1350 
1351         if (p == NULL) {
1352             clearerr(fp);
1353             if (PyErr_CheckSignals()) {
1354                 Py_DECREF(v);
1355                 return NULL;
1356             }
1357             p = pvfree;
1358             break;
1359         }
1360         p = memchr(pvfree, '\n', nfree);
1361         if (p != NULL) {
1362             if (p+1 < pvend && *(p+1) == '\0') {
1363                 /* \n came from fgets */
1364                 ++p;
1365                 break;
1366             }
1367             /* \n came from us; last line of file, no newline */
1368             assert(p > pvfree && *(p-1) == '\0');
1369             --p;
1370             break;
1371         }
1372         /* expand buffer and try again */
1373         assert(*(pvend-1) == '\0');
1374         increment = total_v_size >> 2;          /* mild exponential growth */
1375         prev_v_size = total_v_size;
1376         total_v_size += increment;
1377         /* check for overflow */
1378         if (total_v_size <= prev_v_size ||
1379             total_v_size > PY_SSIZE_T_MAX) {
1380             PyErr_SetString(PyExc_OverflowError,
1381                 "line is longer than a Python string can hold");
1382             Py_DECREF(v);
1383             return NULL;
1384         }
1385         if (_PyString_Resize(&v, (int)total_v_size) < 0)
1386             return NULL;
1387         /* overwrite the trailing null byte */
1388         pvfree = BUF(v) + (prev_v_size - 1);
1389     }
1390     if (BUF(v) + total_v_size != p && _PyString_Resize(&v, p - BUF(v)))
1391         return NULL;
1392     return v;
1393 #undef INITBUFSIZE
1394 #undef MAXBUFSIZE
1395 }
1396 #endif  /* ifdef USE_FGETS_IN_GETLINE */
1397 
1398 /* Internal routine to get a line.
1399    Size argument interpretation:
1400    > 0: max length;
1401    <= 0: read arbitrary line
1402 */
1403 
1404 static PyObject *
get_line(PyFileObject * f,int n)1405 get_line(PyFileObject *f, int n)
1406 {
1407     FILE *fp = f->f_fp;
1408     int c;
1409     char *buf, *end;
1410     size_t total_v_size;        /* total # of slots in buffer */
1411     size_t used_v_size;         /* # used slots in buffer */
1412     size_t increment;       /* amount to increment the buffer */
1413     PyObject *v;
1414     int newlinetypes = f->f_newlinetypes;
1415     int skipnextlf = f->f_skipnextlf;
1416     int univ_newline = f->f_univ_newline;
1417 
1418 #if defined(USE_FGETS_IN_GETLINE)
1419     if (n <= 0 && !univ_newline )
1420         return getline_via_fgets(f, fp);
1421 #endif
1422     total_v_size = n > 0 ? n : 100;
1423     v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1424     if (v == NULL)
1425         return NULL;
1426     buf = BUF(v);
1427     end = buf + total_v_size;
1428 
1429     for (;;) {
1430         FILE_BEGIN_ALLOW_THREADS(f)
1431         FLOCKFILE(fp);
1432         if (univ_newline) {
1433             c = 'x'; /* Shut up gcc warning */
1434             while ( buf != end && (c = GETC(fp)) != EOF ) {
1435                 if (skipnextlf ) {
1436                     skipnextlf = 0;
1437                     if (c == '\n') {
1438                         /* Seeing a \n here with
1439                          * skipnextlf true means we
1440                          * saw a \r before.
1441                          */
1442                         newlinetypes |= NEWLINE_CRLF;
1443                         c = GETC(fp);
1444                         if (c == EOF) break;
1445                     } else {
1446                         newlinetypes |= NEWLINE_CR;
1447                     }
1448                 }
1449                 if (c == '\r') {
1450                     skipnextlf = 1;
1451                     c = '\n';
1452                 } else if ( c == '\n')
1453                     newlinetypes |= NEWLINE_LF;
1454                 *buf++ = c;
1455                 if (c == '\n') break;
1456             }
1457             if (c == EOF) {
1458                 if (ferror(fp) && errno == EINTR) {
1459                     FUNLOCKFILE(fp);
1460                     FILE_ABORT_ALLOW_THREADS(f)
1461                     f->f_newlinetypes = newlinetypes;
1462                     f->f_skipnextlf = skipnextlf;
1463 
1464                     if (PyErr_CheckSignals()) {
1465                         Py_DECREF(v);
1466                         return NULL;
1467                     }
1468                     /* We executed Python signal handlers and got no exception.
1469                      * Now back to reading the line where we left off. */
1470                     clearerr(fp);
1471                     continue;
1472                 }
1473                 if (skipnextlf)
1474                     newlinetypes |= NEWLINE_CR;
1475             }
1476         } else /* If not universal newlines use the normal loop */
1477         while ((c = GETC(fp)) != EOF &&
1478                (*buf++ = c) != '\n' &&
1479             buf != end)
1480             ;
1481         FUNLOCKFILE(fp);
1482         FILE_END_ALLOW_THREADS(f)
1483         f->f_newlinetypes = newlinetypes;
1484         f->f_skipnextlf = skipnextlf;
1485         if (c == '\n')
1486             break;
1487         if (c == EOF) {
1488             if (ferror(fp)) {
1489                 if (errno == EINTR) {
1490                     if (PyErr_CheckSignals()) {
1491                         Py_DECREF(v);
1492                         return NULL;
1493                     }
1494                     /* We executed Python signal handlers and got no exception.
1495                      * Now back to reading the line where we left off. */
1496                     clearerr(fp);
1497                     continue;
1498                 }
1499                 PyErr_SetFromErrno(PyExc_IOError);
1500                 clearerr(fp);
1501                 Py_DECREF(v);
1502                 return NULL;
1503             }
1504             clearerr(fp);
1505             if (PyErr_CheckSignals()) {
1506                 Py_DECREF(v);
1507                 return NULL;
1508             }
1509             break;
1510         }
1511         /* Must be because buf == end */
1512         if (n > 0)
1513             break;
1514         used_v_size = total_v_size;
1515         increment = total_v_size >> 2; /* mild exponential growth */
1516         total_v_size += increment;
1517         if (total_v_size > PY_SSIZE_T_MAX) {
1518             PyErr_SetString(PyExc_OverflowError,
1519                 "line is longer than a Python string can hold");
1520             Py_DECREF(v);
1521             return NULL;
1522         }
1523         if (_PyString_Resize(&v, total_v_size) < 0)
1524             return NULL;
1525         buf = BUF(v) + used_v_size;
1526         end = BUF(v) + total_v_size;
1527     }
1528 
1529     used_v_size = buf - BUF(v);
1530     if (used_v_size != total_v_size && _PyString_Resize(&v, used_v_size))
1531         return NULL;
1532     return v;
1533 }
1534 
1535 /* External C interface */
1536 
1537 PyObject *
PyFile_GetLine(PyObject * f,int n)1538 PyFile_GetLine(PyObject *f, int n)
1539 {
1540     PyObject *result;
1541 
1542     if (f == NULL) {
1543         PyErr_BadInternalCall();
1544         return NULL;
1545     }
1546 
1547     if (PyFile_Check(f)) {
1548         PyFileObject *fo = (PyFileObject *)f;
1549         if (fo->f_fp == NULL)
1550             return err_closed();
1551         if (!fo->readable)
1552             return err_mode("reading");
1553         /* refuse to mix with f.next() */
1554         if (fo->f_buf != NULL &&
1555             (fo->f_bufend - fo->f_bufptr) > 0 &&
1556             fo->f_buf[0] != '\0')
1557             return err_iterbuffered();
1558         result = get_line(fo, n);
1559     }
1560     else {
1561         PyObject *reader;
1562         PyObject *args;
1563 
1564         reader = PyObject_GetAttrString(f, "readline");
1565         if (reader == NULL)
1566             return NULL;
1567         if (n <= 0)
1568             args = PyTuple_New(0);
1569         else
1570             args = Py_BuildValue("(i)", n);
1571         if (args == NULL) {
1572             Py_DECREF(reader);
1573             return NULL;
1574         }
1575         result = PyEval_CallObject(reader, args);
1576         Py_DECREF(reader);
1577         Py_DECREF(args);
1578         if (result != NULL && !PyString_Check(result) &&
1579             !PyUnicode_Check(result)) {
1580             Py_DECREF(result);
1581             result = NULL;
1582             PyErr_SetString(PyExc_TypeError,
1583                        "object.readline() returned non-string");
1584         }
1585     }
1586 
1587     if (n < 0 && result != NULL && PyString_Check(result)) {
1588         char *s = PyString_AS_STRING(result);
1589         Py_ssize_t len = PyString_GET_SIZE(result);
1590         if (len == 0) {
1591             Py_DECREF(result);
1592             result = NULL;
1593             PyErr_SetString(PyExc_EOFError,
1594                             "EOF when reading a line");
1595         }
1596         else if (s[len-1] == '\n') {
1597             if (result->ob_refcnt == 1) {
1598                 if (_PyString_Resize(&result, len-1))
1599                     return NULL;
1600             }
1601             else {
1602                 PyObject *v;
1603                 v = PyString_FromStringAndSize(s, len-1);
1604                 Py_DECREF(result);
1605                 result = v;
1606             }
1607         }
1608     }
1609 #ifdef Py_USING_UNICODE
1610     if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1611         Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1612         Py_ssize_t len = PyUnicode_GET_SIZE(result);
1613         if (len == 0) {
1614             Py_DECREF(result);
1615             result = NULL;
1616             PyErr_SetString(PyExc_EOFError,
1617                             "EOF when reading a line");
1618         }
1619         else if (s[len-1] == '\n') {
1620             if (result->ob_refcnt == 1)
1621                 PyUnicode_Resize(&result, len-1);
1622             else {
1623                 PyObject *v;
1624                 v = PyUnicode_FromUnicode(s, len-1);
1625                 Py_DECREF(result);
1626                 result = v;
1627             }
1628         }
1629     }
1630 #endif
1631     return result;
1632 }
1633 
1634 /* Python method */
1635 
1636 static PyObject *
file_readline(PyFileObject * f,PyObject * args)1637 file_readline(PyFileObject *f, PyObject *args)
1638 {
1639     int n = -1;
1640 
1641     if (f->f_fp == NULL)
1642         return err_closed();
1643     if (!f->readable)
1644         return err_mode("reading");
1645     /* refuse to mix with f.next() */
1646     if (f->f_buf != NULL &&
1647         (f->f_bufend - f->f_bufptr) > 0 &&
1648         f->f_buf[0] != '\0')
1649         return err_iterbuffered();
1650     if (!PyArg_ParseTuple(args, "|i:readline", &n))
1651         return NULL;
1652     if (n == 0)
1653         return PyString_FromString("");
1654     if (n < 0)
1655         n = 0;
1656     return get_line(f, n);
1657 }
1658 
1659 static PyObject *
file_readlines(PyFileObject * f,PyObject * args)1660 file_readlines(PyFileObject *f, PyObject *args)
1661 {
1662     long sizehint = 0;
1663     PyObject *list = NULL;
1664     PyObject *line;
1665     char small_buffer[SMALLCHUNK];
1666     char *buffer = small_buffer;
1667     size_t buffersize = SMALLCHUNK;
1668     PyObject *big_buffer = NULL;
1669     size_t nfilled = 0;
1670     size_t nread;
1671     size_t totalread = 0;
1672     char *p, *q, *end;
1673     int err;
1674     int shortread = 0;  /* bool, did the previous read come up short? */
1675 
1676     if (f->f_fp == NULL)
1677         return err_closed();
1678     if (!f->readable)
1679         return err_mode("reading");
1680     /* refuse to mix with f.next() */
1681     if (f->f_buf != NULL &&
1682         (f->f_bufend - f->f_bufptr) > 0 &&
1683         f->f_buf[0] != '\0')
1684         return err_iterbuffered();
1685     if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1686         return NULL;
1687     if ((list = PyList_New(0)) == NULL)
1688         return NULL;
1689     for (;;) {
1690         if (shortread)
1691             nread = 0;
1692         else {
1693             FILE_BEGIN_ALLOW_THREADS(f)
1694             errno = 0;
1695             nread = Py_UniversalNewlineFread(buffer+nfilled,
1696                 buffersize-nfilled, f->f_fp, (PyObject *)f);
1697             FILE_END_ALLOW_THREADS(f)
1698             shortread = (nread < buffersize-nfilled);
1699         }
1700         if (nread == 0) {
1701             sizehint = 0;
1702             if (!ferror(f->f_fp))
1703                 break;
1704             if (errno == EINTR) {
1705                 if (PyErr_CheckSignals()) {
1706                     goto error;
1707                 }
1708                 clearerr(f->f_fp);
1709                 shortread = 0;
1710                 continue;
1711             }
1712             PyErr_SetFromErrno(PyExc_IOError);
1713             clearerr(f->f_fp);
1714             goto error;
1715         }
1716         totalread += nread;
1717         p = (char *)memchr(buffer+nfilled, '\n', nread);
1718         if (p == NULL) {
1719             /* Need a larger buffer to fit this line */
1720             nfilled += nread;
1721             buffersize *= 2;
1722             if (buffersize > PY_SSIZE_T_MAX) {
1723                 PyErr_SetString(PyExc_OverflowError,
1724                 "line is longer than a Python string can hold");
1725                 goto error;
1726             }
1727             if (big_buffer == NULL) {
1728                 /* Create the big buffer */
1729                 big_buffer = PyString_FromStringAndSize(
1730                     NULL, buffersize);
1731                 if (big_buffer == NULL)
1732                     goto error;
1733                 buffer = PyString_AS_STRING(big_buffer);
1734                 memcpy(buffer, small_buffer, nfilled);
1735             }
1736             else {
1737                 /* Grow the big buffer */
1738                 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1739                     goto error;
1740                 buffer = PyString_AS_STRING(big_buffer);
1741             }
1742             continue;
1743         }
1744         end = buffer+nfilled+nread;
1745         q = buffer;
1746         do {
1747             /* Process complete lines */
1748             p++;
1749             line = PyString_FromStringAndSize(q, p-q);
1750             if (line == NULL)
1751                 goto error;
1752             err = PyList_Append(list, line);
1753             Py_DECREF(line);
1754             if (err != 0)
1755                 goto error;
1756             q = p;
1757             p = (char *)memchr(q, '\n', end-q);
1758         } while (p != NULL);
1759         /* Move the remaining incomplete line to the start */
1760         nfilled = end-q;
1761         memmove(buffer, q, nfilled);
1762         if (sizehint > 0)
1763             if (totalread >= (size_t)sizehint)
1764                 break;
1765     }
1766     if (nfilled != 0) {
1767         /* Partial last line */
1768         line = PyString_FromStringAndSize(buffer, nfilled);
1769         if (line == NULL)
1770             goto error;
1771         if (sizehint > 0) {
1772             /* Need to complete the last line */
1773             PyObject *rest = get_line(f, 0);
1774             if (rest == NULL) {
1775                 Py_DECREF(line);
1776                 goto error;
1777             }
1778             PyString_Concat(&line, rest);
1779             Py_DECREF(rest);
1780             if (line == NULL)
1781                 goto error;
1782         }
1783         err = PyList_Append(list, line);
1784         Py_DECREF(line);
1785         if (err != 0)
1786             goto error;
1787     }
1788 
1789 cleanup:
1790     Py_XDECREF(big_buffer);
1791     return list;
1792 
1793 error:
1794     Py_CLEAR(list);
1795     goto cleanup;
1796 }
1797 
1798 static PyObject *
file_write(PyFileObject * f,PyObject * args)1799 file_write(PyFileObject *f, PyObject *args)
1800 {
1801     Py_buffer pbuf;
1802     const char *s;
1803     Py_ssize_t n, n2;
1804     PyObject *encoded = NULL;
1805     int err_flag = 0, err;
1806 
1807     if (f->f_fp == NULL)
1808         return err_closed();
1809     if (!f->writable)
1810         return err_mode("writing");
1811     if (f->f_binary) {
1812         if (!PyArg_ParseTuple(args, "s*", &pbuf))
1813             return NULL;
1814         s = pbuf.buf;
1815         n = pbuf.len;
1816     }
1817     else {
1818         PyObject *text;
1819         if (!PyArg_ParseTuple(args, "O", &text))
1820             return NULL;
1821 
1822         if (PyString_Check(text)) {
1823             s = PyString_AS_STRING(text);
1824             n = PyString_GET_SIZE(text);
1825 #ifdef Py_USING_UNICODE
1826         } else if (PyUnicode_Check(text)) {
1827             const char *encoding, *errors;
1828             if (f->f_encoding != Py_None)
1829                 encoding = PyString_AS_STRING(f->f_encoding);
1830             else
1831                 encoding = PyUnicode_GetDefaultEncoding();
1832             if (f->f_errors != Py_None)
1833                 errors = PyString_AS_STRING(f->f_errors);
1834             else
1835                 errors = "strict";
1836             encoded = PyUnicode_AsEncodedString(text, encoding, errors);
1837             if (encoded == NULL)
1838                 return NULL;
1839             s = PyString_AS_STRING(encoded);
1840             n = PyString_GET_SIZE(encoded);
1841 #endif
1842         } else {
1843             if (PyObject_AsCharBuffer(text, &s, &n))
1844                 return NULL;
1845         }
1846     }
1847     f->f_softspace = 0;
1848     FILE_BEGIN_ALLOW_THREADS(f)
1849     errno = 0;
1850     n2 = fwrite(s, 1, n, f->f_fp);
1851     if (n2 != n || ferror(f->f_fp)) {
1852         err_flag = 1;
1853         err = errno;
1854     }
1855     FILE_END_ALLOW_THREADS(f)
1856     Py_XDECREF(encoded);
1857     if (f->f_binary)
1858         PyBuffer_Release(&pbuf);
1859     if (err_flag) {
1860         errno = err;
1861         PyErr_SetFromErrno(PyExc_IOError);
1862         clearerr(f->f_fp);
1863         return NULL;
1864     }
1865     Py_INCREF(Py_None);
1866     return Py_None;
1867 }
1868 
1869 static PyObject *
file_writelines(PyFileObject * f,PyObject * seq)1870 file_writelines(PyFileObject *f, PyObject *seq)
1871 {
1872 #define CHUNKSIZE 1000
1873     PyObject *list, *line;
1874     PyObject *it;       /* iter(seq) */
1875     PyObject *result;
1876     int index, islist;
1877     Py_ssize_t i, j, nwritten, len;
1878 
1879     assert(seq != NULL);
1880     if (f->f_fp == NULL)
1881         return err_closed();
1882     if (!f->writable)
1883         return err_mode("writing");
1884 
1885     result = NULL;
1886     list = NULL;
1887     islist = PyList_Check(seq);
1888     if  (islist)
1889         it = NULL;
1890     else {
1891         it = PyObject_GetIter(seq);
1892         if (it == NULL) {
1893             PyErr_SetString(PyExc_TypeError,
1894                 "writelines() requires an iterable argument");
1895             return NULL;
1896         }
1897         /* From here on, fail by going to error, to reclaim "it". */
1898         list = PyList_New(CHUNKSIZE);
1899         if (list == NULL)
1900             goto error;
1901     }
1902 
1903     /* Strategy: slurp CHUNKSIZE lines into a private list,
1904        checking that they are all strings, then write that list
1905        without holding the interpreter lock, then come back for more. */
1906     for (index = 0; ; index += CHUNKSIZE) {
1907         if (islist) {
1908             Py_XDECREF(list);
1909             list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1910             if (list == NULL)
1911                 goto error;
1912             j = PyList_GET_SIZE(list);
1913         }
1914         else {
1915             for (j = 0; j < CHUNKSIZE; j++) {
1916                 line = PyIter_Next(it);
1917                 if (line == NULL) {
1918                     if (PyErr_Occurred())
1919                         goto error;
1920                     break;
1921                 }
1922                 PyList_SetItem(list, j, line);
1923             }
1924             /* The iterator might have closed the file on us. */
1925             if (f->f_fp == NULL) {
1926                 err_closed();
1927                 goto error;
1928             }
1929         }
1930         if (j == 0)
1931             break;
1932 
1933         /* Check that all entries are indeed strings. If not,
1934            apply the same rules as for file.write() and
1935            convert the results to strings. This is slow, but
1936            seems to be the only way since all conversion APIs
1937            could potentially execute Python code. */
1938         for (i = 0; i < j; i++) {
1939             PyObject *v = PyList_GET_ITEM(list, i);
1940             if (!PyString_Check(v)) {
1941                 const char *buffer;
1942                 int res;
1943                 if (f->f_binary) {
1944                     res = PyObject_AsReadBuffer(v, (const void**)&buffer, &len);
1945                 } else {
1946                     res = PyObject_AsCharBuffer(v, &buffer, &len);
1947                 }
1948                 if (res) {
1949                     PyErr_SetString(PyExc_TypeError,
1950             "writelines() argument must be a sequence of strings");
1951                             goto error;
1952                 }
1953                 line = PyString_FromStringAndSize(buffer,
1954                                                   len);
1955                 if (line == NULL)
1956                     goto error;
1957                 Py_DECREF(v);
1958                 PyList_SET_ITEM(list, i, line);
1959             }
1960         }
1961 
1962         /* Since we are releasing the global lock, the
1963            following code may *not* execute Python code. */
1964         f->f_softspace = 0;
1965         FILE_BEGIN_ALLOW_THREADS(f)
1966         errno = 0;
1967         for (i = 0; i < j; i++) {
1968             line = PyList_GET_ITEM(list, i);
1969             len = PyString_GET_SIZE(line);
1970             nwritten = fwrite(PyString_AS_STRING(line),
1971                               1, len, f->f_fp);
1972             if (nwritten != len) {
1973                 FILE_ABORT_ALLOW_THREADS(f)
1974                 PyErr_SetFromErrno(PyExc_IOError);
1975                 clearerr(f->f_fp);
1976                 goto error;
1977             }
1978         }
1979         FILE_END_ALLOW_THREADS(f)
1980 
1981         if (j < CHUNKSIZE)
1982             break;
1983     }
1984 
1985     Py_INCREF(Py_None);
1986     result = Py_None;
1987   error:
1988     Py_XDECREF(list);
1989     Py_XDECREF(it);
1990     return result;
1991 #undef CHUNKSIZE
1992 }
1993 
1994 static PyObject *
file_self(PyFileObject * f)1995 file_self(PyFileObject *f)
1996 {
1997     if (f->f_fp == NULL)
1998         return err_closed();
1999     Py_INCREF(f);
2000     return (PyObject *)f;
2001 }
2002 
2003 static PyObject *
file_xreadlines(PyFileObject * f)2004 file_xreadlines(PyFileObject *f)
2005 {
2006     if (PyErr_WarnPy3k("f.xreadlines() not supported in 3.x, "
2007                        "try 'for line in f' instead", 1) < 0)
2008            return NULL;
2009     return file_self(f);
2010 }
2011 
2012 static PyObject *
file_exit(PyObject * f,PyObject * args)2013 file_exit(PyObject *f, PyObject *args)
2014 {
2015     PyObject *ret = PyObject_CallMethod(f, "close", NULL);
2016     if (!ret)
2017         /* If error occurred, pass through */
2018         return NULL;
2019     Py_DECREF(ret);
2020     /* We cannot return the result of close since a true
2021      * value will be interpreted as "yes, swallow the
2022      * exception if one was raised inside the with block". */
2023     Py_RETURN_NONE;
2024 }
2025 
2026 PyDoc_STRVAR(readline_doc,
2027 "readline([size]) -> next line from the file, as a string.\n"
2028 "\n"
2029 "Retain newline.  A non-negative size argument limits the maximum\n"
2030 "number of bytes to return (an incomplete line may be returned then).\n"
2031 "Return an empty string at EOF.");
2032 
2033 PyDoc_STRVAR(read_doc,
2034 "read([size]) -> read at most size bytes, returned as a string.\n"
2035 "\n"
2036 "If the size argument is negative or omitted, read until EOF is reached.\n"
2037 "Notice that when in non-blocking mode, less data than what was requested\n"
2038 "may be returned, even if no size parameter was given.");
2039 
2040 PyDoc_STRVAR(write_doc,
2041 "write(str) -> None.  Write string str to file.\n"
2042 "\n"
2043 "Note that due to buffering, flush() or close() may be needed before\n"
2044 "the file on disk reflects the data written.");
2045 
2046 PyDoc_STRVAR(fileno_doc,
2047 "fileno() -> integer \"file descriptor\".\n"
2048 "\n"
2049 "This is needed for lower-level file interfaces, such os.read().");
2050 
2051 PyDoc_STRVAR(seek_doc,
2052 "seek(offset[, whence]) -> None.  Move to new file position.\n"
2053 "\n"
2054 "Argument offset is a byte count.  Optional argument whence defaults to\n"
2055 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
2056 "(move relative to current position, positive or negative), and 2 (move\n"
2057 "relative to end of file, usually negative, although many platforms allow\n"
2058 "seeking beyond the end of a file).  If the file is opened in text mode,\n"
2059 "only offsets returned by tell() are legal.  Use of other offsets causes\n"
2060 "undefined behavior."
2061 "\n"
2062 "Note that not all file objects are seekable.");
2063 
2064 #ifdef HAVE_FTRUNCATE
2065 PyDoc_STRVAR(truncate_doc,
2066 "truncate([size]) -> None.  Truncate the file to at most size bytes.\n"
2067 "\n"
2068 "Size defaults to the current file position, as returned by tell().");
2069 #endif
2070 
2071 PyDoc_STRVAR(tell_doc,
2072 "tell() -> current file position, an integer (may be a long integer).");
2073 
2074 PyDoc_STRVAR(readinto_doc,
2075 "readinto() -> Undocumented.  Don't use this; it may go away.");
2076 
2077 PyDoc_STRVAR(readlines_doc,
2078 "readlines([size]) -> list of strings, each a line from the file.\n"
2079 "\n"
2080 "Call readline() repeatedly and return a list of the lines so read.\n"
2081 "The optional size argument, if given, is an approximate bound on the\n"
2082 "total number of bytes in the lines returned.");
2083 
2084 PyDoc_STRVAR(xreadlines_doc,
2085 "xreadlines() -> returns self.\n"
2086 "\n"
2087 "For backward compatibility. File objects now include the performance\n"
2088 "optimizations previously implemented in the xreadlines module.");
2089 
2090 PyDoc_STRVAR(writelines_doc,
2091 "writelines(sequence_of_strings) -> None.  Write the strings to the file.\n"
2092 "\n"
2093 "Note that newlines are not added.  The sequence can be any iterable object\n"
2094 "producing strings. This is equivalent to calling write() for each string.");
2095 
2096 PyDoc_STRVAR(flush_doc,
2097 "flush() -> None.  Flush the internal I/O buffer.");
2098 
2099 PyDoc_STRVAR(close_doc,
2100 "close() -> None or (perhaps) an integer.  Close the file.\n"
2101 "\n"
2102 "Sets data attribute .closed to True.  A closed file cannot be used for\n"
2103 "further I/O operations.  close() may be called more than once without\n"
2104 "error.  Some kinds of file objects (for example, opened by popen())\n"
2105 "may return an exit status upon closing.");
2106 
2107 PyDoc_STRVAR(isatty_doc,
2108 "isatty() -> true or false.  True if the file is connected to a tty device.");
2109 
2110 PyDoc_STRVAR(enter_doc,
2111              "__enter__() -> self.");
2112 
2113 PyDoc_STRVAR(exit_doc,
2114              "__exit__(*excinfo) -> None.  Closes the file.");
2115 
2116 static PyMethodDef file_methods[] = {
2117     {"readline",  (PyCFunction)file_readline, METH_VARARGS, readline_doc},
2118     {"read",      (PyCFunction)file_read,     METH_VARARGS, read_doc},
2119     {"write",     (PyCFunction)file_write,    METH_VARARGS, write_doc},
2120     {"fileno",    (PyCFunction)file_fileno,   METH_NOARGS,  fileno_doc},
2121     {"seek",      (PyCFunction)file_seek,     METH_VARARGS, seek_doc},
2122 #ifdef HAVE_FTRUNCATE
2123     {"truncate",  (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
2124 #endif
2125     {"tell",      (PyCFunction)file_tell,     METH_NOARGS,  tell_doc},
2126     {"readinto",  (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
2127     {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc},
2128     {"xreadlines",(PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc},
2129     {"writelines",(PyCFunction)file_writelines, METH_O,     writelines_doc},
2130     {"flush",     (PyCFunction)file_flush,    METH_NOARGS,  flush_doc},
2131     {"close",     (PyCFunction)file_close,    METH_NOARGS,  close_doc},
2132     {"isatty",    (PyCFunction)file_isatty,   METH_NOARGS,  isatty_doc},
2133     {"__enter__", (PyCFunction)file_self,     METH_NOARGS,  enter_doc},
2134     {"__exit__",  (PyCFunction)file_exit,     METH_VARARGS, exit_doc},
2135     {NULL,            NULL}             /* sentinel */
2136 };
2137 
2138 #define OFF(x) offsetof(PyFileObject, x)
2139 
2140 static PyMemberDef file_memberlist[] = {
2141     {"mode",            T_OBJECT,       OFF(f_mode),    RO,
2142      "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
2143     {"name",            T_OBJECT,       OFF(f_name),    RO,
2144      "file name"},
2145     {"encoding",        T_OBJECT,       OFF(f_encoding),        RO,
2146      "file encoding"},
2147     {"errors",          T_OBJECT,       OFF(f_errors),  RO,
2148      "Unicode error handler"},
2149     /* getattr(f, "closed") is implemented without this table */
2150     {NULL}      /* Sentinel */
2151 };
2152 
2153 static PyObject *
get_closed(PyFileObject * f,void * closure)2154 get_closed(PyFileObject *f, void *closure)
2155 {
2156     return PyBool_FromLong((long)(f->f_fp == 0));
2157 }
2158 static PyObject *
get_newlines(PyFileObject * f,void * closure)2159 get_newlines(PyFileObject *f, void *closure)
2160 {
2161     switch (f->f_newlinetypes) {
2162     case NEWLINE_UNKNOWN:
2163         Py_INCREF(Py_None);
2164         return Py_None;
2165     case NEWLINE_CR:
2166         return PyString_FromString("\r");
2167     case NEWLINE_LF:
2168         return PyString_FromString("\n");
2169     case NEWLINE_CR|NEWLINE_LF:
2170         return Py_BuildValue("(ss)", "\r", "\n");
2171     case NEWLINE_CRLF:
2172         return PyString_FromString("\r\n");
2173     case NEWLINE_CR|NEWLINE_CRLF:
2174         return Py_BuildValue("(ss)", "\r", "\r\n");
2175     case NEWLINE_LF|NEWLINE_CRLF:
2176         return Py_BuildValue("(ss)", "\n", "\r\n");
2177     case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
2178         return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
2179     default:
2180         PyErr_Format(PyExc_SystemError,
2181                      "Unknown newlines value 0x%x\n",
2182                      f->f_newlinetypes);
2183         return NULL;
2184     }
2185 }
2186 
2187 static PyObject *
get_softspace(PyFileObject * f,void * closure)2188 get_softspace(PyFileObject *f, void *closure)
2189 {
2190     if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2191         return NULL;
2192     return PyInt_FromLong(f->f_softspace);
2193 }
2194 
2195 static int
set_softspace(PyFileObject * f,PyObject * value)2196 set_softspace(PyFileObject *f, PyObject *value)
2197 {
2198     int new;
2199     if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2200         return -1;
2201 
2202     if (value == NULL) {
2203         PyErr_SetString(PyExc_TypeError,
2204                         "can't delete softspace attribute");
2205         return -1;
2206     }
2207 
2208     new = PyInt_AsLong(value);
2209     if (new == -1 && PyErr_Occurred())
2210         return -1;
2211     f->f_softspace = new;
2212     return 0;
2213 }
2214 
2215 static PyGetSetDef file_getsetlist[] = {
2216     {"closed", (getter)get_closed, NULL, "True if the file is closed"},
2217     {"newlines", (getter)get_newlines, NULL,
2218      "end-of-line convention used in this file"},
2219     {"softspace", (getter)get_softspace, (setter)set_softspace,
2220      "flag indicating that a space needs to be printed; used by print"},
2221     {0},
2222 };
2223 
2224 static void
drop_readahead(PyFileObject * f)2225 drop_readahead(PyFileObject *f)
2226 {
2227     if (f->f_buf != NULL) {
2228         PyMem_Free(f->f_buf);
2229         f->f_buf = NULL;
2230     }
2231 }
2232 
2233 /* Make sure that file has a readahead buffer with at least one byte
2234    (unless at EOF) and no more than bufsize.  Returns negative value on
2235    error, will set MemoryError if bufsize bytes cannot be allocated. */
2236 static int
readahead(PyFileObject * f,Py_ssize_t bufsize)2237 readahead(PyFileObject *f, Py_ssize_t bufsize)
2238 {
2239     Py_ssize_t chunksize;
2240 
2241     if (f->f_buf != NULL) {
2242         if( (f->f_bufend - f->f_bufptr) >= 1)
2243             return 0;
2244         else
2245             drop_readahead(f);
2246     }
2247     if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) {
2248         PyErr_NoMemory();
2249         return -1;
2250     }
2251     FILE_BEGIN_ALLOW_THREADS(f)
2252     errno = 0;
2253     chunksize = Py_UniversalNewlineFread(
2254         f->f_buf, bufsize, f->f_fp, (PyObject *)f);
2255     FILE_END_ALLOW_THREADS(f)
2256     if (chunksize == 0) {
2257         if (ferror(f->f_fp)) {
2258             PyErr_SetFromErrno(PyExc_IOError);
2259             clearerr(f->f_fp);
2260             drop_readahead(f);
2261             return -1;
2262         }
2263     }
2264     f->f_bufptr = f->f_buf;
2265     f->f_bufend = f->f_buf + chunksize;
2266     return 0;
2267 }
2268 
2269 /* Used by file_iternext.  The returned string will start with 'skip'
2270    uninitialized bytes followed by the remainder of the line. Don't be
2271    horrified by the recursive call: maximum recursion depth is limited by
2272    logarithmic buffer growth to about 50 even when reading a 1gb line. */
2273 
2274 static PyStringObject *
readahead_get_line_skip(PyFileObject * f,Py_ssize_t skip,Py_ssize_t bufsize)2275 readahead_get_line_skip(PyFileObject *f, Py_ssize_t skip, Py_ssize_t bufsize)
2276 {
2277     PyStringObject* s;
2278     char *bufptr;
2279     char *buf;
2280     Py_ssize_t len;
2281 
2282     if (f->f_buf == NULL)
2283         if (readahead(f, bufsize) < 0)
2284             return NULL;
2285 
2286     len = f->f_bufend - f->f_bufptr;
2287     if (len == 0)
2288         return (PyStringObject *)
2289             PyString_FromStringAndSize(NULL, skip);
2290     bufptr = (char *)memchr(f->f_bufptr, '\n', len);
2291     if (bufptr != NULL) {
2292         bufptr++;                               /* Count the '\n' */
2293         len = bufptr - f->f_bufptr;
2294         s = (PyStringObject *)
2295             PyString_FromStringAndSize(NULL, skip + len);
2296         if (s == NULL)
2297             return NULL;
2298         memcpy(PyString_AS_STRING(s) + skip, f->f_bufptr, len);
2299         f->f_bufptr = bufptr;
2300         if (bufptr == f->f_bufend)
2301             drop_readahead(f);
2302     } else {
2303         bufptr = f->f_bufptr;
2304         buf = f->f_buf;
2305         f->f_buf = NULL;                /* Force new readahead buffer */
2306         assert(len <= PY_SSIZE_T_MAX - skip);
2307         s = readahead_get_line_skip(f, skip + len, bufsize + (bufsize>>2));
2308         if (s == NULL) {
2309             PyMem_Free(buf);
2310             return NULL;
2311         }
2312         memcpy(PyString_AS_STRING(s) + skip, bufptr, len);
2313         PyMem_Free(buf);
2314     }
2315     return s;
2316 }
2317 
2318 /* A larger buffer size may actually decrease performance. */
2319 #define READAHEAD_BUFSIZE 8192
2320 
2321 static PyObject *
file_iternext(PyFileObject * f)2322 file_iternext(PyFileObject *f)
2323 {
2324     PyStringObject* l;
2325 
2326     if (f->f_fp == NULL)
2327         return err_closed();
2328     if (!f->readable)
2329         return err_mode("reading");
2330 
2331     l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
2332     if (l == NULL || PyString_GET_SIZE(l) == 0) {
2333         Py_XDECREF(l);
2334         return NULL;
2335     }
2336     return (PyObject *)l;
2337 }
2338 
2339 
2340 static PyObject *
file_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2341 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2342 {
2343     PyObject *self;
2344     static PyObject *not_yet_string;
2345 
2346     assert(type != NULL && type->tp_alloc != NULL);
2347 
2348     if (not_yet_string == NULL) {
2349         not_yet_string = PyString_InternFromString("<uninitialized file>");
2350         if (not_yet_string == NULL)
2351             return NULL;
2352     }
2353 
2354     self = type->tp_alloc(type, 0);
2355     if (self != NULL) {
2356         /* Always fill in the name and mode, so that nobody else
2357            needs to special-case NULLs there. */
2358         Py_INCREF(not_yet_string);
2359         ((PyFileObject *)self)->f_name = not_yet_string;
2360         Py_INCREF(not_yet_string);
2361         ((PyFileObject *)self)->f_mode = not_yet_string;
2362         Py_INCREF(Py_None);
2363         ((PyFileObject *)self)->f_encoding = Py_None;
2364         Py_INCREF(Py_None);
2365         ((PyFileObject *)self)->f_errors = Py_None;
2366         ((PyFileObject *)self)->weakreflist = NULL;
2367         ((PyFileObject *)self)->unlocked_count = 0;
2368     }
2369     return self;
2370 }
2371 
2372 static int
file_init(PyObject * self,PyObject * args,PyObject * kwds)2373 file_init(PyObject *self, PyObject *args, PyObject *kwds)
2374 {
2375     PyFileObject *foself = (PyFileObject *)self;
2376     int ret = 0;
2377     static char *kwlist[] = {"name", "mode", "buffering", 0};
2378     char *name = NULL;
2379     char *mode = "r";
2380     int bufsize = -1;
2381     int wideargument = 0;
2382 #ifdef MS_WINDOWS
2383     PyObject *po;
2384 #endif
2385 
2386     assert(PyFile_Check(self));
2387     if (foself->f_fp != NULL) {
2388         /* Have to close the existing file first. */
2389         PyObject *closeresult = file_close(foself);
2390         if (closeresult == NULL)
2391             return -1;
2392         Py_DECREF(closeresult);
2393     }
2394 
2395 #ifdef MS_WINDOWS
2396     if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
2397                                     kwlist, &po, &mode, &bufsize) &&
2398         wcslen(PyUnicode_AS_UNICODE(po)) == (size_t)PyUnicode_GET_SIZE(po)) {
2399         wideargument = 1;
2400         if (fill_file_fields(foself, NULL, po, mode,
2401                              fclose) == NULL)
2402             goto Error;
2403     } else {
2404         /* Drop the argument parsing error as narrow
2405            strings are also valid. */
2406         PyErr_Clear();
2407     }
2408 #endif
2409 
2410     if (!wideargument) {
2411         PyObject *o_name;
2412 
2413         if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
2414                                          Py_FileSystemDefaultEncoding,
2415                                          &name,
2416                                          &mode, &bufsize))
2417             return -1;
2418 
2419         /* We parse again to get the name as a PyObject */
2420         if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
2421                                          kwlist, &o_name, &mode,
2422                                          &bufsize))
2423             goto Error;
2424 
2425         if (fill_file_fields(foself, NULL, o_name, mode,
2426                              fclose) == NULL)
2427             goto Error;
2428     }
2429     if (open_the_file(foself, name, mode) == NULL)
2430         goto Error;
2431     foself->f_setbuf = NULL;
2432     PyFile_SetBufSize(self, bufsize);
2433     goto Done;
2434 
2435 Error:
2436     ret = -1;
2437     /* fall through */
2438 Done:
2439     PyMem_Free(name); /* free the encoded string */
2440     return ret;
2441 }
2442 
2443 PyDoc_VAR(file_doc) =
2444 PyDoc_STR(
2445 "file(name[, mode[, buffering]]) -> file object\n"
2446 "\n"
2447 "Open a file.  The mode can be 'r', 'w' or 'a' for reading (default),\n"
2448 "writing or appending.  The file will be created if it doesn't exist\n"
2449 "when opened for writing or appending; it will be truncated when\n"
2450 "opened for writing.  Add a 'b' to the mode for binary files.\n"
2451 "Add a '+' to the mode to allow simultaneous reading and writing.\n"
2452 "If the buffering argument is given, 0 means unbuffered, 1 means line\n"
2453 "buffered, and larger numbers specify the buffer size.  The preferred way\n"
2454 "to open a file is with the builtin open() function.\n"
2455 )
2456 PyDoc_STR(
2457 "Add a 'U' to mode to open the file for input with universal newline\n"
2458 "support.  Any line ending in the input file will be seen as a '\\n'\n"
2459 "in Python.  Also, a file so opened gains the attribute 'newlines';\n"
2460 "the value for this attribute is one of None (no newline read yet),\n"
2461 "'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
2462 "\n"
2463 "'U' cannot be combined with 'w' or '+' mode.\n"
2464 );
2465 
2466 PyTypeObject PyFile_Type = {
2467     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2468     "file",
2469     sizeof(PyFileObject),
2470     0,
2471     (destructor)file_dealloc,                   /* tp_dealloc */
2472     0,                                          /* tp_print */
2473     0,                                          /* tp_getattr */
2474     0,                                          /* tp_setattr */
2475     0,                                          /* tp_compare */
2476     (reprfunc)file_repr,                        /* tp_repr */
2477     0,                                          /* tp_as_number */
2478     0,                                          /* tp_as_sequence */
2479     0,                                          /* tp_as_mapping */
2480     0,                                          /* tp_hash */
2481     0,                                          /* tp_call */
2482     0,                                          /* tp_str */
2483     PyObject_GenericGetAttr,                    /* tp_getattro */
2484     /* softspace is writable:  we must supply tp_setattro */
2485     PyObject_GenericSetAttr,                    /* tp_setattro */
2486     0,                                          /* tp_as_buffer */
2487     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
2488     file_doc,                                   /* tp_doc */
2489     0,                                          /* tp_traverse */
2490     0,                                          /* tp_clear */
2491     0,                                          /* tp_richcompare */
2492     offsetof(PyFileObject, weakreflist),        /* tp_weaklistoffset */
2493     (getiterfunc)file_self,                     /* tp_iter */
2494     (iternextfunc)file_iternext,                /* tp_iternext */
2495     file_methods,                               /* tp_methods */
2496     file_memberlist,                            /* tp_members */
2497     file_getsetlist,                            /* tp_getset */
2498     0,                                          /* tp_base */
2499     0,                                          /* tp_dict */
2500     0,                                          /* tp_descr_get */
2501     0,                                          /* tp_descr_set */
2502     0,                                          /* tp_dictoffset */
2503     file_init,                                  /* tp_init */
2504     PyType_GenericAlloc,                        /* tp_alloc */
2505     file_new,                                   /* tp_new */
2506     PyObject_Del,                           /* tp_free */
2507 };
2508 
2509 /* Interface for the 'soft space' between print items. */
2510 
2511 int
PyFile_SoftSpace(PyObject * f,int newflag)2512 PyFile_SoftSpace(PyObject *f, int newflag)
2513 {
2514     long oldflag = 0;
2515     if (f == NULL) {
2516         /* Do nothing */
2517     }
2518     else if (PyFile_Check(f)) {
2519         oldflag = ((PyFileObject *)f)->f_softspace;
2520         ((PyFileObject *)f)->f_softspace = newflag;
2521     }
2522     else {
2523         PyObject *v;
2524         v = PyObject_GetAttrString(f, "softspace");
2525         if (v == NULL)
2526             PyErr_Clear();
2527         else {
2528             if (PyInt_Check(v))
2529                 oldflag = PyInt_AsLong(v);
2530             assert(oldflag < INT_MAX);
2531             Py_DECREF(v);
2532         }
2533         v = PyInt_FromLong((long)newflag);
2534         if (v == NULL)
2535             PyErr_Clear();
2536         else {
2537             if (PyObject_SetAttrString(f, "softspace", v) != 0)
2538                 PyErr_Clear();
2539             Py_DECREF(v);
2540         }
2541     }
2542     return (int)oldflag;
2543 }
2544 
2545 /* Interfaces to write objects/strings to file-like objects */
2546 
2547 int
PyFile_WriteObject(PyObject * v,PyObject * f,int flags)2548 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2549 {
2550     PyObject *writer, *value, *args, *result;
2551     if (f == NULL) {
2552         PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2553         return -1;
2554     }
2555     else if (PyFile_Check(f)) {
2556         PyFileObject *fobj = (PyFileObject *) f;
2557 #ifdef Py_USING_UNICODE
2558         PyObject *enc = fobj->f_encoding;
2559         int result;
2560 #endif
2561         if (fobj->f_fp == NULL) {
2562             err_closed();
2563             return -1;
2564         }
2565 #ifdef Py_USING_UNICODE
2566         if ((flags & Py_PRINT_RAW) &&
2567             PyUnicode_Check(v) && enc != Py_None) {
2568             char *cenc = PyString_AS_STRING(enc);
2569             char *errors = fobj->f_errors == Py_None ?
2570               "strict" : PyString_AS_STRING(fobj->f_errors);
2571             value = PyUnicode_AsEncodedString(v, cenc, errors);
2572             if (value == NULL)
2573                 return -1;
2574         } else {
2575             value = v;
2576             Py_INCREF(value);
2577         }
2578         result = file_PyObject_Print(value, fobj, flags);
2579         Py_DECREF(value);
2580         return result;
2581 #else
2582         return file_PyObject_Print(v, fobj, flags);
2583 #endif
2584     }
2585     writer = PyObject_GetAttrString(f, "write");
2586     if (writer == NULL)
2587         return -1;
2588     if (flags & Py_PRINT_RAW) {
2589         if (PyUnicode_Check(v)) {
2590             value = v;
2591             Py_INCREF(value);
2592         } else
2593             value = PyObject_Str(v);
2594     }
2595     else
2596         value = PyObject_Repr(v);
2597     if (value == NULL) {
2598         Py_DECREF(writer);
2599         return -1;
2600     }
2601     args = PyTuple_Pack(1, value);
2602     if (args == NULL) {
2603         Py_DECREF(value);
2604         Py_DECREF(writer);
2605         return -1;
2606     }
2607     result = PyEval_CallObject(writer, args);
2608     Py_DECREF(args);
2609     Py_DECREF(value);
2610     Py_DECREF(writer);
2611     if (result == NULL)
2612         return -1;
2613     Py_DECREF(result);
2614     return 0;
2615 }
2616 
2617 int
PyFile_WriteString(const char * s,PyObject * f)2618 PyFile_WriteString(const char *s, PyObject *f)
2619 {
2620 
2621     if (f == NULL) {
2622         /* Should be caused by a pre-existing error */
2623         if (!PyErr_Occurred())
2624             PyErr_SetString(PyExc_SystemError,
2625                             "null file for PyFile_WriteString");
2626         return -1;
2627     }
2628     else if (PyFile_Check(f)) {
2629         PyFileObject *fobj = (PyFileObject *) f;
2630         FILE *fp = PyFile_AsFile(f);
2631         if (fp == NULL) {
2632             err_closed();
2633             return -1;
2634         }
2635         FILE_BEGIN_ALLOW_THREADS(fobj)
2636         fputs(s, fp);
2637         FILE_END_ALLOW_THREADS(fobj)
2638         return 0;
2639     }
2640     else if (!PyErr_Occurred()) {
2641         PyObject *v = PyString_FromString(s);
2642         int err;
2643         if (v == NULL)
2644             return -1;
2645         err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2646         Py_DECREF(v);
2647         return err;
2648     }
2649     else
2650         return -1;
2651 }
2652 
2653 /* Try to get a file-descriptor from a Python object.  If the object
2654    is an integer or long integer, its value is returned.  If not, the
2655    object's fileno() method is called if it exists; the method must return
2656    an integer or long integer, which is returned as the file descriptor value.
2657    -1 is returned on failure.
2658 */
2659 
PyObject_AsFileDescriptor(PyObject * o)2660 int PyObject_AsFileDescriptor(PyObject *o)
2661 {
2662     int fd;
2663     PyObject *meth;
2664 
2665     if (PyInt_Check(o)) {
2666         fd = _PyInt_AsInt(o);
2667     }
2668     else if (PyLong_Check(o)) {
2669         fd = _PyLong_AsInt(o);
2670     }
2671     else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2672     {
2673         PyObject *fno = PyEval_CallObject(meth, NULL);
2674         Py_DECREF(meth);
2675         if (fno == NULL)
2676             return -1;
2677 
2678         if (PyInt_Check(fno)) {
2679             fd = _PyInt_AsInt(fno);
2680             Py_DECREF(fno);
2681         }
2682         else if (PyLong_Check(fno)) {
2683             fd = _PyLong_AsInt(fno);
2684             Py_DECREF(fno);
2685         }
2686         else {
2687             PyErr_SetString(PyExc_TypeError,
2688                             "fileno() returned a non-integer");
2689             Py_DECREF(fno);
2690             return -1;
2691         }
2692     }
2693     else {
2694         PyErr_SetString(PyExc_TypeError,
2695                         "argument must be an int, or have a fileno() method.");
2696         return -1;
2697     }
2698 
2699     if (fd < 0) {
2700         PyErr_Format(PyExc_ValueError,
2701                      "file descriptor cannot be a negative integer (%i)",
2702                      fd);
2703         return -1;
2704     }
2705     return fd;
2706 }
2707 
2708 /* From here on we need access to the real fgets and fread */
2709 #undef fgets
2710 #undef fread
2711 
2712 /*
2713 ** Py_UniversalNewlineFgets is an fgets variation that understands
2714 ** all of \r, \n and \r\n conventions.
2715 ** The stream should be opened in binary mode.
2716 ** If fobj is NULL the routine always does newline conversion, and
2717 ** it may peek one char ahead to gobble the second char in \r\n.
2718 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2719 ** is no readahead but in stead a flag is used to skip a following
2720 ** \n on the next read. Also, if the file is open in binary mode
2721 ** the whole conversion is skipped. Finally, the routine keeps track of
2722 ** the different types of newlines seen.
2723 ** Note that we need no error handling: fgets() treats error and eof
2724 ** identically.
2725 */
2726 char *
Py_UniversalNewlineFgets(char * buf,int n,FILE * stream,PyObject * fobj)2727 Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2728 {
2729     char *p = buf;
2730     int c;
2731     int newlinetypes = 0;
2732     int skipnextlf = 0;
2733     int univ_newline = 1;
2734 
2735     if (fobj) {
2736         if (!PyFile_Check(fobj)) {
2737             errno = ENXIO;              /* What can you do... */
2738             return NULL;
2739         }
2740         univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2741         if ( !univ_newline )
2742             return fgets(buf, n, stream);
2743         newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2744         skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2745     }
2746     FLOCKFILE(stream);
2747     c = 'x'; /* Shut up gcc warning */
2748     while (--n > 0 && (c = GETC(stream)) != EOF ) {
2749         if (skipnextlf ) {
2750             skipnextlf = 0;
2751             if (c == '\n') {
2752                 /* Seeing a \n here with skipnextlf true
2753                 ** means we saw a \r before.
2754                 */
2755                 newlinetypes |= NEWLINE_CRLF;
2756                 c = GETC(stream);
2757                 if (c == EOF) break;
2758             } else {
2759                 /*
2760                 ** Note that c == EOF also brings us here,
2761                 ** so we're okay if the last char in the file
2762                 ** is a CR.
2763                 */
2764                 newlinetypes |= NEWLINE_CR;
2765             }
2766         }
2767         if (c == '\r') {
2768             /* A \r is translated into a \n, and we skip
2769             ** an adjacent \n, if any. We don't set the
2770             ** newlinetypes flag until we've seen the next char.
2771             */
2772             skipnextlf = 1;
2773             c = '\n';
2774         } else if ( c == '\n') {
2775             newlinetypes |= NEWLINE_LF;
2776         }
2777         *p++ = c;
2778         if (c == '\n') break;
2779     }
2780     if ( c == EOF && skipnextlf )
2781         newlinetypes |= NEWLINE_CR;
2782     FUNLOCKFILE(stream);
2783     *p = '\0';
2784     if (fobj) {
2785         ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2786         ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2787     } else if ( skipnextlf ) {
2788         /* If we have no file object we cannot save the
2789         ** skipnextlf flag. We have to readahead, which
2790         ** will cause a pause if we're reading from an
2791         ** interactive stream, but that is very unlikely
2792         ** unless we're doing something silly like
2793         ** execfile("/dev/tty").
2794         */
2795         c = GETC(stream);
2796         if ( c != '\n' )
2797             ungetc(c, stream);
2798     }
2799     if (p == buf)
2800         return NULL;
2801     return buf;
2802 }
2803 
2804 /*
2805 ** Py_UniversalNewlineFread is an fread variation that understands
2806 ** all of \r, \n and \r\n conventions.
2807 ** The stream should be opened in binary mode.
2808 ** fobj must be a PyFileObject. In this case there
2809 ** is no readahead but in stead a flag is used to skip a following
2810 ** \n on the next read. Also, if the file is open in binary mode
2811 ** the whole conversion is skipped. Finally, the routine keeps track of
2812 ** the different types of newlines seen.
2813 */
2814 size_t
Py_UniversalNewlineFread(char * buf,size_t n,FILE * stream,PyObject * fobj)2815 Py_UniversalNewlineFread(char *buf, size_t n,
2816                          FILE *stream, PyObject *fobj)
2817 {
2818     char *dst = buf;
2819     PyFileObject *f = (PyFileObject *)fobj;
2820     int newlinetypes, skipnextlf;
2821 
2822     assert(buf != NULL);
2823     assert(stream != NULL);
2824 
2825     if (!fobj || !PyFile_Check(fobj)) {
2826         errno = ENXIO;          /* What can you do... */
2827         return 0;
2828     }
2829     if (!f->f_univ_newline)
2830         return fread(buf, 1, n, stream);
2831     newlinetypes = f->f_newlinetypes;
2832     skipnextlf = f->f_skipnextlf;
2833     /* Invariant:  n is the number of bytes remaining to be filled
2834      * in the buffer.
2835      */
2836     while (n) {
2837         size_t nread;
2838         int shortread;
2839         char *src = dst;
2840 
2841         nread = fread(dst, 1, n, stream);
2842         assert(nread <= n);
2843         if (nread == 0)
2844             break;
2845 
2846         n -= nread; /* assuming 1 byte out for each in; will adjust */
2847         shortread = n != 0;             /* true iff EOF or error */
2848         while (nread--) {
2849             char c = *src++;
2850             if (c == '\r') {
2851                 /* Save as LF and set flag to skip next LF. */
2852                 *dst++ = '\n';
2853                 skipnextlf = 1;
2854             }
2855             else if (skipnextlf && c == '\n') {
2856                 /* Skip LF, and remember we saw CR LF. */
2857                 skipnextlf = 0;
2858                 newlinetypes |= NEWLINE_CRLF;
2859                 ++n;
2860             }
2861             else {
2862                 /* Normal char to be stored in buffer.  Also
2863                  * update the newlinetypes flag if either this
2864                  * is an LF or the previous char was a CR.
2865                  */
2866                 if (c == '\n')
2867                     newlinetypes |= NEWLINE_LF;
2868                 else if (skipnextlf)
2869                     newlinetypes |= NEWLINE_CR;
2870                 *dst++ = c;
2871                 skipnextlf = 0;
2872             }
2873         }
2874         if (shortread) {
2875             /* If this is EOF, update type flags. */
2876             if (skipnextlf && feof(stream))
2877                 newlinetypes |= NEWLINE_CR;
2878             break;
2879         }
2880     }
2881     f->f_newlinetypes = newlinetypes;
2882     f->f_skipnextlf = skipnextlf;
2883     return dst - buf;
2884 }
2885 
2886 #ifdef __cplusplus
2887 }
2888 #endif
2889