• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Helper class to quickly write a loop over all standard input files.
2
3Typical use is:
4
5    import fileinput
6    for line in fileinput.input(encoding="utf-8"):
7        process(line)
8
9This iterates over the lines of all files listed in sys.argv[1:],
10defaulting to sys.stdin if the list is empty.  If a filename is '-' it
11is also replaced by sys.stdin and the optional arguments mode and
12openhook are ignored.  To specify an alternative list of filenames,
13pass it as the argument to input().  A single file name is also allowed.
14
15Functions filename(), lineno() return the filename and cumulative line
16number of the line that has just been read; filelineno() returns its
17line number in the current file; isfirstline() returns true iff the
18line just read is the first line of its file; isstdin() returns true
19iff the line was read from sys.stdin.  Function nextfile() closes the
20current file so that the next iteration will read the first line from
21the next file (if any); lines not read from the file will not count
22towards the cumulative line count; the filename is not changed until
23after the first line of the next file has been read.  Function close()
24closes the sequence.
25
26Before any lines have been read, filename() returns None and both line
27numbers are zero; nextfile() has no effect.  After all lines have been
28read, filename() and the line number functions return the values
29pertaining to the last line read; nextfile() has no effect.
30
All files are opened in text mode by default; you can override this by
passing the mode parameter to input() or FileInput.__init__().
If an I/O error occurs while opening or reading a file, OSError is
raised.
35
36If sys.stdin is used more than once, the second and further use will
37return no lines, except perhaps for interactive use, or if it has been
38explicitly reset (e.g. using sys.stdin.seek(0)).
39
40Empty files are opened and immediately closed; the only time their
41presence in the list of filenames is noticeable at all is when the
42last file opened is empty.
43
44It is possible that the last line of a file doesn't end in a newline
45character; otherwise lines are returned including the trailing
46newline.
47
Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), fileno(), isfirstline(), isstdin(), nextfile()
and close() correspond to the functions in the module.  In addition it
has a readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.
55
56Optional in-place filtering: if the keyword argument inplace=1 is
57passed to input() or to the FileInput constructor, the file is moved
58to a backup file and standard output is directed to the input file.
59This makes it possible to write a filter that rewrites its input file
60in place.  If the keyword argument backup=".<some extension>" is also
61given, it specifies the extension for the backup file, and the backup
62file remains around; by default, the extension is ".bak" and it is
63deleted when the output file is closed.  In-place filtering is
64disabled when standard input is read.  XXX The current implementation
65does not work for MS-DOS 8+3 filesystems.
66"""
67
68import io
69import sys, os
70from types import GenericAlias
71
# Public names exported by a star-import of this module.
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput", "hook_compressed",
           "hook_encoded"]

# Module-wide FileInput instance shared by the module-level helper
# functions; None while no input() sequence is active.
_state = None
77
def input(files=None, inplace=False, backup="", *, mode="r", openhook=None,
          encoding=None, errors=None):
    """Create the module-wide FileInput instance and return it.

    Every argument is forwarded unchanged to the FileInput constructor.
    The returned instance can be iterated directly and is also stored as
    the global state consulted by the other functions of this module.

    Raises RuntimeError if a previous input() still has a file open.
    """
    global _state
    previous = _state
    if previous and previous._file:
        raise RuntimeError("input() already active")
    _state = FileInput(
        files,
        inplace,
        backup,
        mode=mode,
        openhook=openhook,
        encoding=encoding,
        errors=errors,
    )
    return _state
92
def close():
    """Close the active input sequence, if any, and forget it."""
    global _state
    # Clear the global first so the module is reset even if close() raises.
    active, _state = _state, None
    if active:
        active.close()
100
def nextfile():
    """
    Close the current file so that the next read starts at the first line
    of the following file (if there is one). Lines left unread in the
    closed file are not added to the cumulative line count, and the
    reported filename only changes once the first line of the next file
    has been read.
    This is a no-op before the first line has been read (so it cannot be
    used to skip the first file) and after the last line of the last
    file has been read.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.nextfile()
    raise RuntimeError("no active input()")
114
def filename():
    """
    Return the name of the file the last line was read from, or None
    if no line has been read yet.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.filename()
    raise RuntimeError("no active input()")
123
def lineno():
    """
    Return the cumulative (across all files) number of the line that was
    just read. The value is 0 before anything has been read; once the
    input is exhausted it stays at the number of the final line.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.lineno()
    raise RuntimeError("no active input()")
133
def filelineno():
    """
    Return the number of the line just read, counted within its own
    file. The value is 0 before anything has been read; once the input
    is exhausted it stays at the final line's number within the last file.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.filelineno()
    raise RuntimeError("no active input()")
143
def fileno():
    """
    Return the file descriptor number of the file currently open, or -1
    when no file is open.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.fileno()
    raise RuntimeError("no active input()")
152
def isfirstline():
    """
    Return true if the line just read is the first line of its file,
    otherwise return false.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.isfirstline()
    raise RuntimeError("no active input()")
161
def isstdin():
    """
    Return true if the last line came from sys.stdin, otherwise
    return false.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.isstdin()
    raise RuntimeError("no active input()")
170
171class FileInput:
172    """FileInput([files[, inplace[, backup]]], *, mode=None, openhook=None)
173
174    Class FileInput is the implementation of the module; its methods
175    filename(), lineno(), fileline(), isfirstline(), isstdin(), fileno(),
176    nextfile() and close() correspond to the functions of the same name
177    in the module.
178    In addition it has a readline() method which returns the next
179    input line, and a __getitem__() method which implements the
180    sequence behavior. The sequence must be accessed in strictly
181    sequential order; random access and readline() cannot be mixed.
182    """
183
184    def __init__(self, files=None, inplace=False, backup="", *,
185                 mode="r", openhook=None, encoding=None, errors=None):
186        if isinstance(files, str):
187            files = (files,)
188        elif isinstance(files, os.PathLike):
189            files = (os.fspath(files), )
190        else:
191            if files is None:
192                files = sys.argv[1:]
193            if not files:
194                files = ('-',)
195            else:
196                files = tuple(files)
197        self._files = files
198        self._inplace = inplace
199        self._backup = backup
200        self._savestdout = None
201        self._output = None
202        self._filename = None
203        self._startlineno = 0
204        self._filelineno = 0
205        self._file = None
206        self._isstdin = False
207        self._backupfilename = None
208        self._encoding = encoding
209        self._errors = errors
210
211        # We can not use io.text_encoding() here because old openhook doesn't
212        # take encoding parameter.
213        if (sys.flags.warn_default_encoding and
214                "b" not in mode and encoding is None and openhook is None):
215            import warnings
216            warnings.warn("'encoding' argument not specified.",
217                          EncodingWarning, 2)
218
219        # restrict mode argument to reading modes
220        if mode not in ('r', 'rU', 'U', 'rb'):
221            raise ValueError("FileInput opening mode must be one of "
222                             "'r', 'rU', 'U' and 'rb'")
223        if 'U' in mode:
224            import warnings
225            warnings.warn("'U' mode is deprecated",
226                          DeprecationWarning, 2)
227        self._mode = mode
228        self._write_mode = mode.replace('r', 'w') if 'U' not in mode else 'w'
229        if openhook:
230            if inplace:
231                raise ValueError("FileInput cannot use an opening hook in inplace mode")
232            if not callable(openhook):
233                raise ValueError("FileInput openhook must be callable")
234        self._openhook = openhook
235
236    def __del__(self):
237        self.close()
238
239    def close(self):
240        try:
241            self.nextfile()
242        finally:
243            self._files = ()
244
245    def __enter__(self):
246        return self
247
248    def __exit__(self, type, value, traceback):
249        self.close()
250
251    def __iter__(self):
252        return self
253
254    def __next__(self):
255        while True:
256            line = self._readline()
257            if line:
258                self._filelineno += 1
259                return line
260            if not self._file:
261                raise StopIteration
262            self.nextfile()
263            # repeat with next file
264
265    def __getitem__(self, i):
266        import warnings
267        warnings.warn(
268            "Support for indexing FileInput objects is deprecated. "
269            "Use iterator protocol instead.",
270            DeprecationWarning,
271            stacklevel=2
272        )
273        if i != self.lineno():
274            raise RuntimeError("accessing lines out of order")
275        try:
276            return self.__next__()
277        except StopIteration:
278            raise IndexError("end of input reached")
279
280    def nextfile(self):
281        savestdout = self._savestdout
282        self._savestdout = None
283        if savestdout:
284            sys.stdout = savestdout
285
286        output = self._output
287        self._output = None
288        try:
289            if output:
290                output.close()
291        finally:
292            file = self._file
293            self._file = None
294            try:
295                del self._readline  # restore FileInput._readline
296            except AttributeError:
297                pass
298            try:
299                if file and not self._isstdin:
300                    file.close()
301            finally:
302                backupfilename = self._backupfilename
303                self._backupfilename = None
304                if backupfilename and not self._backup:
305                    try: os.unlink(backupfilename)
306                    except OSError: pass
307
308                self._isstdin = False
309
310    def readline(self):
311        while True:
312            line = self._readline()
313            if line:
314                self._filelineno += 1
315                return line
316            if not self._file:
317                return line
318            self.nextfile()
319            # repeat with next file
320
321    def _readline(self):
322        if not self._files:
323            if 'b' in self._mode:
324                return b''
325            else:
326                return ''
327        self._filename = self._files[0]
328        self._files = self._files[1:]
329        self._startlineno = self.lineno()
330        self._filelineno = 0
331        self._file = None
332        self._isstdin = False
333        self._backupfilename = 0
334
335        # EncodingWarning is emitted in __init__() already
336        if "b" not in self._mode:
337            encoding = self._encoding or "locale"
338        else:
339            encoding = None
340
341        if self._filename == '-':
342            self._filename = '<stdin>'
343            if 'b' in self._mode:
344                self._file = getattr(sys.stdin, 'buffer', sys.stdin)
345            else:
346                self._file = sys.stdin
347            self._isstdin = True
348        else:
349            if self._inplace:
350                self._backupfilename = (
351                    os.fspath(self._filename) + (self._backup or ".bak"))
352                try:
353                    os.unlink(self._backupfilename)
354                except OSError:
355                    pass
356                # The next few lines may raise OSError
357                os.rename(self._filename, self._backupfilename)
358                self._file = open(self._backupfilename, self._mode, encoding=encoding)
359                try:
360                    perm = os.fstat(self._file.fileno()).st_mode
361                except OSError:
362                    self._output = open(self._filename, self._write_mode, encoding=encoding)
363                else:
364                    mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
365                    if hasattr(os, 'O_BINARY'):
366                        mode |= os.O_BINARY
367
368                    fd = os.open(self._filename, mode, perm)
369                    self._output = os.fdopen(fd, self._write_mode, encoding=encoding)
370                    try:
371                        os.chmod(self._filename, perm)
372                    except OSError:
373                        pass
374                self._savestdout = sys.stdout
375                sys.stdout = self._output
376            else:
377                # This may raise OSError
378                if self._openhook:
379                    # Custom hooks made previous to Python 3.10 didn't have
380                    # encoding argument
381                    if self._encoding is None:
382                        self._file = self._openhook(self._filename, self._mode)
383                    else:
384                        self._file = self._openhook(
385                            self._filename, self._mode, encoding=self._encoding, errors=self._errors)
386                else:
387                    self._file = open(self._filename, self._mode, encoding=encoding, errors=self._errors)
388        self._readline = self._file.readline  # hide FileInput._readline
389        return self._readline()
390
391    def filename(self):
392        return self._filename
393
394    def lineno(self):
395        return self._startlineno + self._filelineno
396
397    def filelineno(self):
398        return self._filelineno
399
400    def fileno(self):
401        if self._file:
402            try:
403                return self._file.fileno()
404            except ValueError:
405                return -1
406        else:
407            return -1
408
409    def isfirstline(self):
410        return self._filelineno == 1
411
412    def isstdin(self):
413        return self._isstdin
414
415    __class_getitem__ = classmethod(GenericAlias)
416
417
def hook_compressed(filename, mode, *, encoding=None, errors=None):
    """Open hook that transparently decompresses .gz and .bz2 files.

    Files with any other extension are opened with the builtin open().
    In text mode the decompressed stream is wrapped in a TextIOWrapper
    using *encoding* (the locale encoding when None) and *errors*.
    In binary mode no default encoding is applied, so plain files can be
    opened with mode 'rb'.
    """
    binary = "b" in mode
    # EncodingWarning is emitted in FileInput() already.  Only default the
    # encoding for text mode: open() rejects an encoding in binary mode.
    if encoding is None and not binary:
        encoding = "locale"
    ext = os.path.splitext(filename)[1]
    if ext == '.gz':
        import gzip
        stream = gzip.open(filename, mode)
    elif ext == '.bz2':
        import bz2
        stream = bz2.BZ2File(filename, mode)
    else:
        return open(filename, mode, encoding=encoding, errors=errors)

    # gzip and bz2 are binary mode by default.
    if not binary:
        stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors)
    return stream
435
436
def hook_encoded(encoding, errors=None):
    """Return an open hook that opens every file with a fixed encoding.

    The returned callable takes (filename, mode) and calls open() with
    the *encoding* and *errors* given here.
    """
    text_options = {"encoding": encoding, "errors": errors}

    def openhook(filename, mode):
        return open(filename, mode, **text_options)

    return openhook
441
442
def _test():
    """Command-line self-test: echo each input line with its position.

    Options: -i turns on in-place filtering, -b EXT sets the backup
    extension.  Remaining arguments are the files to read.  Each line is
    printed with its cumulative line number, file name and per-file line
    number (a "*" marks the first line of a file), followed by a final
    summary line.
    """
    import getopt

    options, filenames = getopt.getopt(sys.argv[1:], "ib:")
    inplace = False
    backup = False
    for flag, value in options:
        if flag == '-i':
            inplace = True
        if flag == '-b':
            backup = value

    for line in input(filenames, inplace=inplace, backup=backup):
        # Drop one trailing "\n" and then one trailing "\r", if present.
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        marker = "*" if isfirstline() else ""
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   marker, line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))
457
# Run the command-line self-test when this file is executed as a script.
if __name__ == '__main__':
    _test()
460