• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Helper class to quickly write a loop over all standard input files.
2
3Typical use is:
4
5    import fileinput
6    for line in fileinput.input():
7        process(line)
8
9This iterates over the lines of all files listed in sys.argv[1:],
10defaulting to sys.stdin if the list is empty.  If a filename is '-' it
11is also replaced by sys.stdin and the optional arguments mode and
12openhook are ignored.  To specify an alternative list of filenames,
13pass it as the argument to input().  A single file name is also allowed.
14
15Functions filename(), lineno() return the filename and cumulative line
16number of the line that has just been read; filelineno() returns its
17line number in the current file; isfirstline() returns true iff the
18line just read is the first line of its file; isstdin() returns true
19iff the line was read from sys.stdin.  Function nextfile() closes the
20current file so that the next iteration will read the first line from
21the next file (if any); lines not read from the file will not count
22towards the cumulative line count; the filename is not changed until
23after the first line of the next file has been read.  Function close()
24closes the sequence.
25
26Before any lines have been read, filename() returns None and both line
27numbers are zero; nextfile() has no effect.  After all lines have been
28read, filename() and the line number functions return the values
29pertaining to the last line read; nextfile() has no effect.
30
31All files are opened in text mode by default; you can override this by
32setting the mode parameter to input() or FileInput.__init__().
33If an I/O error occurs during opening or reading a file, the OSError
34exception is raised.
35
36If sys.stdin is used more than once, the second and further use will
37return no lines, except perhaps for interactive use, or if it has been
38explicitly reset (e.g. using sys.stdin.seek(0)).
39
40Empty files are opened and immediately closed; the only time their
41presence in the list of filenames is noticeable at all is when the
42last file opened is empty.
43
44It is possible that the last line of a file doesn't end in a newline
45character; otherwise lines are returned including the trailing
46newline.
47
48Class FileInput is the implementation; its methods filename(),
49lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
50correspond to the functions in the module.  In addition it has a
51readline() method which returns the next input line, and a
52__getitem__() method which implements the sequence behavior.  The
53sequence must be accessed in strictly sequential order; sequence
54access and readline() cannot be mixed.
55
56Optional in-place filtering: if the keyword argument inplace=1 is
57passed to input() or to the FileInput constructor, the file is moved
58to a backup file and standard output is directed to the input file.
59This makes it possible to write a filter that rewrites its input file
60in place.  If the keyword argument backup=".<some extension>" is also
61given, it specifies the extension for the backup file, and the backup
62file remains around; by default, the extension is ".bak" and it is
63deleted when the output file is closed.  In-place filtering is
64disabled when standard input is read.  XXX The current implementation
65does not work for MS-DOS 8+3 filesystems.
66
67XXX Possible additions:
68
69- optional getopt argument processing
70- isatty()
71- read(), read(size), even readlines()
72
73"""
74
75import sys, os
76
# Public names exported by "from <this module> import *".
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput", "hook_compressed",
           "hook_encoded"]

# The FileInput instance created by the most recent input() call.  The
# module-level helper functions delegate to it; close() resets it to None.
_state = None
82
def input(files=None, inplace=False, backup="", *, mode="r", openhook=None):
    """Create a FileInput object, remember it as module state, and return it.

    Every argument is forwarded unchanged to the FileInput constructor.
    The returned object can be iterated directly; it is also the object
    that the other module-level functions operate on.

    Raises RuntimeError if a previously created instance still has a
    file open.
    """
    global _state
    already_reading = _state and _state._file
    if already_reading:
        raise RuntimeError("input() already active")
    reader = FileInput(files, inplace, backup, mode=mode, openhook=openhook)
    _state = reader
    return reader
95
def close():
    """Close the sequence and forget the module-level state."""
    global _state
    # Detach the state first so it is cleared even if closing raises.
    previous, _state = _state, None
    if previous:
        previous.close()
103
def nextfile():
    """
    Close the current file so that the next iteration reads the first
    line of the following file, if there is one.  Lines that were not
    read from the closed file do not count towards the cumulative line
    count.  The filename is not changed until after the first line of
    the next file has been read.

    Before the first line has been read this function has no effect, so
    it cannot be used to skip the first file.  It also has no effect
    after the last line of the last file has been read.

    Raises RuntimeError when there is no active input().
    """
    if _state:
        return _state.nextfile()
    raise RuntimeError("no active input()")
117
def filename():
    """
    Return the name of the file currently being read, or None if no
    line has been read yet.

    Raises RuntimeError when there is no active input().
    """
    if _state:
        return _state.filename()
    raise RuntimeError("no active input()")
126
def lineno():
    """
    Return the cumulative line number of the line that has just been
    read.  The result is 0 before the first line has been read; after
    the last line of the last file has been read it is the line number
    of that line.

    Raises RuntimeError when there is no active input().
    """
    if _state:
        return _state.lineno()
    raise RuntimeError("no active input()")
136
def filelineno():
    """
    Return the line number within the current file.  The result is 0
    before the first line has been read; after the last line of the
    last file has been read it is that line's number within its file.

    Raises RuntimeError when there is no active input().
    """
    if _state:
        return _state.filelineno()
    raise RuntimeError("no active input()")
146
def fileno():
    """
    Return the file number of the current file, or -1 when no file is
    currently opened.

    Raises RuntimeError when there is no active input().
    """
    if _state:
        return _state.fileno()
    raise RuntimeError("no active input()")
155
def isfirstline():
    """
    Return true if the line just read is the first line of its file,
    otherwise return false.

    Raises RuntimeError when there is no active input().
    """
    if _state:
        return _state.isfirstline()
    raise RuntimeError("no active input()")
164
def isstdin():
    """
    Return true if the last line was read from sys.stdin, otherwise
    return false.

    Raises RuntimeError when there is no active input().
    """
    if _state:
        return _state.isstdin()
    raise RuntimeError("no active input()")
173
class FileInput:
    """FileInput([files[, inplace[, backup]]], *, mode="r", openhook=None)

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=False, backup="", *,
                 mode="r", openhook=None):
        """Record what to read; no file is opened until the first read.

        files    -- a single name (str or os.PathLike) or an iterable of
                    names.  None means sys.argv[1:]; an empty sequence
                    means ('-',), i.e. sys.stdin.
        inplace  -- if true, each file is renamed to a backup name and
                    sys.stdout is redirected to a new file with the
                    original name while that file is being read.
        backup   -- extension appended to form the backup name.  When it
                    is empty ".bak" is used and the backup is deleted
                    when the file is closed.
        mode     -- one of 'r', 'rU', 'U' or 'rb'.
        openhook -- callable taking (filename, mode), used instead of
                    open(); cannot be combined with inplace.

        Raises ValueError for any other mode, for an openhook that is
        not callable, or when both openhook and inplace are given.
        """
        # Initialise everything close()/nextfile() reads *before* any
        # statement that may raise.  __del__ runs even when __init__
        # fails (e.g. tuple(files) on a non-iterable), and it must not
        # trip over missing attributes.
        self._files = ()
        self._inplace = inplace
        self._backup = backup
        self._savestdout = None
        self._output = None
        self._filename = None
        self._startlineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None

        if isinstance(files, str):
            files = (files,)
        elif isinstance(files, os.PathLike):
            files = (os.fspath(files), )
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files

        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        if 'U' in mode:
            import warnings
            warnings.warn("'U' mode is deprecated",
                          DeprecationWarning, 2)
        self._mode = mode
        # Mode used for the in-place output file: 'r' -> 'w', 'rb' -> 'wb';
        # the 'U' variants have no write counterpart, so plain 'w' is used.
        self._write_mode = mode.replace('r', 'w') if 'U' not in mode else 'w'
        if openhook:
            if inplace:
                raise ValueError("FileInput cannot use an opening hook in inplace mode")
            if not callable(openhook):
                raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        """Close the sequence when the object is garbage collected."""
        self.close()

    def close(self):
        """Close the current file and drop all files not yet opened."""
        try:
            self.nextfile()
        finally:
            # Even if closing failed, make sure nothing more is opened.
            self._files = ()

    def __enter__(self):
        """Return the instance itself for use in a with statement."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the sequence on leaving the with block."""
        self.close()

    def __iter__(self):
        """Return the instance itself; it is its own iterator."""
        return self

    def __next__(self):
        """Return the next line, advancing to the next file as needed.

        Raises StopIteration once all files are exhausted.
        """
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                # Empty result with no open file: the file list is used up.
                raise StopIteration
            self.nextfile()
            # repeat with next file

    def __getitem__(self, i):
        """Deprecated sequence access; *i* must equal lineno().

        Emits a DeprecationWarning.  Raises RuntimeError when lines are
        requested out of order and IndexError at the end of the input.
        """
        import warnings
        warnings.warn(
            "Support for indexing FileInput objects is deprecated. "
            "Use iterator protocol instead.",
            DeprecationWarning,
            stacklevel=2
        )
        if i != self.lineno():
            raise RuntimeError("accessing lines out of order")
        try:
            return self.__next__()
        except StopIteration:
            # The StopIteration is an implementation detail; keep it out
            # of the traceback the caller sees.
            raise IndexError("end of input reached") from None

    def nextfile(self):
        """Close the current file, undoing any in-place redirection.

        Restores sys.stdout if it was redirected, closes the in-place
        output file and the input file (sys.stdin itself is never
        closed), and deletes the backup file unless a backup extension
        was given.  The nested try/finally blocks make sure each later
        cleanup step still runs when an earlier one raises.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            file = self._file
            self._file = None
            try:
                del self._readline  # restore FileInput._readline
            except AttributeError:
                pass
            try:
                if file and not self._isstdin:
                    file.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                if backupfilename and not self._backup:
                    try: os.unlink(backupfilename)
                    except OSError: pass

                self._isstdin = False

    def readline(self):
        """Return the next line, advancing to the next file as needed.

        Returns an empty string (or empty bytes in binary mode) once all
        files are exhausted.
        """
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                return line
            self.nextfile()
            # repeat with next file

    def _readline(self):
        """Open the next file and return its first line.

        This method is only reached while no file is open: once a file
        has been opened, an instance attribute of the same name (the
        file's own readline) hides it until nextfile() deletes that
        attribute again.

        Returns '' or b'' (depending on the mode) when no files remain.
        """
        if not self._files:
            if 'b' in self._mode:
                return b''
            else:
                return ''
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._startlineno = self.lineno()
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            if 'b' in self._mode:
                self._file = getattr(sys.stdin, 'buffer', sys.stdin)
            else:
                self._file = sys.stdin
            self._isstdin = True
        else:
            if self._inplace:
                self._backupfilename = (
                    os.fspath(self._filename) + (self._backup or ".bak"))
                try:
                    os.unlink(self._backupfilename)
                except OSError:
                    pass
                # The next few lines may raise OSError
                os.rename(self._filename, self._backupfilename)
                self._file = open(self._backupfilename, self._mode)
                try:
                    perm = os.fstat(self._file.fileno()).st_mode
                except OSError:
                    self._output = open(self._filename, self._write_mode)
                else:
                    # Recreate the file with the permission bits of the
                    # original.
                    mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
                    if hasattr(os, 'O_BINARY'):
                        mode |= os.O_BINARY

                    fd = os.open(self._filename, mode, perm)
                    self._output = os.fdopen(fd, self._write_mode)
                    try:
                        os.chmod(self._filename, perm)
                    except OSError:
                        pass
                self._savestdout = sys.stdout
                sys.stdout = self._output
            else:
                # This may raise OSError
                if self._openhook:
                    self._file = self._openhook(self._filename, self._mode)
                else:
                    self._file = open(self._filename, self._mode)
        self._readline = self._file.readline  # hide FileInput._readline
        return self._readline()

    def filename(self):
        """Return the name of the file being read (None before any read)."""
        return self._filename

    def lineno(self):
        """Return the cumulative line number of the line just read."""
        return self._startlineno + self._filelineno

    def filelineno(self):
        """Return the line number within the current file."""
        return self._filelineno

    def fileno(self):
        """Return the current file's descriptor, or -1 if unavailable.

        -1 is returned when no file is open and when the file object's
        fileno() raises ValueError.
        """
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return True if the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return True if the current file is sys.stdin."""
        return self._isstdin
393
394
def hook_compressed(filename, mode, *, encoding=None, errors=None):
    """Open *filename*, transparently decompressing .gz and .bz2 files.

    Intended for use as an openhook.  The extension of *filename*
    selects gzip.open(), bz2.BZ2File() or the builtin open().

    gzip and bz2 streams are binary regardless of *mode*, so for a text
    mode (no 'b' in *mode*) the compressed stream is wrapped in an
    io.TextIOWrapper.  That way compressed files yield str lines just as
    uncompressed files do.  *encoding* and *errors* apply to text mode
    and are passed to open() or to the TextIOWrapper.
    """
    ext = os.path.splitext(filename)[1]
    if ext == '.gz':
        import gzip
        stream = gzip.open(filename, mode)
    elif ext == '.bz2':
        import bz2
        stream = bz2.BZ2File(filename, mode)
    else:
        return open(filename, mode, encoding=encoding, errors=errors)

    if 'b' in mode:
        return stream

    import io
    try:
        return io.TextIOWrapper(stream, encoding=encoding, errors=errors)
    except Exception:
        # Do not leak the already opened compressed stream.
        stream.close()
        raise
405
406
def hook_encoded(encoding, errors=None):
    """Return an openhook that opens files with the given text options.

    The returned callable takes (filename, mode) and calls open() with
    the *encoding* and *errors* captured here.
    """
    text_options = {"encoding": encoding, "errors": errors}

    def openhook(filename, mode):
        return open(filename, mode, **text_options)

    return openhook
411
412
def _test():
    """Command-line demo: number every line of the files in sys.argv.

    Options: -i turns on in-place mode, -b EXT sets the backup
    extension.  Each line is printed with its cumulative line number,
    file name and per-file line number; the first line of each file is
    marked with "*".  A final summary line is printed at the end.
    """
    import getopt
    inplace = False
    backup = False
    options, paths = getopt.getopt(sys.argv[1:], "ib:")
    for flag, value in options:
        if flag == '-i':
            inplace = True
        elif flag == '-b':
            backup = value
    for line in input(paths, inplace=inplace, backup=backup):
        # Strip one trailing newline, then one trailing carriage return.
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        marker = "*" if isfirstline() else ""
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   marker, line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))
427
# Run the command-line demo when this file is executed as a script.
if __name__ == '__main__':
    _test()
430