• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Helper class to quickly write a loop over all standard input files.
2
3Typical use is:
4
5    import fileinput
6    for line in fileinput.input():
7        process(line)
8
9This iterates over the lines of all files listed in sys.argv[1:],
10defaulting to sys.stdin if the list is empty.  If a filename is '-' it
11is also replaced by sys.stdin.  To specify an alternative list of
12filenames, pass it as the argument to input().  A single file name is
13also allowed.
14
15Functions filename(), lineno() return the filename and cumulative line
16number of the line that has just been read; filelineno() returns its
17line number in the current file; isfirstline() returns true iff the
18line just read is the first line of its file; isstdin() returns true
19iff the line was read from sys.stdin.  Function nextfile() closes the
20current file so that the next iteration will read the first line from
21the next file (if any); lines not read from the file will not count
22towards the cumulative line count; the filename is not changed until
23after the first line of the next file has been read.  Function close()
24closes the sequence.
25
26Before any lines have been read, filename() returns None and both line
27numbers are zero; nextfile() has no effect.  After all lines have been
28read, filename() and the line number functions return the values
29pertaining to the last line read; nextfile() has no effect.
30
All files are opened in text mode by default; you can override this by
setting the mode parameter to input() or FileInput.__init__().
If an I/O error occurs while opening or reading a file, OSError is
raised.
35
36If sys.stdin is used more than once, the second and further use will
37return no lines, except perhaps for interactive use, or if it has been
38explicitly reset (e.g. using sys.stdin.seek(0)).
39
40Empty files are opened and immediately closed; the only time their
41presence in the list of filenames is noticeable at all is when the
42last file opened is empty.
43
44It is possible that the last line of a file doesn't end in a newline
45character; otherwise lines are returned including the trailing
46newline.
47
Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), fileno(), isfirstline(), isstdin(), nextfile()
and close() correspond to the functions in the module.  In addition it
has a readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.
55
56Optional in-place filtering: if the keyword argument inplace=1 is
57passed to input() or to the FileInput constructor, the file is moved
58to a backup file and standard output is directed to the input file.
59This makes it possible to write a filter that rewrites its input file
60in place.  If the keyword argument backup=".<some extension>" is also
61given, it specifies the extension for the backup file, and the backup
62file remains around; by default, the extension is ".bak" and it is
63deleted when the output file is closed.  In-place filtering is
64disabled when standard input is read.  XXX The current implementation
65does not work for MS-DOS 8+3 filesystems.
66
67XXX Possible additions:
68
69- optional getopt argument processing
70- isatty()
71- read(), read(size), even readlines()
72
73"""
74
75import sys, os
76
# Names exported by ``from fileinput import *``.
__all__ = [
    "input",
    "close",
    "nextfile",
    "filename",
    "lineno",
    "filelineno",
    "fileno",
    "isfirstline",
    "isstdin",
    "FileInput",
    "hook_compressed",
    "hook_encoded",
]

# The FileInput instance created by the most recent input() call, or None
# when there is none; the module-level helper functions delegate to it.
_state = None
82
def input(files=None, inplace=False, backup="", bufsize=0,
          mode="r", openhook=None):
    """Create the module-wide FileInput instance and return it.

    All parameters are forwarded unchanged to the FileInput constructor.
    The returned object can be iterated directly, and it is also stored
    as the global state that the other functions of this module consult.

    Raises RuntimeError if a previous input() still has a file open.
    """
    global _state
    still_reading = _state and _state._file
    if still_reading:
        raise RuntimeError("input() already active")
    _state = FileInput(files=files, inplace=inplace, backup=backup,
                       bufsize=bufsize, mode=mode, openhook=openhook)
    return _state
96
def close():
    """Close the sequence and forget the module-wide state."""
    global _state
    # Clear the global first so the module is reset even if close() raises.
    active, _state = _state, None
    if active:
        active.close()
104
def nextfile():
    """
    Close the current file so the following iteration starts at the first
    line of the next file (if there is one). Lines left unread in the
    closed file are not added to the cumulative line count, and the
    reported filename only changes once the first line of the next file
    has been read. Calling this before any line has been read does
    nothing, so it cannot be used to skip the first file; calling it
    after the last line of the last file has been read also does nothing.

    Raises RuntimeError when there is no active input().
    """
    state = _state
    if state:
        return state.nextfile()
    raise RuntimeError("no active input()")
118
def filename():
    """
    Return the name of the file that is currently being read, or None
    if no line has been read yet.

    Raises RuntimeError when there is no active input().
    """
    state = _state
    if state:
        return state.filename()
    raise RuntimeError("no active input()")
127
def lineno():
    """
    Return the cumulative line number of the most recently read line.
    The result is 0 before the first line has been read; once the last
    line of the last file has been read, it stays at that line's number.

    Raises RuntimeError when there is no active input().
    """
    state = _state
    if state:
        return state.lineno()
    raise RuntimeError("no active input()")
137
def filelineno():
    """
    Return the line number within the current file. The result is 0
    before the first line has been read; once the last line of the last
    file has been read, it stays at that line's number within its file.

    Raises RuntimeError when there is no active input().
    """
    state = _state
    if state:
        return state.filelineno()
    raise RuntimeError("no active input()")
147
def fileno():
    """
    Return the file descriptor number of the current file, or -1 when
    no file is currently open.

    Raises RuntimeError when there is no active input().
    """
    state = _state
    if state:
        return state.fileno()
    raise RuntimeError("no active input()")
156
def isfirstline():
    """
    Return true if the line just read is the first line of its file,
    otherwise return false.

    Raises RuntimeError when there is no active input().
    """
    state = _state
    if state:
        return state.isfirstline()
    raise RuntimeError("no active input()")
165
def isstdin():
    """
    Return true if the last line was read from sys.stdin, otherwise
    return false.

    Raises RuntimeError when there is no active input().
    """
    state = _state
    if state:
        return state.isstdin()
    raise RuntimeError("no active input()")
174
class FileInput:
    """FileInput([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=False, backup="", bufsize=0,
                 mode="r", openhook=None):
        """Record what to read; no file is opened until a line is requested.

        files    -- a single name (str or os.PathLike), an iterable of
                    names, or None to use sys.argv[1:].  An empty
                    collection means standard input ('-').
        inplace  -- if true, each file is renamed to a backup and
                    sys.stdout is redirected to a new file with the
                    original name while that file is being read.
        backup   -- extension for the backup file; when empty, ".bak" is
                    used and the backup is removed when the file is closed.
        bufsize  -- deprecated and ignored; a true value only triggers a
                    DeprecationWarning.
        mode     -- one of 'r', 'rU', 'U' or 'rb'.
        openhook -- optional callable(filename, mode) used to open files;
                    cannot be combined with inplace.

        Raises ValueError for an unsupported mode, for an openhook given
        together with inplace, or for an openhook that is not callable.
        """
        # Set every attribute that close()/nextfile() reads *before* doing
        # anything that can raise.  Otherwise a failed construction (e.g. a
        # non-iterable `files`) leaves a half-built object whose __del__
        # raises AttributeError.
        self._files = ()
        self._inplace = inplace
        self._backup = backup
        self._savestdout = None
        self._output = None
        self._filename = None
        self._startlineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._mode = "r"
        self._openhook = None

        if isinstance(files, str):
            files = (files,)
        elif isinstance(files, os.PathLike):
            files = (os.fspath(files), )
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        if bufsize:
            import warnings
            warnings.warn('bufsize is deprecated and ignored',
                          DeprecationWarning, stacklevel=2)
        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        if 'U' in mode:
            import warnings
            warnings.warn("'U' mode is deprecated",
                          DeprecationWarning, 2)
        self._mode = mode
        if openhook:
            if inplace:
                raise ValueError("FileInput cannot use an opening hook in inplace mode")
            if not callable(openhook):
                raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        """Close the sequence when the object is garbage collected."""
        self.close()

    def close(self):
        """Close the current file and drop all files still to be read."""
        try:
            self.nextfile()
        finally:
            # Emptying the list makes later reads return an empty line
            # instead of opening another file.
            self._files = ()

    def __enter__(self):
        """Return the instance itself for use in a ``with`` statement."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the sequence on leaving the ``with`` block."""
        self.close()

    def __iter__(self):
        """Return the instance itself; it is its own iterator."""
        return self

    def __next__(self):
        """Return the next line, moving on to the next file as needed.

        Raises StopIteration once every file has been exhausted.
        """
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                raise StopIteration
            self.nextfile()
            # repeat with next file

    def __getitem__(self, i):
        """Return line number *i*, which must be the next line in sequence.

        Raises RuntimeError if *i* is not the current cumulative line
        number, and IndexError when the input is exhausted.
        """
        if i != self.lineno():
            raise RuntimeError("accessing lines out of order")
        try:
            return self.__next__()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file and undo any in-place redirection.

        Restores sys.stdout if it was redirected, closes the in-place
        output file, closes the input file unless it is standard input,
        and removes the backup file when no backup extension was given.
        The nested try/finally blocks ensure each cleanup step runs even
        if an earlier one raises.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            file = self._file
            self._file = None
            try:
                del self._readline  # restore FileInput._readline
            except AttributeError:
                pass
            try:
                if file and not self._isstdin:
                    file.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                if backupfilename and not self._backup:
                    # Best effort: a backup that cannot be removed is
                    # simply left behind.
                    try: os.unlink(backupfilename)
                    except OSError: pass

                self._isstdin = False

    def readline(self):
        """Return the next line, or an empty line when input is exhausted.

        The empty value is '' in text mode and b'' in binary mode.
        """
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                return line
            self.nextfile()
            # repeat with next file

    def _readline(self):
        """Open the next file and return its first line.

        This method is only reached when no file is open: once a file has
        been opened, an instance attribute of the same name is bound to
        that file's readline and hides this method until nextfile()
        deletes it again.
        """
        if not self._files:
            if 'b' in self._mode:
                return b''
            else:
                return ''
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._startlineno = self.lineno()
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        # None (not 0) to match the "no backup" value used by __init__
        # and nextfile(); only truthiness is ever tested.
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            if 'b' in self._mode:
                self._file = getattr(sys.stdin, 'buffer', sys.stdin)
            else:
                self._file = sys.stdin
            self._isstdin = True
        else:
            if self._inplace:
                self._backupfilename = (
                    os.fspath(self._filename) + (self._backup or ".bak"))
                try:
                    os.unlink(self._backupfilename)
                except OSError:
                    pass
                # The next few lines may raise OSError
                os.rename(self._filename, self._backupfilename)
                self._file = open(self._backupfilename, self._mode)
                try:
                    perm = os.fstat(self._file.fileno()).st_mode
                except OSError:
                    self._output = open(self._filename, "w")
                else:
                    # Recreate the output file with the permission bits
                    # of the file it replaces.
                    mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
                    if hasattr(os, 'O_BINARY'):
                        mode |= os.O_BINARY

                    fd = os.open(self._filename, mode, perm)
                    self._output = os.fdopen(fd, "w")
                    try:
                        if hasattr(os, 'chmod'):
                            os.chmod(self._filename, perm)
                    except OSError:
                        pass
                self._savestdout = sys.stdout
                sys.stdout = self._output
            else:
                # This may raise OSError
                if self._openhook:
                    self._file = self._openhook(self._filename, self._mode)
                else:
                    self._file = open(self._filename, self._mode)
        self._readline = self._file.readline  # hide FileInput._readline
        return self._readline()

    def filename(self):
        """Return the name of the file being read, or None before any read."""
        return self._filename

    def lineno(self):
        """Return the cumulative line number of the line just read."""
        return self._startlineno + self._filelineno

    def filelineno(self):
        """Return the line number within the current file."""
        return self._filelineno

    def fileno(self):
        """Return the current file's descriptor, or -1 if there is none."""
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return True if the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return True if the current file is standard input."""
        return self._isstdin
391
392
def hook_compressed(filename, mode):
    """Open *filename* with an opener chosen from its extension.

    '.gz' files are opened with gzip.open and '.bz2' files with
    bz2.BZ2File; any other name is opened with the built-in open().
    *mode* is passed through unchanged to whichever opener is used.
    """
    suffix = os.path.splitext(filename)[1]
    if suffix == '.gz':
        import gzip
        opener = gzip.open
    elif suffix == '.bz2':
        import bz2
        opener = bz2.BZ2File
    else:
        opener = open
    return opener(filename, mode)
403
404
def hook_encoded(encoding, errors=None):
    """Return an opening hook that opens files with a fixed encoding.

    The returned callable takes (filename, mode) and calls the built-in
    open() with the *encoding* and *errors* given here.
    """
    def openhook(filename, mode):
        handle = open(filename, mode, encoding=encoding, errors=errors)
        return handle
    return openhook
409
410
def _test():
    """Command-line self-test: echo every input line with its position.

    Options: -i enables in-place mode, -b EXT sets the backup extension.
    Each line is printed with its cumulative line number, file name and
    per-file line number; a "*" marks the first line of each file.  A
    final summary line reports the position after the last read.
    """
    import getopt
    inplace = False
    backup = False
    options, paths = getopt.getopt(sys.argv[1:], "ib:")
    for flag, value in options:
        if flag == '-i':
            inplace = True
        if flag == '-b':
            backup = value
    for line in input(paths, inplace=inplace, backup=backup):
        # Strip one trailing newline, then one trailing carriage return.
        if line.endswith('\n'):
            line = line[:-1]
        if line.endswith('\r'):
            line = line[:-1]
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   "*" if isfirstline() else "", line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))
425
# Run the self-test only when the module is executed as a script.
if __name__ == "__main__":
    _test()
428