• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Helper class to quickly write a loop over all standard input files.
2
3Typical use is:
4
5    import fileinput
6    for line in fileinput.input():
7        process(line)
8
9This iterates over the lines of all files listed in sys.argv[1:],
10defaulting to sys.stdin if the list is empty.  If a filename is '-' it
11is also replaced by sys.stdin.  To specify an alternative list of
12filenames, pass it as the argument to input().  A single file name is
13also allowed.
14
15Functions filename(), lineno() return the filename and cumulative line
16number of the line that has just been read; filelineno() returns its
17line number in the current file; isfirstline() returns true iff the
18line just read is the first line of its file; isstdin() returns true
19iff the line was read from sys.stdin.  Function nextfile() closes the
20current file so that the next iteration will read the first line from
21the next file (if any); lines not read from the file will not count
22towards the cumulative line count; the filename is not changed until
23after the first line of the next file has been read.  Function close()
24closes the sequence.
25
26Before any lines have been read, filename() returns None and both line
27numbers are zero; nextfile() has no effect.  After all lines have been
28read, filename() and the line number functions return the values
29pertaining to the last line read; nextfile() has no effect.
30
All files are opened in text mode by default; you can override this by
setting the mode parameter to input() or FileInput.__init__().
If an I/O error occurs while opening or reading a file, the IOError
exception is raised.
35
36If sys.stdin is used more than once, the second and further use will
37return no lines, except perhaps for interactive use, or if it has been
38explicitly reset (e.g. using sys.stdin.seek(0)).
39
40Empty files are opened and immediately closed; the only time their
41presence in the list of filenames is noticeable at all is when the
42last file opened is empty.
43
44It is possible that the last line of a file doesn't end in a newline
45character; otherwise lines are returned including the trailing
46newline.
47
Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
correspond to the functions in the module.  In addition it has a
readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.
55
56Optional in-place filtering: if the keyword argument inplace=1 is
57passed to input() or to the FileInput constructor, the file is moved
58to a backup file and standard output is directed to the input file.
59This makes it possible to write a filter that rewrites its input file
60in place.  If the keyword argument backup=".<some extension>" is also
61given, it specifies the extension for the backup file, and the backup
62file remains around; by default, the extension is ".bak" and it is
63deleted when the output file is closed.  In-place filtering is
64disabled when standard input is read.  XXX The current implementation
65does not work for MS-DOS 8+3 filesystems.
66
67Performance: this module is unfortunately one of the slower ways of
68processing large numbers of input lines.  Nevertheless, a significant
69speed-up has been obtained by using readlines(bufsize) instead of
70readline().  A new keyword argument, bufsize=N, is present on the
71input() function and the FileInput() class to override the default
72buffer size.
73
74XXX Possible additions:
75
76- optional getopt argument processing
77- isatty()
78- read(), read(size), even readlines()
79
80"""
81
82import sys, os
83
# Names exported by "from fileinput import *".  fileno() and the two
# opening hooks are public functions of this module, so they are listed too.
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput",
           "hook_compressed", "hook_encoded"]

# The FileInput instance created by the most recent input() call, or None
# when no module-level sequence is active (see input() and close()).
_state = None

# Size hint handed to file.readlines() when the caller passes bufsize=0.
DEFAULT_BUFSIZE = 8*1024
90
def input(files=None, inplace=0, backup="", bufsize=0,
          mode="r", openhook=None):
    """Return an instance of the FileInput class, which can be iterated.

    The parameters are passed to the constructor of the FileInput class.
    The returned instance, in addition to being an iterator,
    keeps global state for the functions of this module.

    Raises RuntimeError if the previous input() sequence still has a
    file open.
    """
    global _state
    if _state and _state._file:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize, mode, openhook)
    return _state
104
def close():
    """Close the module-level sequence, if one is active.

    The global state is cleared first, then the instance is closed.
    """
    global _state
    active, _state = _state, None
    if active:
        active.close()
112
def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count. The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file. After the last line of the
    last file has been read, this function has no effect.

    Raises RuntimeError if no input() sequence is active.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.nextfile()
126
def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.

    Raises RuntimeError if no input() sequence is active.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filename()
135
def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0. After the last line
    of the last file has been read, returns the line number of that line.

    Raises RuntimeError if no input() sequence is active.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.lineno()
145
def filelineno():
    """
    Return the line number in the current file. Before the first line
    has been read, returns 0. After the last line of the last file has
    been read, returns the line number of that line within the file.

    Raises RuntimeError if no input() sequence is active.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filelineno()
155
def fileno():
    """
    Return the file number of the current file. When no file is currently
    opened, returns -1.

    Raises RuntimeError if no input() sequence is active.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.fileno()
164
def isfirstline():
    """
    Return true if the line just read is the first line of its file,
    otherwise return false.

    Raises RuntimeError if no input() sequence is active.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isfirstline()
173
def isstdin():
    """
    Return true if the last line was read from sys.stdin,
    otherwise return false.

    Raises RuntimeError if no input() sequence is active.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isstdin()
182
class FileInput:
    """FileInput([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0,
                 mode="r", openhook=None):
        """Set up the sequence; no file is opened until the first read.

        files    -- a single file name, an iterable of names, or None to
                    use sys.argv[1:]; an empty list means standard input.
        inplace  -- true to redirect sys.stdout into each input file.
        backup   -- extension of the backup file kept in in-place mode.
        bufsize  -- size hint for readlines(); 0 selects DEFAULT_BUFSIZE.
        mode     -- one of 'r', 'rU', 'U' and 'rb'.
        openhook -- callable(filename, mode) returning an open file.

        Raises ValueError for an unsupported mode, for a non-callable
        openhook, or when openhook is combined with inplace.
        """
        if isinstance(files, basestring):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._buffer = []
        self._bufindex = 0
        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        self._mode = mode
        if inplace and openhook:
            raise ValueError("FileInput cannot use an opening hook in inplace mode")
        elif openhook and not hasattr(openhook, '__call__'):
            raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and drop all files not yet opened."""
        try:
            self.nextfile()
        finally:
            self._files = ()

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of input."""
        # Fast path: serve the line straight from the read-ahead buffer.
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def __getitem__(self, i):
        """Sequence protocol; i must equal the number of lines read so far.

        Raises RuntimeError for out-of-order access and IndexError once
        the input is exhausted.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        try:
            return self.next()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file and undo any in-place redirection.

        Each attribute is cleared before its resource is released, and
        the nested try/finally blocks make sure the later clean-up steps
        still run when an earlier close() raises.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            current = self._file
            self._file = None
            try:
                # sys.stdin is shared with the rest of the program; leave it open.
                if current and not self._isstdin:
                    current.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                # Without an explicit backup extension the backup is temporary.
                if backupfilename and not self._backup:
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False
                self._buffer = []
                self._bufindex = 0

    def _open_next(self):
        """Take the next name off the pending list and open it as self._file."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
        elif self._inplace:
            self._backupfilename = (
                self._filename + (self._backup or os.extsep+"bak"))
            try:
                os.unlink(self._backupfilename)
            except os.error:
                pass
            # The next few lines may raise IOError
            os.rename(self._filename, self._backupfilename)
            self._file = open(self._backupfilename, self._mode)
            try:
                perm = os.fstat(self._file.fileno()).st_mode
            except OSError:
                self._output = open(self._filename, "w")
            else:
                # Recreate the original name with the permission bits of
                # the file that was just moved aside.
                fd = os.open(self._filename,
                             os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                             perm)
                self._output = os.fdopen(fd, "w")
                try:
                    if hasattr(os, 'chmod'):
                        os.chmod(self._filename, perm)
                except OSError:
                    pass
            self._savestdout = sys.stdout
            sys.stdout = self._output
        elif self._openhook:
            # This may raise IOError
            self._file = self._openhook(self._filename, self._mode)
        else:
            # This may raise IOError
            self._file = open(self._filename, self._mode)

    def readline(self):
        """Return the next line of input, or "" when all files are exhausted.

        Implemented as a loop rather than by recursion so that an
        arbitrarily long run of empty files cannot exceed the
        interpreter's recursion limit.
        """
        while True:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                # Current file is exhausted; go round again for the next one.
                self.nextfile()

    def filename(self):
        """Return the name of the file most recently opened, or None."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines read so far."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines read from the current file."""
        return self._filelineno

    def fileno(self):
        """Return the current file's fileno(), or -1 when unavailable."""
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return true if the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin
379
380
def hook_compressed(filename, mode):
    """Open filename, choosing the opener from its extension.

    '.gz' files go through gzip.open, '.bz2' files through bz2.BZ2File,
    and anything else through the built-in open().  The compression
    modules are imported only when they are actually needed.
    """
    extension = os.path.splitext(filename)[1]
    if extension == '.gz':
        import gzip
        opener = gzip.open
    elif extension == '.bz2':
        import bz2
        opener = bz2.BZ2File
    else:
        opener = open
    return opener(filename, mode)
391
392
def hook_encoded(encoding):
    """Return an opening hook that decodes each file with the given encoding.

    The hook strips 'U' and 'b' from the requested mode (falling back to
    'r' when nothing is left) and opens the file through io.open with
    newline='' so line endings are passed through untranslated.
    """
    import io

    def openhook(filename, mode):
        text_mode = mode.replace('U', '')
        text_mode = text_mode.replace('b', '')
        if not text_mode:
            text_mode = 'r'
        return io.open(filename, text_mode, encoding=encoding, newline='')

    return openhook
399
400
def _test():
    """Command-line self-test: echo every input line with its line numbers.

    Options: -i turns on in-place filtering, -b EXT sets the backup
    extension.  Each line is written as "N: file[M]* text", where the
    '*' marks the first line of a file; a final summary line follows.
    """
    import getopt
    inplace = 0
    backup = ""
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i':
            inplace = 1
        elif o == '-b':
            backup = a
    for line in input(args, inplace=inplace, backup=backup):
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        marker = "*" if isfirstline() else ""
        # Look up sys.stdout on every write: in-place mode rebinds it to
        # the file currently being rewritten.
        sys.stdout.write("%d: %s[%d]%s %s\n" % (lineno(), filename(),
                                                filelineno(), marker, line))
    sys.stdout.write("%d: %s[%d]\n" % (lineno(), filename(), filelineno()))

# Run the self-test when the module is executed as a script.
if __name__ == '__main__':
    _test()
418