• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Helper class to quickly write a loop over all standard input files.
2
3Typical use is:
4
5    import fileinput
6    for line in fileinput.input():
7        process(line)
8
9This iterates over the lines of all files listed in sys.argv[1:],
10defaulting to sys.stdin if the list is empty.  If a filename is '-' it
11is also replaced by sys.stdin.  To specify an alternative list of
12filenames, pass it as the argument to input().  A single file name is
13also allowed.
14
15Functions filename(), lineno() return the filename and cumulative line
16number of the line that has just been read; filelineno() returns its
17line number in the current file; isfirstline() returns true iff the
18line just read is the first line of its file; isstdin() returns true
19iff the line was read from sys.stdin.  Function nextfile() closes the
20current file so that the next iteration will read the first line from
21the next file (if any); lines not read from the file will not count
22towards the cumulative line count; the filename is not changed until
23after the first line of the next file has been read.  Function close()
24closes the sequence.
25
26Before any lines have been read, filename() returns None and both line
27numbers are zero; nextfile() has no effect.  After all lines have been
28read, filename() and the line number functions return the values
29pertaining to the last line read; nextfile() has no effect.
30
All files are opened in text mode by default; you can override this by
passing the mode parameter to input() or FileInput.__init__().
If an I/O error occurs while opening or reading a file, IOError is
raised.
35
36If sys.stdin is used more than once, the second and further use will
37return no lines, except perhaps for interactive use, or if it has been
38explicitly reset (e.g. using sys.stdin.seek(0)).
39
40Empty files are opened and immediately closed; the only time their
41presence in the list of filenames is noticeable at all is when the
42last file opened is empty.
43
44It is possible that the last line of a file doesn't end in a newline
45character; otherwise lines are returned including the trailing
46newline.
47
Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), fileno(), isfirstline(), isstdin(), nextfile()
and close() correspond to the functions in the module.  In addition it
has a readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.
55
56Optional in-place filtering: if the keyword argument inplace=1 is
57passed to input() or to the FileInput constructor, the file is moved
58to a backup file and standard output is directed to the input file.
59This makes it possible to write a filter that rewrites its input file
60in place.  If the keyword argument backup=".<some extension>" is also
61given, it specifies the extension for the backup file, and the backup
62file remains around; by default, the extension is ".bak" and it is
63deleted when the output file is closed.  In-place filtering is
64disabled when standard input is read.  XXX The current implementation
65does not work for MS-DOS 8+3 filesystems.
66
67XXX Possible additions:
68
69- optional getopt argument processing
70- isatty()
71- read(), read(size), even readlines()
72
73"""
74
75import sys, os
76
# Names exported by "from fileinput import *".  fileno() and the two
# open hooks are public functions of this module, so they are listed too.
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput",
           "hook_compressed", "hook_encoded"]

# The FileInput instance created by the most recent input() call, or None
# after close(); the module-level functions delegate to it.
_state = None

# No longer used
DEFAULT_BUFSIZE = 8*1024
84
def input(files=None, inplace=0, backup="", bufsize=0,
          mode="r", openhook=None):
    """Return an instance of the FileInput class, which can be iterated.

    The parameters are passed to the constructor of the FileInput class.
    The returned instance, in addition to being an iterator,
    keeps global state for the functions of this module.

    Raises RuntimeError if the instance created by a previous input()
    call still has a file open.
    """
    global _state
    if _state and _state._file:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize, mode, openhook)
    return _state
98
def close():
    """Close the sequence.

    The module-level state is cleared before the instance is closed, so
    no input() is considered active afterwards even if closing raises.
    """
    global _state
    active, _state = _state, None
    if not active:
        return
    active.close()
106
def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count. The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file. After the last line of the
    last file has been read, this function has no effect.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.nextfile()
120
def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filename()
129
def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0. After the last line
    of the last file has been read, returns the line number of that line.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.lineno()
139
def filelineno():
    """
    Return the line number in the current file. Before the first line
    has been read, returns 0. After the last line of the last file has
    been read, returns the line number of that line within the file.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filelineno()
149
def fileno():
    """
    Return the file number of the current file. When no file is currently
    opened, returns -1.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.fileno()
158
def isfirstline():
    """
    Returns true if the line just read is the first line of its file,
    otherwise returns false.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isfirstline()
167
def isstdin():
    """
    Returns true if the last line was read from sys.stdin,
    otherwise returns false.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isstdin()
176
class FileInput:
    """FileInput([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.

    An instance can also be used in a with statement; close() is called
    when the block is left.
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0,
                 mode="r", openhook=None):
        """Set up the list of files to read; no file is opened yet.

        files    -- a single file name, an iterable of file names, or None
                    to use sys.argv[1:]; an empty list means ('-',), and
                    '-' is read from sys.stdin.
        inplace  -- if true, each file is moved to a backup file and
                    sys.stdout is redirected to the original file name
                    while it is being read.
        backup   -- extension for the backup file; if empty, os.extsep +
                    "bak" is used and the backup is removed afterwards.
        bufsize  -- accepted but not used by this implementation.
        mode     -- one of 'r', 'rU', 'U' and 'rb'.
        openhook -- callable taking (filename, mode) and returning an
                    opened file object; cannot be combined with inplace.

        Raises ValueError for an unsupported mode, for an openhook used
        together with inplace, or for a non-callable openhook.
        """
        if isinstance(files, basestring):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        self._inplace = inplace
        self._backup = backup
        self._savestdout = None
        self._output = None
        self._filename = None
        self._startlineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        self._mode = mode
        if inplace and openhook:
            raise ValueError("FileInput cannot use an opening hook in inplace mode")
        elif openhook and not hasattr(openhook, '__call__'):
            raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        self.close()

    def __enter__(self):
        """Return the instance itself so it can be bound by 'with ... as'."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the sequence; exceptions from the block are not suppressed."""
        self.close()

    def close(self):
        """Close the current file and discard the remaining file names."""
        try:
            self.nextfile()
        finally:
            # Emptied even if nextfile() raised, so nothing more is opened.
            self._files = ()

    def __iter__(self):
        return self

    def next(self):
        """Return the next line, moving on to the next file as needed.

        Raises StopIteration once every file has been exhausted.
        """
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                raise StopIteration
            self.nextfile()
            # repeat with next file

    def __getitem__(self, i):
        """Sequence access; i must equal the number of lines read so far.

        Raises RuntimeError for any other index and IndexError at the end
        of the input.
        """
        if i != self.lineno():
            raise RuntimeError("accessing lines out of order")
        try:
            return self.next()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Finish with the current file.

        Restores sys.stdout if it was redirected, closes the in-place
        output file and the input file (sys.stdin is left open), and
        removes the backup file unless a backup extension was given.
        Each step runs in a finally clause so that a failure in one does
        not skip the ones after it.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            file = self._file
            self._file = None
            try:
                del self._readline  # restore FileInput._readline
            except AttributeError:
                pass
            try:
                if file and not self._isstdin:
                    file.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                if backupfilename and not self._backup:
                    # Best effort: a backup that cannot be removed is left.
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False

    def readline(self):
        """Return the next line, or an empty string at the end of input."""
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                return line
            self.nextfile()
            # repeat with next file

    def _readline(self):
        """Open the next file and return its first line.

        This method only runs when no file is open: once a file has been
        opened, the instance attribute _readline is bound to that file's
        readline and hides this method until nextfile() deletes it.
        Returns an empty string when there are no files left.
        """
        if not self._files:
            return ""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._startlineno = self.lineno()
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
        else:
            if self._inplace:
                self._backupfilename = (
                    self._filename + (self._backup or os.extsep+"bak"))
                # Remove any stale backup so the rename below can succeed.
                try:
                    os.unlink(self._backupfilename)
                except OSError:
                    pass
                # The next few lines may raise IOError
                os.rename(self._filename, self._backupfilename)
                self._file = open(self._backupfilename, self._mode)
                try:
                    perm = os.fstat(self._file.fileno()).st_mode
                except OSError:
                    self._output = open(self._filename, "w")
                else:
                    # Recreate the file with the mode bits of the original.
                    fd = os.open(self._filename,
                                 os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                                 perm)
                    self._output = os.fdopen(fd, "w")
                    try:
                        if hasattr(os, 'chmod'):
                            os.chmod(self._filename, perm)
                    except OSError:
                        pass
                self._savestdout = sys.stdout
                sys.stdout = self._output
            else:
                # This may raise IOError
                if self._openhook:
                    self._file = self._openhook(self._filename, self._mode)
                else:
                    self._file = open(self._filename, self._mode)

        self._readline = self._file.readline  # hide FileInput._readline
        return self._readline()

    def filename(self):
        """Return the name of the file last opened, or None before that."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines read so far."""
        return self._startlineno + self._filelineno

    def filelineno(self):
        """Return the number of lines read from the current file."""
        return self._filelineno

    def fileno(self):
        """Return the file descriptor of the open file, or -1 if none."""
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return true if the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin
367
368
def hook_compressed(filename, mode):
    """Open hook that picks the opener from the file name extension.

    Names ending in '.gz' are opened with gzip.open, names ending in
    '.bz2' with bz2.BZ2File, and everything else with the builtin open().
    The mode is passed through unchanged.
    """
    suffix = os.path.splitext(filename)[1]
    if suffix == '.gz':
        import gzip
        opener = gzip.open
    elif suffix == '.bz2':
        import bz2
        opener = bz2.BZ2File
    else:
        opener = open
    return opener(filename, mode)
379
380
def hook_encoded(encoding):
    """Return an open hook that decodes each file with the given encoding.

    The returned callable takes (filename, mode), strips the 'U' and 'b'
    flags from the mode (falling back to 'r' if nothing is left) and
    opens the file with io.open() using newline='', so line endings are
    passed through untranslated.
    """
    import io

    def openhook(filename, mode):
        text_mode = mode.replace('U', '').replace('b', '')
        if not text_mode:
            text_mode = 'r'
        return io.open(filename, text_mode, encoding=encoding, newline='')

    return openhook
387
388
def _test():
    """Command-line self test: echo the input lines with their numbering.

    Options are taken from sys.argv: -i turns on in-place mode and
    -b EXT sets the backup extension; the remaining arguments are the
    files to read.  Each line is printed, stripped of its trailing
    newline and carriage return, prefixed with the cumulative line
    number, the file name and the per-file line number, with '*' marking
    the first line of each file.  A final summary line follows.
    """
    import getopt
    inplace = 0
    backup = ""
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i':
            inplace = 1
        if o == '-b':
            backup = a
    for line in input(args, inplace=inplace, backup=backup):
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        # A single parenthesized argument prints the same text whether
        # print is a statement or a function.
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   "*" if isfirstline() else "", line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))

if __name__ == '__main__':
    _test()
406