• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 """Helper class to quickly write a loop over all standard input files.
2 
3 Typical use is:
4 
5     import fileinput
6     for line in fileinput.input():
7         process(line)
8 
9 This iterates over the lines of all files listed in sys.argv[1:],
10 defaulting to sys.stdin if the list is empty.  If a filename is '-' it
11 is also replaced by sys.stdin.  To specify an alternative list of
12 filenames, pass it as the argument to input().  A single file name is
13 also allowed.
14 
15 Functions filename(), lineno() return the filename and cumulative line
16 number of the line that has just been read; filelineno() returns its
17 line number in the current file; isfirstline() returns true iff the
18 line just read is the first line of its file; isstdin() returns true
19 iff the line was read from sys.stdin.  Function nextfile() closes the
20 current file so that the next iteration will read the first line from
21 the next file (if any); lines not read from the file will not count
22 towards the cumulative line count; the filename is not changed until
23 after the first line of the next file has been read.  Function close()
24 closes the sequence.
25 
26 Before any lines have been read, filename() returns None and both line
27 numbers are zero; nextfile() has no effect.  After all lines have been
28 read, filename() and the line number functions return the values
29 pertaining to the last line read; nextfile() has no effect.
30 
All files are opened in text mode by default; you can override this by
32 setting the mode parameter to input() or FileInput.__init__().
33 If an I/O error occurs during opening or reading a file, the OSError
34 exception is raised.
35 
36 If sys.stdin is used more than once, the second and further use will
37 return no lines, except perhaps for interactive use, or if it has been
38 explicitly reset (e.g. using sys.stdin.seek(0)).
39 
40 Empty files are opened and immediately closed; the only time their
41 presence in the list of filenames is noticeable at all is when the
42 last file opened is empty.
43 
44 It is possible that the last line of a file doesn't end in a newline
45 character; otherwise lines are returned including the trailing
46 newline.
47 
Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
correspond to the functions in the module.  In addition it has a
51 readline() method which returns the next input line, and a
52 __getitem__() method which implements the sequence behavior.  The
53 sequence must be accessed in strictly sequential order; sequence
54 access and readline() cannot be mixed.
55 
56 Optional in-place filtering: if the keyword argument inplace=1 is
57 passed to input() or to the FileInput constructor, the file is moved
58 to a backup file and standard output is directed to the input file.
59 This makes it possible to write a filter that rewrites its input file
60 in place.  If the keyword argument backup=".<some extension>" is also
61 given, it specifies the extension for the backup file, and the backup
62 file remains around; by default, the extension is ".bak" and it is
63 deleted when the output file is closed.  In-place filtering is
64 disabled when standard input is read.  XXX The current implementation
65 does not work for MS-DOS 8+3 filesystems.
66 
67 XXX Possible additions:
68 
69 - optional getopt argument processing
70 - isatty()
71 - read(), read(size), even readlines()
72 
73 """
74 
75 import sys, os
76 
# Public names exported by "from <module> import *".
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput", "hook_compressed",
           "hook_encoded"]

# The FileInput instance backing the module-level functions: set by input(),
# reset to None by close(), and None until input() is first called.
_state = None
82 
def input(files=None, inplace=False, backup="", bufsize=0,
          mode="r", openhook=None):
    """Create the module-wide FileInput instance and return it.

    All parameters are forwarded unchanged to the FileInput constructor.
    The returned object can be iterated over, and it also becomes the
    global state consulted by the other functions of this module.

    Raises RuntimeError if a previous input() still has a file open.
    """
    global _state
    current = _state
    # Only a state that currently holds an open file counts as "active".
    if current and current._file:
        raise RuntimeError("input() already active")
    reader = FileInput(files, inplace, backup, bufsize, mode, openhook)
    _state = reader
    return reader
96 
def close():
    """Close the sequence and forget the module-wide state."""
    global _state
    # Detach the global first so the module is reset even if closing raises.
    previous, _state = _state, None
    if previous:
        previous.close()
104 
def nextfile():
    """
    Close the current file so that the next iteration reads the first
    line of the next file (if any). Lines left unread in the closed file
    do not count towards the cumulative line count, and the filename is
    not changed until the first line of the next file has been read.
    This function has no effect before the first line has been read (so
    it cannot skip the first file) nor after the last line of the last
    file has been read.

    Raises RuntimeError when there is no active input().
    """
    state = _state
    if state:
        return state.nextfile()
    raise RuntimeError("no active input()")
118 
def filename():
    """
    Return the name of the file currently being read, or None when no
    line has been read yet.

    Raises RuntimeError when there is no active input().
    """
    state = _state
    if state:
        return state.filename()
    raise RuntimeError("no active input()")
127 
def lineno():
    """
    Return the cumulative line number of the line that has just been
    read: 0 before the first line has been read, and the number of the
    final line once the last line of the last file has been read.

    Raises RuntimeError when there is no active input().
    """
    state = _state
    if state:
        return state.lineno()
    raise RuntimeError("no active input()")
137 
def filelineno():
    """
    Return the line number within the current file: 0 before the first
    line has been read, and the number of the final line within its file
    once the last line of the last file has been read.

    Raises RuntimeError when there is no active input().
    """
    state = _state
    if state:
        return state.filelineno()
    raise RuntimeError("no active input()")
147 
def fileno():
    """
    Return the file number of the current file, or -1 when no file is
    currently opened.

    Raises RuntimeError when there is no active input().
    """
    state = _state
    if state:
        return state.fileno()
    raise RuntimeError("no active input()")
156 
def isfirstline():
    """
    Return true if the line just read is the first line of its file,
    otherwise return false.

    Raises RuntimeError when there is no active input().
    """
    state = _state
    if state:
        return state.isfirstline()
    raise RuntimeError("no active input()")
165 
def isstdin():
    """
    Return true if the last line was read from sys.stdin, otherwise
    return false.

    Raises RuntimeError when there is no active input().
    """
    state = _state
    if state:
        return state.isstdin()
    raise RuntimeError("no active input()")
174 
class FileInput:
    """FileInput([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=False, backup="", bufsize=0,
                 mode="r", openhook=None):
        """Set up iteration over *files*.

        files    -- a single file name (str), an iterable of file names,
                    or None to use sys.argv[1:]; an empty list means
                    ('-',), i.e. standard input.
        inplace  -- if true, each file is moved to a backup file and
                    sys.stdout is redirected to a fresh copy of it.
        backup   -- extension for the backup file; when empty, ".bak" is
                    used and the backup is deleted when the file is closed.
        bufsize  -- ignored; a true value emits a DeprecationWarning.
        mode     -- one of 'r', 'rU', 'U' or 'rb'.
        openhook -- callable(filename, mode) used to open each file;
                    cannot be combined with inplace.

        Raises ValueError for an invalid mode or openhook.
        """
        # Initialize everything nextfile() reads before doing anything that
        # can raise: __del__ -> close() -> nextfile() also runs on an
        # instance whose construction failed part-way, and must not trip
        # over missing attributes.
        self._savestdout = None
        self._output = None
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._filename = None
        self._startlineno = 0
        self._filelineno = 0

        if isinstance(files, str):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        self._inplace = inplace
        self._backup = backup
        if bufsize:
            import warnings
            warnings.warn('bufsize is deprecated and ignored',
                          DeprecationWarning, stacklevel=2)
        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        if 'U' in mode:
            import warnings
            warnings.warn("'U' mode is deprecated",
                          DeprecationWarning, 2)
        self._mode = mode
        if openhook:
            if inplace:
                raise ValueError("FileInput cannot use an opening hook in inplace mode")
            if not callable(openhook):
                raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        """Close the sequence when the instance is finalized."""
        self.close()

    def close(self):
        """Close the current file and drop all remaining file names."""
        try:
            self.nextfile()
        finally:
            # Emptying the list makes every later read report end of input.
            self._files = ()

    def __enter__(self):
        """Return the instance itself for use in a with statement."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the sequence when the with block is left."""
        self.close()

    def __iter__(self):
        """Return the instance itself; it is its own iterator."""
        return self

    def __next__(self):
        """Return the next line, moving on to the next file as needed.

        Raises StopIteration once every file has been exhausted.
        """
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            # An empty result with no open file means the list is exhausted.
            if not self._file:
                raise StopIteration
            self.nextfile()
            # repeat with next file

    def __getitem__(self, i):
        """Return line number *i*, which must be the next line in sequence.

        Raises RuntimeError if *i* is not the cumulative line count so far,
        and IndexError when the end of input has been reached.
        """
        if i != self.lineno():
            raise RuntimeError("accessing lines out of order")
        try:
            return self.__next__()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file and undo any in-place redirection.

        Restores sys.stdout if it was redirected, closes the in-place
        output file and the input file (sys.stdin itself is never closed),
        and deletes the backup file unless a backup extension was given.
        The nested try/finally blocks make each cleanup step run even if
        an earlier one raises.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            file = self._file
            self._file = None
            try:
                del self._readline  # restore FileInput._readline
            except AttributeError:
                pass
            try:
                if file and not self._isstdin:
                    file.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                # Keep the backup only when the caller asked for one.
                if backupfilename and not self._backup:
                    try: os.unlink(backupfilename)
                    except OSError: pass

                self._isstdin = False

    def readline(self):
        """Return the next line, moving on to the next file as needed.

        Returns an empty string (or empty bytes in binary mode) once every
        file has been exhausted.
        """
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            # An empty result with no open file means the list is exhausted.
            if not self._file:
                return line
            self.nextfile()
            # repeat with next file

    def _readline(self):
        """Open the next file in the list and return its first line.

        Returns '' (or b'' in binary mode) when no file names remain.
        While a file is open this method is shadowed by an instance
        attribute bound to that file's readline; nextfile() removes the
        attribute again, so this code only runs between files.
        """
        if not self._files:
            if 'b' in self._mode:
                return b''
            else:
                return ''
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._startlineno = self.lineno()
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        # None (not 0) for consistency with __init__() and nextfile().
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            if 'b' in self._mode:
                # Use the underlying binary stream when sys.stdin has one.
                self._file = getattr(sys.stdin, 'buffer', sys.stdin)
            else:
                self._file = sys.stdin
            self._isstdin = True
        else:
            if self._inplace:
                self._backupfilename = (
                    self._filename + (self._backup or ".bak"))
                # Remove a stale backup so the rename below can succeed.
                try:
                    os.unlink(self._backupfilename)
                except OSError:
                    pass
                # The next few lines may raise OSError
                os.rename(self._filename, self._backupfilename)
                self._file = open(self._backupfilename, self._mode)
                try:
                    perm = os.fstat(self._file.fileno()).st_mode
                except OSError:
                    self._output = open(self._filename, "w")
                else:
                    # Recreate the file with the permission bits of the
                    # original, which now lives under the backup name.
                    mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
                    if hasattr(os, 'O_BINARY'):
                        mode |= os.O_BINARY

                    fd = os.open(self._filename, mode, perm)
                    self._output = os.fdopen(fd, "w")
                    try:
                        if hasattr(os, 'chmod'):
                            os.chmod(self._filename, perm)
                    except OSError:
                        pass
                # Redirect standard output into the rewritten file;
                # nextfile() restores it.
                self._savestdout = sys.stdout
                sys.stdout = self._output
            else:
                # This may raise OSError
                if self._openhook:
                    self._file = self._openhook(self._filename, self._mode)
                else:
                    self._file = open(self._filename, self._mode)
        self._readline = self._file.readline  # hide FileInput._readline
        return self._readline()

    def filename(self):
        """Return the name of the file most recently opened, or None."""
        return self._filename

    def lineno(self):
        """Return the cumulative line number of the line just read."""
        return self._startlineno + self._filelineno

    def filelineno(self):
        """Return the line number of the line just read within its file."""
        return self._filelineno

    def fileno(self):
        """Return the current file's descriptor, or -1 if unavailable.

        -1 is returned when no file is open or when the file object's
        fileno() raises ValueError.
        """
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return True if the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return True if the current file is sys.stdin."""
        return self._isstdin
389 
390 
def hook_compressed(filename, mode):
    """Open *filename* with an opener chosen by its extension.

    Files ending in '.gz' are opened with gzip.open, files ending in
    '.bz2' with bz2.BZ2File, and everything else with the built-in open.
    The compression modules are imported only when they are needed.
    """
    suffix = os.path.splitext(filename)[1]
    if suffix == '.gz':
        import gzip
        opener = gzip.open
    elif suffix == '.bz2':
        import bz2
        opener = bz2.BZ2File
    else:
        opener = open
    return opener(filename, mode)
401 
402 
def hook_encoded(encoding, errors=None):
    """Return an open hook that opens files with the given text settings.

    The returned callable takes (filename, mode) and passes *encoding*
    and *errors* on to the built-in open.
    """
    text_options = {"encoding": encoding, "errors": errors}

    def openhook(filename, mode):
        return open(filename, mode, **text_options)

    return openhook
407 
408 
def _test():
    """Command-line demo: print every input line with its position.

    Options: -i enables in-place filtering, -b EXT sets the backup
    extension. The remaining arguments are the files to read. A final
    summary line with the last position is printed after the loop.
    """
    import getopt
    inplace = False
    backup = False
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for flag, value in opts:
        if flag == '-i':
            inplace = True
        if flag == '-b':
            backup = value
    for line in input(args, inplace=inplace, backup=backup):
        # Drop one trailing newline and then one trailing carriage return.
        if line.endswith('\n'):
            line = line[:-1]
        if line.endswith('\r'):
            line = line[:-1]
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   "*" if isfirstline() else "", line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))
423 
# Run the command-line demo when this file is executed as a script.
if __name__ == '__main__':
    _test()
426