"""Helper class to quickly write a loop over all standard input files.

Typical use is:

    import fileinput
    for line in fileinput.input():
        process(line)

This iterates over the lines of all files listed in sys.argv[1:],
defaulting to sys.stdin if the list is empty.  If a filename is '-' it
is also replaced by sys.stdin.  To specify an alternative list of
filenames, pass it as the argument to input().  A single file name is
also allowed.

Functions filename(), lineno() return the filename and cumulative line
number of the line that has just been read; filelineno() returns its
line number in the current file; isfirstline() returns true iff the
line just read is the first line of its file; isstdin() returns true
iff the line was read from sys.stdin.  Function nextfile() closes the
current file so that the next iteration will read the first line from
the next file (if any); lines not read from the file will not count
towards the cumulative line count; the filename is not changed until
after the first line of the next file has been read.  Function close()
closes the sequence.

Before any lines have been read, filename() returns None and both line
numbers are zero; nextfile() has no effect.  After all lines have been
read, filename() and the line number functions return the values
pertaining to the last line read; nextfile() has no effect.

All files are opened in text mode by default, you can override this by
setting the mode parameter to input() or FileInput.__init__().
If an I/O error occurs during opening or reading a file, the IOError
exception is raised.

If sys.stdin is used more than once, the second and further use will
return no lines, except perhaps for interactive use, or if it has been
explicitly reset (e.g. using sys.stdin.seek(0)).

Empty files are opened and immediately closed; the only time their
presence in the list of filenames is noticeable at all is when the
last file opened is empty.

It is possible that the last line of a file doesn't end in a newline
character; otherwise lines are returned including the trailing
newline.

Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), fileno(), isfirstline(), isstdin(), nextfile()
and close() correspond to the functions in the module.  In addition it
has a readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.

Optional in-place filtering: if the keyword argument inplace=1 is
passed to input() or to the FileInput constructor, the file is moved
to a backup file and standard output is directed to the input file.
This makes it possible to write a filter that rewrites its input file
in place.  If the keyword argument backup=".<some extension>" is also
given, it specifies the extension for the backup file, and the backup
file remains around; by default, the extension is ".bak" and it is
deleted when the output file is closed.  In-place filtering is
disabled when standard input is read.  XXX The current implementation
does not work for MS-DOS 8+3 filesystems.

Performance: this module is unfortunately one of the slower ways of
processing large numbers of input lines.  Nevertheless, a significant
speed-up has been obtained by using readlines(bufsize) instead of
readline().  A new keyword argument, bufsize=N, is present on the
input() function and the FileInput() class to override the default
buffer size.

XXX Possible additions:

- optional getopt argument processing
- isatty()
- read(), read(size), even readlines()

"""

import os
import sys

__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput",
           "hook_compressed", "hook_encoded"]

# Types that denote "a single file name" rather than a sequence of names.
try:
    _string_types = basestring          # Python 2: str and unicode
except NameError:
    _string_types = (str, bytes)        # Python 3

# The FileInput instance behind the module-level convenience functions.
_state = None

DEFAULT_BUFSIZE = 8*1024


def _active_state():
    """Return the FileInput created by input(), or raise RuntimeError."""
    if not _state:
        raise RuntimeError("no active input()")
    return _state


def input(files=None, inplace=0, backup="", bufsize=0,
          mode="r", openhook=None):
    """Return an instance of the FileInput class, which can be iterated.

    The parameters are passed to the constructor of the FileInput class.
    The returned instance, in addition to being an iterator,
    keeps global state for the functions of this module.

    Raises RuntimeError if a previous input() still has a file open.
    """
    global _state
    if _state and _state._file:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize, mode, openhook)
    return _state


def close():
    """Close the sequence."""
    global _state
    state = _state
    # Drop the global reference first so the module is reusable even if
    # closing the underlying files raises.
    _state = None
    if state:
        state.close()


def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count. The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file. After the last line of the
    last file has been read, this function has no effect.
    """
    return _active_state().nextfile()


def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.
    """
    return _active_state().filename()


def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0. After the last line
    of the last file has been read, returns the line number of that line.
    """
    return _active_state().lineno()


def filelineno():
    """
    Return the line number in the current file. Before the first line
    has been read, returns 0. After the last line of the last file has
    been read, returns the line number of that line within the file.
    """
    return _active_state().filelineno()


def fileno():
    """
    Return the file number of the current file. When no file is currently
    opened, returns -1.
    """
    return _active_state().fileno()


def isfirstline():
    """
    Return true if the line just read is the first line of its file,
    otherwise return false.
    """
    return _active_state().isfirstline()


def isstdin():
    """
    Return true if the last line was read from sys.stdin,
    otherwise return false.
    """
    return _active_state().isstdin()


class FileInput:
    """FileInput([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0,
                 mode="r", openhook=None):
        """Set up iteration over *files*.

        files    -- a single file name, a sequence of names, or None to
                    use sys.argv[1:] (falling back to '-', i.e. stdin).
        inplace  -- if true, redirect sys.stdout into each input file.
        backup   -- extension of the backup file kept in inplace mode.
        bufsize  -- size hint passed to readlines(); 0 means the default.
        mode     -- one of 'r', 'rU', 'U' and 'rb'.
        openhook -- callable(filename, mode) used instead of open().

        Raises ValueError for an unsupported mode, for a non-callable
        openhook, or when openhook is combined with inplace.
        """
        if isinstance(files, _string_types):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._buffer = []
        self._bufindex = 0
        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        self._mode = mode
        if inplace and openhook:
            raise ValueError("FileInput cannot use an opening hook in inplace mode")
        elif openhook and not callable(openhook):
            raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the remaining file names."""
        try:
            self.nextfile()
        finally:
            self._files = ()

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of input."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Python 3 spelling of the iterator protocol.
    __next__ = next

    def __getitem__(self, i):
        """Sequence-style access; *i* must equal the number of lines read.

        Raises RuntimeError for out-of-order access and IndexError once
        the input is exhausted.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        try:
            return self.next()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file (and inplace output), if any.

        sys.stdout is restored first, then the output file and the input
        file are closed, and finally the backup file is removed unless a
        backup extension was requested.  The nested try/finally blocks make
        sure each later step runs even when an earlier close() raises.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            current = self._file
            self._file = None
            try:
                # sys.stdin is borrowed, never closed by us.
                if current and not self._isstdin:
                    current.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                if backupfilename and not self._backup:
                    # Best effort: a leftover backup file is not an error.
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False
                self._buffer = []
                self._bufindex = 0

    def readline(self):
        """Return the next line of input, or "" when all files are read.

        Lines are served from a buffer filled by readlines(bufsize).  When
        the buffer runs dry the current file is refilled from, and when a
        file is exhausted the next one is opened.  This is a loop rather
        than a recursive call so that an arbitrarily long run of empty
        files cannot exhaust the interpreter's recursion limit.
        """
        while True:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                self.nextfile()

    def _open_next(self):
        """Pop the next file name and open it into self._file."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
        elif self._inplace:
            self._open_inplace()
        elif self._openhook:
            # This may raise IOError
            self._file = self._openhook(self._filename, self._mode)
        else:
            # This may raise IOError
            self._file = open(self._filename, self._mode)

    def _open_inplace(self):
        """Move the current file to its backup name and redirect stdout.

        The backup becomes the input file; a fresh file with the original
        name (and, where possible, the original permission bits) becomes
        sys.stdout until nextfile() restores it.
        """
        self._backupfilename = (
            self._filename + (self._backup or os.extsep + "bak"))
        # A stale backup from an earlier run would make rename() fail on
        # some platforms; ignore the error if there is none.
        try:
            os.unlink(self._backupfilename)
        except OSError:
            pass
        # The next few lines may raise IOError/OSError
        os.rename(self._filename, self._backupfilename)
        self._file = open(self._backupfilename, self._mode)
        try:
            perm = os.fstat(self._file.fileno()).st_mode
        except OSError:
            self._output = open(self._filename, "w")
        else:
            fd = os.open(self._filename,
                         os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                         perm)
            self._output = os.fdopen(fd, "w")
            try:
                if hasattr(os, 'chmod'):
                    os.chmod(self._filename, perm)
            except OSError:
                pass
        self._savestdout = sys.stdout
        sys.stdout = self._output

    def filename(self):
        """Return the name of the file of the last line read (or None)."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines read so far."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines read from the current file."""
        return self._filelineno

    def fileno(self):
        """Return the descriptor of the open file, or -1 if there is none."""
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return true if the last line read was the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the last line was read from sys.stdin."""
        return self._isstdin


def hook_compressed(filename, mode):
    """Open hook that transparently opens '.gz' and '.bz2' files.

    Files with any other extension are opened with the builtin open().
    """
    ext = os.path.splitext(filename)[1]
    if ext == '.gz':
        import gzip
        return gzip.open(filename, mode)
    elif ext == '.bz2':
        import bz2
        return bz2.BZ2File(filename, mode)
    else:
        return open(filename, mode)


def hook_encoded(encoding):
    """Return an open hook that decodes each file with *encoding*."""
    import io

    def openhook(filename, mode):
        # io.open() accepts neither 'U' nor, together with an encoding, 'b'.
        mode = mode.replace('U', '').replace('b', '') or 'r'
        return io.open(filename, mode, encoding=encoding, newline='')
    return openhook


def _test():
    """Command-line self test: number every line of the given files.

    Options: -i enables inplace mode, -b EXT sets the backup extension.
    """
    import getopt
    inplace = 0
    backup = ""
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i':
            inplace = 1
        if o == '-b':
            backup = a
    for line in input(args, inplace=inplace, backup=backup):
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   "*" if isfirstline() else "", line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))


if __name__ == '__main__':
    _test()