"""Helper class to quickly write a loop over all standard input files.

Typical use is:

    import fileinput
    for line in fileinput.input():
        process(line)

This iterates over the lines of all files listed in sys.argv[1:],
defaulting to sys.stdin if the list is empty. If a filename is '-' it
is also replaced by sys.stdin. To specify an alternative list of
filenames, pass it as the argument to input(). A single file name is
also allowed.

Functions filename(), lineno() return the filename and cumulative line
number of the line that has just been read; filelineno() returns its
line number in the current file; isfirstline() returns true iff the
line just read is the first line of its file; isstdin() returns true
iff the line was read from sys.stdin. Function nextfile() closes the
current file so that the next iteration will read the first line from
the next file (if any); lines not read from the file will not count
towards the cumulative line count; the filename is not changed until
after the first line of the next file has been read. Function close()
closes the sequence.

Before any lines have been read, filename() returns None and both line
numbers are zero; nextfile() has no effect. After all lines have been
read, filename() and the line number functions return the values
pertaining to the last line read; nextfile() has no effect.

All files are opened in text mode by default, you can override this by
setting the mode parameter to input() or FileInput.__init__().
If an I/O error occurs during opening or reading a file, the OSError
exception is raised.

If sys.stdin is used more than once, the second and further use will
return no lines, except perhaps for interactive use, or if it has been
explicitly reset (e.g. using sys.stdin.seek(0)).

Empty files are opened and immediately closed; the only time their
presence in the list of filenames is noticeable at all is when the
last file opened is empty.

It is possible that the last line of a file doesn't end in a newline
character; otherwise lines are returned including the trailing
newline.

Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
correspond to the functions in the module. In addition it has a
readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior. The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.

Optional in-place filtering: if the keyword argument inplace=1 is
passed to input() or to the FileInput constructor, the file is moved
to a backup file and standard output is directed to the input file.
This makes it possible to write a filter that rewrites its input file
in place. If the keyword argument backup=".<some extension>" is also
given, it specifies the extension for the backup file, and the backup
file remains around; by default, the extension is ".bak" and it is
deleted when the output file is closed. In-place filtering is
disabled when standard input is read. XXX The current implementation
does not work for MS-DOS 8+3 filesystems.

XXX Possible additions:

- optional getopt argument processing
- isatty()
- read(), read(size), even readlines()

"""

import os
import sys

__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput", "hook_compressed",
           "hook_encoded"]

# The FileInput instance backing the module-level convenience functions;
# None while no input() sequence is active.
_state = None


def input(files=None, inplace=False, backup="", bufsize=0,
          mode="r", openhook=None):
    """Return an instance of the FileInput class, which can be iterated.

    The parameters are passed to the constructor of the FileInput class.
    The returned instance, in addition to being an iterator,
    keeps global state for the functions of this module.

    Raises RuntimeError if a previous input() sequence still has a file
    open.
    """
    global _state
    if _state and _state._file:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize, mode, openhook)
    return _state


def close():
    """Close the sequence."""
    global _state
    state = _state
    # Drop the global reference first so the module is reusable even if
    # closing the underlying files raises.
    _state = None
    if state:
        state.close()


def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count. The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file. After the last line of the
    last file has been read, this function has no effect.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.nextfile()


def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filename()


def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0. After the last line
    of the last file has been read, returns the line number of that line.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.lineno()


def filelineno():
    """
    Return the line number in the current file. Before the first line
    has been read, returns 0. After the last line of the last file has
    been read, returns the line number of that line within the file.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filelineno()


def fileno():
    """
    Return the file number of the current file. When no file is currently
    opened, returns -1.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.fileno()


def isfirstline():
    """
    Return true if the line just read is the first line of its file,
    otherwise return false.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isfirstline()


def isstdin():
    """
    Return true if the last line was read from sys.stdin,
    otherwise return false.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isstdin()


class FileInput:
    """FileInput([files[, inplace[, backup[, bufsize, [, mode[, openhook]]]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=False, backup="", bufsize=0,
                 mode="r", openhook=None):
        """Set up the sequence; no file is opened until a line is requested.

        files may be a single str or os.PathLike, an iterable of names,
        or None (use sys.argv[1:], falling back to '-' for stdin).
        bufsize is accepted for compatibility only and triggers a
        DeprecationWarning when true.

        Raises ValueError for an unsupported mode, for an openhook
        combined with inplace, or for a non-callable openhook.
        """
        # Per-file state is initialised before any argument processing so
        # that close() (and therefore __del__) is safe on an instance
        # whose construction failed part-way.
        self._savestdout = None
        self._output = None
        self._filename = None
        self._startlineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._inplace = inplace
        self._backup = backup

        if isinstance(files, str):
            files = (files,)
        elif isinstance(files, os.PathLike):
            files = (os.fspath(files),)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files

        if bufsize:
            import warnings
            warnings.warn('bufsize is deprecated and ignored',
                          DeprecationWarning, stacklevel=2)
        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        if 'U' in mode:
            import warnings
            warnings.warn("'U' mode is deprecated",
                          DeprecationWarning, 2)
        self._mode = mode
        if openhook:
            if inplace:
                raise ValueError("FileInput cannot use an opening hook in inplace mode")
            if not callable(openhook):
                raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the remaining file names."""
        try:
            self.nextfile()
        finally:
            self._files = ()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line, moving on to the next file as needed."""
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                # An empty line with no open file means nothing is left.
                raise StopIteration
            self.nextfile()
            # repeat with next file

    def __getitem__(self, i):
        """Return line i; i must equal the number of lines read so far.

        Raises RuntimeError for out-of-order access and IndexError at
        the end of input.
        """
        if i != self.lineno():
            raise RuntimeError("accessing lines out of order")
        try:
            return self.__next__()
        except StopIteration:
            # The StopIteration is an implementation detail; do not chain it.
            raise IndexError("end of input reached") from None

    def nextfile(self):
        """Close the current file and undo any in-place redirection.

        The nested try/finally blocks make every cleanup step run even
        when an earlier close() raises.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            file = self._file
            self._file = None
            try:
                del self._readline  # restore FileInput._readline
            except AttributeError:
                pass
            try:
                # sys.stdin is never closed by this class.
                if file and not self._isstdin:
                    file.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                # The backup is kept only when the caller named an extension.
                if backupfilename and not self._backup:
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False

    def readline(self):
        """Return the next line, or an empty str/bytes at end of input."""
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                return line
            self.nextfile()
            # repeat with next file

    def _readline(self):
        """Open the next file in the list and return its first line.

        Once a file is open this method is shadowed on the instance by
        that file's own readline, so it only runs when no file is open.
        Returns an empty str (or bytes in binary mode) when no file
        names remain.
        """
        if not self._files:
            if 'b' in self._mode:
                return b''
            else:
                return ''
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._startlineno = self.lineno()
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            if 'b' in self._mode:
                self._file = getattr(sys.stdin, 'buffer', sys.stdin)
            else:
                self._file = sys.stdin
            self._isstdin = True
        else:
            if self._inplace:
                self._backupfilename = (
                    os.fspath(self._filename) + (self._backup or ".bak"))
                try:
                    os.unlink(self._backupfilename)
                except OSError:
                    pass
                # The next few lines may raise OSError
                os.rename(self._filename, self._backupfilename)
                self._file = open(self._backupfilename, self._mode)
                try:
                    perm = os.fstat(self._file.fileno()).st_mode
                except OSError:
                    self._output = open(self._filename, "w")
                else:
                    # Recreate the file with the permission bits read
                    # from the backup.
                    mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
                    if hasattr(os, 'O_BINARY'):
                        mode |= os.O_BINARY

                    fd = os.open(self._filename, mode, perm)
                    self._output = os.fdopen(fd, "w")
                    try:
                        if hasattr(os, 'chmod'):
                            os.chmod(self._filename, perm)
                    except OSError:
                        pass
                self._savestdout = sys.stdout
                sys.stdout = self._output
            else:
                # This may raise OSError
                if self._openhook:
                    self._file = self._openhook(self._filename, self._mode)
                else:
                    self._file = open(self._filename, self._mode)
        self._readline = self._file.readline  # hide FileInput._readline
        return self._readline()

    def filename(self):
        """Return the name of the most recently opened file, or None."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines read so far."""
        return self._startlineno + self._filelineno

    def filelineno(self):
        """Return the number of lines read from the current file."""
        return self._filelineno

    def fileno(self):
        """Return the current file's descriptor, or -1 if unavailable."""
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return true if exactly one line of the current file was read."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin


def hook_compressed(filename, mode):
    """Open filename with gzip or bz2 when its extension calls for it.

    '.gz' files are opened with gzip.open(), '.bz2' files with
    bz2.BZ2File(), and everything else with the built-in open().
    """
    ext = os.path.splitext(filename)[1]
    if ext == '.gz':
        import gzip
        return gzip.open(filename, mode)
    elif ext == '.bz2':
        import bz2
        return bz2.BZ2File(filename, mode)
    else:
        return open(filename, mode)


def hook_encoded(encoding, errors=None):
    """Return an openhook that opens files with the given encoding/errors."""
    def openhook(filename, mode):
        return open(filename, mode, encoding=encoding, errors=errors)
    return openhook


def _test():
    """Command-line self-test: number and echo the lines of the inputs.

    Options: -i enables in-place mode, -b EXT sets the backup extension.
    """
    import getopt
    inplace = False
    backup = ""
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i':
            inplace = True
        if o == '-b':
            backup = a
    for line in input(args, inplace=inplace, backup=backup):
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   "*" if isfirstline() else "", line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))


if __name__ == '__main__':
    _test()