"""Helper class to quickly write a loop over all standard input files.

Typical use is:

    import fileinput
    for line in fileinput.input():
        process(line)

This iterates over the lines of all files listed in sys.argv[1:],
defaulting to sys.stdin if the list is empty.  If a filename is '-' it
is also replaced by sys.stdin and the optional arguments mode and
openhook are ignored.  To specify an alternative list of filenames,
pass it as the argument to input().  A single file name is also allowed.

Functions filename(), lineno() return the filename and cumulative line
number of the line that has just been read; filelineno() returns its
line number in the current file; isfirstline() returns true iff the
line just read is the first line of its file; isstdin() returns true
iff the line was read from sys.stdin.  Function nextfile() closes the
current file so that the next iteration will read the first line from
the next file (if any); lines not read from the file will not count
towards the cumulative line count; the filename is not changed until
after the first line of the next file has been read.  Function close()
closes the sequence.

Before any lines have been read, filename() returns None and both line
numbers are zero; nextfile() has no effect.  After all lines have been
read, filename() and the line number functions return the values
pertaining to the last line read; nextfile() has no effect.

All files are opened in text mode by default; you can override this by
setting the mode parameter to input() or FileInput.__init__().
If an I/O error occurs during opening or reading a file, the OSError
exception is raised.

If sys.stdin is used more than once, the second and further use will
return no lines, except perhaps for interactive use, or if it has been
explicitly reset (e.g. using sys.stdin.seek(0)).

Empty files are opened and immediately closed; the only time their
presence in the list of filenames is noticeable at all is when the
last file opened is empty.

It is possible that the last line of a file doesn't end in a newline
character; otherwise lines are returned including the trailing
newline.

Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), fileno(), isfirstline(), isstdin(), nextfile()
and close() correspond to the functions in the module.  In addition it
has a readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.

Optional in-place filtering: if the keyword argument inplace=1 is
passed to input() or to the FileInput constructor, the file is moved
to a backup file and standard output is directed to the input file.
This makes it possible to write a filter that rewrites its input file
in place.  If the keyword argument backup=".<some extension>" is also
given, it specifies the extension for the backup file, and the backup
file remains around; by default, the extension is ".bak" and it is
deleted when the output file is closed.  In-place filtering is
disabled when standard input is read.  XXX The current implementation
does not work for MS-DOS 8+3 filesystems.

XXX Possible additions:

- optional getopt argument processing
- isatty()
- read(), read(size), even readlines()

"""

import os
import sys
from types import GenericAlias

__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput", "hook_compressed",
           "hook_encoded"]

# The FileInput instance created by the most recent input() call; the
# module-level helper functions below delegate to it.
_state = None


def input(files=None, inplace=False, backup="", *, mode="r", openhook=None):
    """Return an instance of the FileInput class, which can be iterated.

    The parameters are passed to the constructor of the FileInput class.
    The returned instance, in addition to being an iterator,
    keeps global state for the functions of this module.

    Raises RuntimeError if the previously returned instance still has
    a file open.
    """
    global _state
    if _state and _state._file:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, mode=mode, openhook=openhook)
    return _state


def close():
    """Close the sequence."""
    global _state
    state = _state
    # Drop the global reference first so the module is reusable even if
    # closing the underlying files raises.
    _state = None
    if state:
        state.close()


def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count. The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file. After the last line of the
    last file has been read, this function has no effect.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.nextfile()


def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filename()


def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0. After the last line
    of the last file has been read, returns the line number of that line.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.lineno()


def filelineno():
    """
    Return the line number in the current file. Before the first line
    has been read, returns 0. After the last line of the last file has
    been read, returns the line number of that line within the file.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filelineno()


def fileno():
    """
    Return the file number of the current file. When no file is currently
    opened, returns -1.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.fileno()


def isfirstline():
    """
    Return True if the line just read is the first line of its file,
    otherwise return False.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isfirstline()


def isstdin():
    """
    Return True if the last line was read from sys.stdin,
    otherwise return False.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isstdin()


class FileInput:
    """FileInput([files[, inplace[, backup]]], *, mode="r", openhook=None)

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=False, backup="", *,
                 mode="r", openhook=None):
        """Set up the list of files to read; no file is opened yet.

        files may be a single str or os.PathLike, an iterable of names,
        or None (use sys.argv[1:]); an empty list means standard input.
        Raises ValueError for an unsupported mode, for a non-callable
        openhook, or when openhook is combined with inplace.
        """
        if isinstance(files, str):
            files = (files,)
        elif isinstance(files, os.PathLike):
            files = (os.fspath(files), )
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                # '-' is the placeholder that _readline() maps to sys.stdin.
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        self._inplace = inplace
        self._backup = backup
        self._savestdout = None
        self._output = None
        self._filename = None
        self._startlineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        if 'U' in mode:
            import warnings
            warnings.warn("'U' mode is deprecated",
                          DeprecationWarning, 2)
        self._mode = mode
        # Mode used for the rewritten file in in-place filtering.
        self._write_mode = mode.replace('r', 'w') if 'U' not in mode else 'w'
        if openhook:
            if inplace:
                raise ValueError("FileInput cannot use an opening hook in inplace mode")
            if not callable(openhook):
                raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the remaining file names."""
        try:
            self.nextfile()
        finally:
            self._files = ()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line, moving on to the next file as needed."""
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                # Empty result with no open file: the file list is exhausted.
                raise StopIteration
            self.nextfile()
            # repeat with next file

    def __getitem__(self, i):
        """Deprecated sequence access; i must equal the current lineno()."""
        import warnings
        warnings.warn(
            "Support for indexing FileInput objects is deprecated. "
            "Use iterator protocol instead.",
            DeprecationWarning,
            stacklevel=2
        )
        if i != self.lineno():
            raise RuntimeError("accessing lines out of order")
        try:
            return self.__next__()
        except StopIteration:
            # The StopIteration is an implementation detail; do not chain it.
            raise IndexError("end of input reached") from None

    def nextfile(self):
        """Close the current file (if any) and undo in-place redirection.

        Restores sys.stdout, closes the in-place output file and the
        input file (sys.stdin itself is never closed), and removes the
        backup file unless a backup extension was requested.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            file = self._file
            self._file = None
            try:
                del self._readline  # restore FileInput._readline
            except AttributeError:
                pass
            try:
                if file and not self._isstdin:
                    file.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                if backupfilename and not self._backup:
                    # Best effort: a backup that cannot be removed is ignored.
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False

    def readline(self):
        """Return the next line, or an empty str/bytes at end of input."""
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                return line
            self.nextfile()
            # repeat with next file

    def _readline(self):
        """Open the next file in the list and return its first line.

        This method only runs while no file is open: once a file has been
        opened, its bound readline is stored on the instance under the
        same name and shadows this method until nextfile() deletes it.
        """
        if not self._files:
            if 'b' in self._mode:
                return b''
            else:
                return ''
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._startlineno = self.lineno()
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            if 'b' in self._mode:
                self._file = getattr(sys.stdin, 'buffer', sys.stdin)
            else:
                self._file = sys.stdin
            self._isstdin = True
        else:
            if self._inplace:
                self._backupfilename = (
                    os.fspath(self._filename) + (self._backup or ".bak"))
                try:
                    os.unlink(self._backupfilename)
                except OSError:
                    pass
                # The next few lines may raise OSError
                os.rename(self._filename, self._backupfilename)
                self._file = open(self._backupfilename, self._mode)
                try:
                    perm = os.fstat(self._file.fileno()).st_mode
                except OSError:
                    self._output = open(self._filename, self._write_mode)
                else:
                    # Recreate the output file with the permission bits of
                    # the file that was moved aside.
                    mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
                    if hasattr(os, 'O_BINARY'):
                        mode |= os.O_BINARY

                    fd = os.open(self._filename, mode, perm)
                    self._output = os.fdopen(fd, self._write_mode)
                    try:
                        os.chmod(self._filename, perm)
                    except OSError:
                        pass
                self._savestdout = sys.stdout
                sys.stdout = self._output
            else:
                # This may raise OSError
                if self._openhook:
                    self._file = self._openhook(self._filename, self._mode)
                else:
                    self._file = open(self._filename, self._mode)
        self._readline = self._file.readline  # hide FileInput._readline
        return self._readline()

    def filename(self):
        """Return the name of the file most recently opened, or None."""
        return self._filename

    def lineno(self):
        """Return the cumulative line number of the line just read."""
        return self._startlineno + self._filelineno

    def filelineno(self):
        """Return the line number within the current file."""
        return self._filelineno

    def fileno(self):
        """Return the current file's descriptor, or -1 if unavailable."""
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return True if the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return True if the current file is sys.stdin."""
        return self._isstdin

    __class_getitem__ = classmethod(GenericAlias)


def hook_compressed(filename, mode, *, encoding=None, errors=None):
    """Open filename, decompressing '.gz' and '.bz2' files on the fly.

    Files with any other extension are opened with open().  gzip.open()
    and bz2.BZ2File produce binary streams even for mode 'r', so when
    mode does not contain 'b' the stream is wrapped in a TextIOWrapper;
    this keeps text-mode reads returning str for every kind of file.
    encoding and errors are used for text decoding only.
    """
    ext = os.path.splitext(filename)[1]
    if ext == '.gz':
        import gzip
        stream = gzip.open(filename, mode)
    elif ext == '.bz2':
        import bz2
        stream = bz2.BZ2File(filename, mode)
    else:
        return open(filename, mode, encoding=encoding, errors=errors)

    # gzip and bz2 are binary mode by default.
    if 'b' not in mode:
        import io
        stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors)
    return stream


def hook_encoded(encoding, errors=None):
    """Return an opening hook that opens files with the given encoding."""
    def openhook(filename, mode):
        return open(filename, mode, encoding=encoding, errors=errors)
    return openhook


def _test():
    """Command-line demo: number every line of the files given in argv."""
    import getopt
    inplace = False
    backup = ""
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i':
            inplace = True
        if o == '-b':
            backup = a
    for line in input(args, inplace=inplace, backup=backup):
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   "*" if isfirstline() else "", line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))


if __name__ == '__main__':
    _test()