"""
Python implementation of the io module.
"""

import os
import abc
import codecs
import errno
import stat
import sys
# Import _thread instead of threading to reduce startup cost
from _thread import allocate_lock as Lock
if sys.platform in {'win32', 'cygwin'}:
    from msvcrt import setmode as _setmode
else:
    _setmode = None

import io
from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)

valid_seek_flags = {0, 1, 2}  # Hardwired values
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags.add(os.SEEK_HOLE)
    valid_seek_flags.add(os.SEEK_DATA)

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError

# Does io.IOBase finalizer log the exception if the close() method fails?
# The exception is ignored silently by default in release build.
_IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode)


def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding.
    In text mode (the default, or when 't' is appended to the mode
    argument), the contents of the file are returned as strings, the bytes
    having been first decoded using a platform-dependent encoding or using
    the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python.  It has no effect in Python 3.  Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    if not isinstance(file, int):
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)
    modes = set(mode)
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending or updating:
            raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    if binary and buffering == 1:
        import warnings
        warnings.warn("line buffering (buffering=1) isn't supported in binary "
                      "mode, the default buffer size will be used",
                      RuntimeWarning, 2)
    raw = FileIO(file,
                 (creating and "x" or "") +
                 (reading and "r" or "") +
                 (writing and "w" or "") +
                 (appending and "a" or "") +
                 (updating and "+" or ""),
                 closefd, opener=opener)
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        result = text
        text.mode = mode
        return result
    except:
        result.close()
        raise
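
# A quick illustration of the layering described in the docstring above
# (file names are arbitrary examples):
#
#     open("data.bin", "rb")      -> BufferedReader wrapping a FileIO
#     open("data.bin", "wb")      -> BufferedWriter wrapping a FileIO
#     open("data.bin", "r+b")     -> BufferedRandom wrapping a FileIO
#     open("data.bin", "rb", 0)   -> the raw FileIO itself (unbuffered)
#     open("notes.txt", "r")      -> TextIOWrapper over a BufferedReader
#
# Text streams always sit on top of a buffered binary stream; buffering=0 is
# only accepted in binary mode, as enforced above.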

# Define a default pure-Python implementation for open_code()
# that does not allow hooks. Warn on first use. Defined for tests.
def _open_code_with_warning(path):
    """Opens the provided file with mode ``'rb'``. This function
    should be used when the intent is to treat the contents as
    executable code.

    ``path`` should be an absolute path.

    When supported by the runtime, this function can be hooked
    in order to allow embedders more control over code files.
    This functionality is not supported on the current runtime.
    """
    import warnings
    warnings.warn("_pyio.open_code() may not be using hooks",
                  RuntimeWarning, 2)
    return open(path, "rb")

try:
    open_code = io.open_code
except AttributeError:
    open_code = _open_code_with_warning


class DocDescriptor:
    """Helper for builtins.open.__doc__
    """
    def __get__(self, obj, typ=None):
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True)\n\n" +
            open.__doc__)

class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pylifecycle.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        return open(*args, **kwargs)


# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
try:
    UnsupportedOperation = io.UnsupportedOperation
except AttributeError:
    class UnsupportedOperation(OSError, ValueError):
        pass


class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too.
    Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an OSError exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell(). Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        try:
            closed = self.closed
        except AttributeError:
            # If getting closed fails, then the object is probably
            # in an unusable state, so ignore.
            return

        if closed:
            return

        if _IOBASE_EMITS_UNRAISABLE:
            self.close()
        else:
            # The try/except block is in case this is called at program
            # exit time, when it's possible that globals have already been
            # deleted, and then the close() call might fail.  Since
            # there's nothing we can do about such failures and they annoy
            # the end users, we suppress the traceback.
            try:
                self.close()
            except:
                pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    n = min(n, size)
                return n
        else:
            def nreadahead():
                return 1
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write a list of lines to the stream.

        Line separators are not added, so it is usual for each of the lines
        provided to have a line separator at the end.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)


class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None:
            size = -1
        if size < 0:
            return self.readall()
        b = bytearray(size.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        else:
            # b'' or None
            return data

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
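
# For illustration only: as the comments above note, a minimal raw stream
# only needs to supply readinto(); read() and readall() then come for free
# from RawIOBase.  A hypothetical in-memory example:
#
#     class ZeroStream(RawIOBase):
#         """Yields an endless stream of zero bytes."""
#         def readable(self):
#             return True
#         def readinto(self, b):
#             for i in range(len(b)):
#                 b[i] = 0
#             return len(b)
#
#     ZeroStream().read(4)   # -> b'\x00\x00\x00\x00'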


class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to size bytes with at most one read() system call,
        where size is an int.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """

        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Read bytes into buffer *b*, using at most one system call

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """

        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        if not isinstance(b, memoryview):
            b = memoryview(b)
        b = b.cast('B')

        if read1:
            data = self.read1(len(b))
        else:
            data = self.read(len(b))
        n = len(data)

        b[:n] = data

        return n

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is always the length of b
        in bytes.

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)


class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise OSError("seek() returned an invalid position")
        return new_position

    def tell(self):
        pos = self.raw.tell()
        if pos < 0:
            raise OSError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        if self.closed:
            raise ValueError("flush on closed file")
        self.raw.flush()

    def close(self):
        if self.raw is not None and not self.closed:
            try:
                # may raise BlockingIOError or BrokenPipeError etc
                self.flush()
            finally:
                self.raw.close()

    def detach(self):
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self._raw
        self._raw = None
        return raw

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        modname = self.__class__.__module__
        clsname = self.__class__.__qualname__
        try:
            name = self.name
        except AttributeError:
            return "<{}.{}>".format(modname, clsname)
        else:
            return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()


class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # Initialize _buffer as soon as possible since it's used by __del__()
    # which calls close()
    _buffer = None

    def __init__(self, initial_bytes=None):
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        if self._buffer is not None:
            self._buffer.clear()
        super().close()

    def read(self, size=-1):
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        if size < 0:
            size = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + size)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, size=-1):
        """This is the same as read.
        """
        return self.read(size)

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            n = view.nbytes  # Size of any bytes-like object
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos_index = pos.__index__
        except AttributeError:
            raise TypeError(f"{pos!r} is not an integer")
        else:
            pos = pos_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos_index = pos.__index__
            except AttributeError:
                raise TypeError(f"{pos!r} is not an integer")
            else:
                pos = pos_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

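# Quick sketch of the in-memory semantics implemented above (the values shown
# are what the code computes, not captured output):
#
#     b = BytesIO(b"abc")
#     b.seek(5)
#     b.write(b"x")              # gap between EOF and pos is null-padded
#     b.getvalue()               # -> b'abc\x00\x00x'
#     with b.getbuffer() as view:
#         view[0] = 0x41         # mutates the underlying buffer in place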

class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        return self.raw.readable()

    def _reset_read_buf(self):
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            current_size = 0
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                current_size += len(chunk)
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* with at most one system call."""

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer
                avail = min(len(self._read_buf) - self._read_pos, len(buf))
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break  # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break  # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos

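# A worked example of the position bookkeeping above (numbers are purely
# illustrative): if the raw stream is at offset 100 and the reader holds a
# 16-byte read-ahead buffer of which 6 bytes have already been handed out,
# tell() reports 100 - 16 + 6 == 90, i.e. the position as seen by the
# caller rather than the raw stream's own offset; seek(..., whence=1)
# applies the same correction before delegating to the raw stream.
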
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            if self.closed:
                raise ValueError("write to closed file")
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer.  (This may
                # raise BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                n = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if n is None:
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if n > len(self._write_buf) or n < 0:
                raise OSError("write() returned incorrect number of bytes")
            del self._write_buf[:n]

    def tell(self):
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

    def close(self):
        with self._write_lock:
            if self.raw is None or self.closed:
                return
        # We have to release the lock and call self.flush() (which will
        # probably just re-take the lock) in case flush has been overridden in
        # a subclass or the user set self.flush to something. This is the same
        # behavior as the C implementation.
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            with self._write_lock:
                self.raw.close()

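# Note on the partial-write accounting in BufferedWriter.write() above: the
# BlockingIOError it re-raises carries, as characters_written, the number of
# the caller's bytes that were actually kept.  Purely illustrative numbers:
# with buffer_size=8, an empty internal buffer, and a 20-byte write during
# which the raw stream accepts nothing (its write() returns None), the
# internal buffer is cut back to 8 bytes and the exception reports
# characters_written == 8, so a caller may retry with b[8:].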

class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, size=-1):
        if size is None:
            size = -1
        return self.reader.read(size)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size=-1):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        return self.writer.closed


class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise OSError("seek() returned invalid position")
        return pos

    def tell(self):
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, pos)

    def read(self, size=None):
        if size is None:
            size = -1
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)


class FileIO(RawIOBase):
    _fd = -1
    _created = False
    _readable = False
    _writable = False
    _appending = False
    _seekable = None
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                try:
                    os.lseek(fd, 0, SEEK_END)
                except OSError as e:
                    if e.errno != errno.ESPIPE:
                        raise
        except:
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self):
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested.
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available. Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk:  # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto()."""
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking
        mode, returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument offset is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        The current file position is changed to the value of size.
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'

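# Rough summary of how the mode strings handled by FileIO above map onto the
# os.open() flags composed in __init__ (platform extras such as O_BINARY and
# O_CLOEXEC/O_NOINHERIT omitted):
#
#     'rb'  -> O_RDONLY
#     'wb'  -> O_CREAT | O_TRUNC | O_WRONLY
#     'xb'  -> O_EXCL | O_CREAT | O_WRONLY
#     'ab'  -> O_APPEND | O_CREAT | O_WRONLY
#     '+'   -> adds readability, so O_RDWR replaces O_RDONLY/O_WRONLY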


class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O.  There is no public constructor.
    """

    def read(self, size=-1):
        """Read at most size characters from stream, where size is an int.

        Read from underlying buffer until we have size characters or we hit
        EOF.  If size is negative or omitted, read until EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream, returning the number of characters
        written."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None

io.TextIOBase.register(TextIOBase)


class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """
    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        # decode input (with the eventual \r from a previous pass)
        if self.decoder is None:
            output = input
        else:
            output = self.decoder.decode(input, final=final)
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if output.endswith("\r") and not final:
            output = output[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        self.seennl |= (lf and self._LF) | (cr and self._CR) \
                    | (crlf and self._CRLF)

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        if self.decoder is None:
            buf = b""
            flag = 0
        else:
            buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()
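
    # Illustrative sketch (comments only, not executed): the pendingcr logic
    # in decode() above holds back a trailing '\r' so that a '\r\n' pair
    # split across two chunks is still reported (and translated) as a single
    # newline:
    #
    #   >>> d = IncrementalNewlineDecoder(None, translate=True)
    #   >>> d.decode('a\r')        # trailing '\r' withheld for now
    #   'a'
    #   >>> d.decode('\nb')        # reunited as '\r\n', translated to '\n'
    #   '\nb'
    #   >>> d.newlines
    #   '\r\n'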

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self.seennl]


class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with.  It defaults to
    locale.getpreferredencoding(False).

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings.  If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller.  Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep.  If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated.  On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    _CHUNK_SIZE = 2048

    # Initialize _buffer as soon as possible since it's used by __del__()
    # which calls close()
    _buffer = None

    # The write_through argument has no effect here since this
    # implementation always writes through.  The argument is present only
    # so that the signature can match the signature of the C version.
    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False, write_through=False):
        self._check_newline(newline)
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding(False)

        if not isinstance(encoding, str):
            raise ValueError("invalid encoding: %r" % encoding)

        if not codecs.lookup(encoding)._is_text_encoding:
            msg = ("%r is not a text encoding; "
                   "use codecs.open() to handle arbitrary codecs")
            raise LookupError(msg % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, str):
                raise ValueError("invalid errors: %r" % errors)

        self._buffer = buffer
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()
        self._has_read1 = hasattr(self.buffer, 'read1')
        self._configure(encoding, errors, newline,
                        line_buffering, write_through)
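
    # Illustrative sketch (comments only, not executed): the encoding
    # resolution in __init__ above only consults the locale when no encoding
    # is passed and the buffer has no usable device encoding, e.g. an
    # in-memory BytesIO:
    #
    #   >>> b = BytesIO(b'caf\xc3\xa9\nnext line\n')
    #   >>> t = TextIOWrapper(b, encoding='utf-8', newline='')
    #   >>> t.readline()
    #   'café\n'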

    def _check_newline(self, newline):
        if newline is not None and not isinstance(newline, str):
            raise TypeError("illegal newline type: %r" % (type(newline),))
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))

    def _configure(self, encoding=None, errors=None, newline=None,
                   line_buffering=False, write_through=False):
        self._encoding = encoding
        self._errors = errors
        self._encoder = None
        self._decoder = None
        self._b2cratio = 0.0

        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep

        self._line_buffering = line_buffering
        self._write_through = write_through

        # don't write a BOM in the middle of a file
        if self._seekable and self.writable():
            position = self.buffer.tell()
            if position != 0:
                try:
                    self._get_encoder().setstate(0)
                except LookupError:
                    # Sometimes the encoder doesn't exist
                    pass

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    # - "bytes_..." for integer variables that count input bytes
    # - "chars_..." for integer variables that count decoded characters

    def __repr__(self):
        result = "<{}.{}".format(self.__class__.__module__,
                                 self.__class__.__qualname__)
        try:
            name = self.name
        except AttributeError:
            pass
        else:
            result += " name={0!r}".format(name)
        try:
            mode = self.mode
        except AttributeError:
            pass
        else:
            result += " mode={0!r}".format(mode)
        return result + " encoding={0!r}>".format(self.encoding)

    @property
    def encoding(self):
        return self._encoding

    @property
    def errors(self):
        return self._errors

    @property
    def line_buffering(self):
        return self._line_buffering

    @property
    def write_through(self):
        return self._write_through

    @property
    def buffer(self):
        return self._buffer
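
    # Illustrative sketch (comments only, not executed): reconfigure() below
    # changes stream policy in place; encoding, errors and newline can only
    # change while no decoder has been created by a read:
    #
    #   >>> t = TextIOWrapper(BytesIO(), encoding='ascii')
    #   >>> t.reconfigure(errors='replace', line_buffering=True)
    #   >>> t.errors, t.line_buffering
    #   ('replace', True)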

    def reconfigure(self, *,
                    encoding=None, errors=None, newline=Ellipsis,
                    line_buffering=None, write_through=None):
        """Reconfigure the text stream with new parameters.

        This also flushes the stream.
        """
        if (self._decoder is not None
                and (encoding is not None or errors is not None
                     or newline is not Ellipsis)):
            raise UnsupportedOperation(
                "It is not possible to set the encoding or newline of stream "
                "after the first read")

        if errors is None:
            if encoding is None:
                errors = self._errors
            else:
                errors = 'strict'
        elif not isinstance(errors, str):
            raise TypeError("invalid errors: %r" % errors)

        if encoding is None:
            encoding = self._encoding
        else:
            if not isinstance(encoding, str):
                raise TypeError("invalid encoding: %r" % encoding)

        if newline is Ellipsis:
            newline = self._readnl
        self._check_newline(newline)

        if line_buffering is None:
            line_buffering = self.line_buffering
        if write_through is None:
            write_through = self.write_through

        self.flush()
        self._configure(encoding, errors, newline,
                        line_buffering, write_through)

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def readable(self):
        return self.buffer.readable()

    def writable(self):
        return self.buffer.writable()

    def flush(self):
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        if self.buffer is not None and not self.closed:
            try:
                self.flush()
            finally:
                self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def name(self):
        return self.buffer.name

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s):
        """Write data, where s is a str."""
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, str):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder
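
    # Illustrative sketch (comments only, not executed): write() above
    # translates '\n' to self._writenl before encoding, so with an explicit
    # newline the bytes reaching the binary buffer carry that line ending:
    #
    #   >>> b = BytesIO()
    #   >>> t = TextIOWrapper(b, encoding='ascii', newline='\r\n')
    #   >>> t.write('a\nb\n')
    #   4
    #   >>> t.flush(); b.getvalue()
    #   b'a\r\nb\r\n'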

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached.  The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value).  The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        if self._has_read1:
            input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        else:
            input_chunk = self.buffer.read(self._CHUNK_SIZE)
        eof = not input_chunk
        decoded_chars = self._decoder.decode(input_chunk, eof)
        self._set_decoded_chars(decoded_chars)
        if decoded_chars:
            self._b2cratio = len(input_chunk) / len(self._decoded_chars)
        else:
            self._b2cratio = 0.0

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
                (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
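
    # Illustrative sketch (comments only, not executed): the cookie is five
    # non-negative fields packed into successive 64-bit slots of one big
    # integer, so packing and unpacking round-trip exactly:
    #
    #   >>> w = TextIOWrapper(BytesIO(), encoding='utf-8')
    #   >>> cookie = w._pack_cookie(10, dec_flags=3, bytes_to_feed=7,
    #   ...                         need_eof=1, chars_to_skip=2)
    #   >>> w._unpack_cookie(cookie)
    #   (10, 3, 7, 1, 2)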

    def tell(self):
        if not self._seekable:
            raise UnsupportedOperation("underlying stream is not seekable")
        if not self._telling:
            raise OSError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Fast search for an acceptable start point, close to our
            # current pos.
            # Rationale: calling decoder.decode() has a large overhead
            # regardless of chunk size; we want the number of such calls to
            # be O(1) in most situations (common decoders, sensible input).
            # Actually, it will be exactly 1 for fixed-size codecs (all
            # 8-bit codecs, also UTF-16 and UTF-32).
            skip_bytes = int(self._b2cratio * chars_to_skip)
            skip_back = 1
            assert skip_bytes <= len(next_input)
            while skip_bytes > 0:
                decoder.setstate((b'', dec_flags))
                # Decode up to the tentative start point
                n = len(decoder.decode(next_input[:skip_bytes]))
                if n <= chars_to_skip:
                    b, d = decoder.getstate()
                    if not b:
                        # Before pos and no bytes buffered in decoder => OK
                        dec_flags = d
                        chars_to_skip -= n
                        break
                    # Skip back by buffered amount and reset heuristic
                    skip_bytes -= len(b)
                    skip_back = 1
                else:
                    # We're too far ahead, skip back a bit
                    skip_bytes -= skip_back
                    skip_back = skip_back * 2
            else:
                skip_bytes = 0
                decoder.setstate((b'', dec_flags))

            # Note our initial start point.
            start_pos = position + skip_bytes
            start_flags = dec_flags
            if chars_to_skip == 0:
                # We haven't moved from the start point.
                return self._pack_cookie(start_pos, start_flags)

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            bytes_fed = 0
            need_eof = 0
            # Chars decoded since `start_pos`
            chars_decoded = 0
            for i in range(skip_bytes, len(next_input)):
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_input[i:i+1]))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise OSError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            decoder.setstate(saved_state)
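
    # Illustrative sketch (comments only, not executed): tell() above hands
    # back an opaque cookie that seek() can replay later, even when the
    # underlying bytes use a multi-byte encoding:
    #
    #   >>> t = TextIOWrapper(BytesIO('héllo\nwörld\n'.encode('utf-8')),
    #   ...                   encoding='utf-8')
    #   >>> t.readline()
    #   'héllo\n'
    #   >>> pos = t.tell()
    #   >>> t.readline()
    #   'wörld\n'
    #   >>> _ = t.seek(pos)        # seek() returns the cookie itself
    #   >>> t.readline()
    #   'wörld\n'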

    def truncate(self, pos=None):
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def detach(self):
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        buffer = self._buffer
        self._buffer = None
        return buffer

    def seek(self, cookie, whence=0):
        def _reset_encoder(position):
            """Reset the encoder (merely useful for proper BOM handling)"""
            try:
                encoder = self._encoder or self._get_encoder()
            except LookupError:
                # Sometimes the encoder doesn't exist
                pass
            else:
                if position != 0:
                    encoder.setstate(0)
                else:
                    encoder.reset()

        if self.closed:
            raise ValueError("seek on closed file")
        if not self._seekable:
            raise UnsupportedOperation("underlying stream is not seekable")
        if whence == SEEK_CUR:
            if cookie != 0:
                raise UnsupportedOperation("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        elif whence == SEEK_END:
            if cookie != 0:
                raise UnsupportedOperation("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, whence)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            _reset_encoder(position)
            return position
        if whence != 0:
            raise ValueError("unsupported whence (%r)" % (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise OSError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        _reset_encoder(cookie)
        return cookie
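
    # Illustrative sketch (comments only, not executed): seek() above only
    # accepts zero offsets for SEEK_CUR and SEEK_END; arbitrary text
    # positions must be cookies previously returned by tell():
    #
    #   >>> t = TextIOWrapper(BytesIO(b'abc\ndef\n'), encoding='ascii')
    #   >>> t.seek(0, SEEK_END)            # zero end-relative seek is fine
    #   8
    #   >>> t.seek(1, SEEK_CUR)
    #   Traceback (most recent call last):
    #     ...
    #   UnsupportedOperation: can't do nonzero cur-relative seeks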

    def read(self, size=None):
        self._checkReadable()
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        decoder = self._decoder or self._get_decoder()
        if size < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have size characters to return.
            eof = False
            result = self._get_decoded_chars(size)
            while len(result) < size and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(size - len(result))
            return result

    def __next__(self):
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, size=None):
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search.  Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if size >= 0 and len(line) >= size:
                endpos = size  # reached length size
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if size >= 0 and endpos > size:
            endpos = size  # don't exceed size

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        return self._decoder.newlines if self._decoder else None
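

# Illustrative sketch (comments only, not executed): the newlines property
# above accumulates every line-ending style seen so far while reading in
# universal-newlines mode:
#
#   >>> t = TextIOWrapper(BytesIO(b'a\nb\r\nc\r'), encoding='ascii')
#   >>> t.read()
#   'a\nb\nc\n'
#   >>> t.newlines
#   ('\r', '\n', '\r\n')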


class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of the object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="surrogatepass",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        self.flush()
        decoder = self._decoder or self._get_decoder()
        old_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(old_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr.  In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        return None

    @property
    def encoding(self):
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")
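

# Illustrative sketch (comments only, not executed): StringIO above stores
# its text in a BytesIO encoded as UTF-8 with surrogatepass, which is why
# getvalue() has to run the bytes back through a decoder:
#
#   >>> s = StringIO('first\n')
#   >>> _ = s.seek(0, SEEK_END)
#   >>> _ = s.write('second\n')
#   >>> s.getvalue()
#   'first\nsecond\n'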