1""" 2Python implementation of the io module. 3""" 4 5import os 6import abc 7import codecs 8import errno 9import stat 10import sys 11# Import _thread instead of threading to reduce startup cost 12from _thread import allocate_lock as Lock 13if sys.platform in {'win32', 'cygwin'}: 14 from msvcrt import setmode as _setmode 15else: 16 _setmode = None 17 18import io 19from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END) 20 21valid_seek_flags = {0, 1, 2} # Hardwired values 22if hasattr(os, 'SEEK_HOLE') : 23 valid_seek_flags.add(os.SEEK_HOLE) 24 valid_seek_flags.add(os.SEEK_DATA) 25 26# open() uses st_blksize whenever we can 27DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes 28 29# NOTE: Base classes defined here are registered with the "official" ABCs 30# defined in io.py. We don't use real inheritance though, because we don't want 31# to inherit the C implementations. 32 33# Rebind for compatibility 34BlockingIOError = BlockingIOError 35 36# Does io.IOBase finalizer log the exception if the close() method fails? 37# The exception is ignored silently by default in release build. 38_IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) 39# Does open() check its 'errors' argument? 40_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE 41 42 43def text_encoding(encoding, stacklevel=2): 44 """ 45 A helper function to choose the text encoding. 46 47 When encoding is not None, just return it. 48 Otherwise, return the default text encoding (i.e. "locale"). 49 50 This function emits an EncodingWarning if *encoding* is None and 51 sys.flags.warn_default_encoding is true. 52 53 This can be used in APIs with an encoding=None parameter 54 that pass it to TextIOWrapper or open. 55 However, please consider using encoding="utf-8" for new APIs. 56 """ 57 if encoding is None: 58 encoding = "locale" 59 if sys.flags.warn_default_encoding: 60 import warnings 61 warnings.warn("'encoding' argument not specified.", 62 EncodingWarning, stacklevel + 1) 63 return encoding 64 65 66# Wrapper for builtins.open 67# 68# Trick so that open() won't become a bound method when stored 69# as a class variable (as dbm.dumb does). 70# 71# See init_set_builtins_open() in Python/pylifecycle.c. 72@staticmethod 73def open(file, mode="r", buffering=-1, encoding=None, errors=None, 74 newline=None, closefd=True, opener=None): 75 76 r"""Open file and return a stream. Raise OSError upon failure. 77 78 file is either a text or byte string giving the name (and the path 79 if the file isn't in the current working directory) of the file to 80 be opened or an integer file descriptor of the file to be 81 wrapped. (If a file descriptor is given, it is closed when the 82 returned I/O object is closed, unless closefd is set to False.) 83 84 mode is an optional string that specifies the mode in which the file is 85 opened. It defaults to 'r' which means open for reading in text mode. Other 86 common values are 'w' for writing (truncating the file if it already 87 exists), 'x' for exclusive creation of a new file, and 'a' for appending 88 (which on some Unix systems, means that all writes append to the end of the 89 file regardless of the current seek position). In text mode, if encoding is 90 not specified the encoding used is platform dependent. (For reading and 91 writing raw bytes use binary mode and leave encoding unspecified.) The 92 available modes are: 93 94 ========= =============================================================== 95 Character Meaning 96 --------- --------------------------------------------------------------- 97 'r' open for reading (default) 98 'w' open for writing, truncating the file first 99 'x' create a new file and open it for writing 100 'a' open for writing, appending to the end of the file if it exists 101 'b' binary mode 102 't' text mode (default) 103 '+' open a disk file for updating (reading and writing) 104 'U' universal newline mode (deprecated) 105 ========= =============================================================== 106 107 The default mode is 'rt' (open for reading text). For binary random 108 access, the mode 'w+b' opens and truncates the file to 0 bytes, while 109 'r+b' opens the file without truncation. The 'x' mode implies 'w' and 110 raises an `FileExistsError` if the file already exists. 111 112 Python distinguishes between files opened in binary and text modes, 113 even when the underlying operating system doesn't. Files opened in 114 binary mode (appending 'b' to the mode argument) return contents as 115 bytes objects without any decoding. In text mode (the default, or when 116 't' is appended to the mode argument), the contents of the file are 117 returned as strings, the bytes having been first decoded using a 118 platform-dependent encoding or using the specified encoding if given. 119 120 'U' mode is deprecated and will raise an exception in future versions 121 of Python. It has no effect in Python 3. Use newline to control 122 universal newlines mode. 123 124 buffering is an optional integer used to set the buffering policy. 125 Pass 0 to switch buffering off (only allowed in binary mode), 1 to select 126 line buffering (only usable in text mode), and an integer > 1 to indicate 127 the size of a fixed-size chunk buffer. When no buffering argument is 128 given, the default buffering policy works as follows: 129 130 * Binary files are buffered in fixed-size chunks; the size of the buffer 131 is chosen using a heuristic trying to determine the underlying device's 132 "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`. 133 On many systems, the buffer will typically be 4096 or 8192 bytes long. 134 135 * "Interactive" text files (files for which isatty() returns True) 136 use line buffering. Other text files use the policy described above 137 for binary files. 138 139 encoding is the str name of the encoding used to decode or encode the 140 file. This should only be used in text mode. The default encoding is 141 platform dependent, but any encoding supported by Python can be 142 passed. See the codecs module for the list of supported encodings. 143 144 errors is an optional string that specifies how encoding errors are to 145 be handled---this argument should not be used in binary mode. Pass 146 'strict' to raise a ValueError exception if there is an encoding error 147 (the default of None has the same effect), or pass 'ignore' to ignore 148 errors. (Note that ignoring encoding errors can lead to data loss.) 149 See the documentation for codecs.register for a list of the permitted 150 encoding error strings. 151 152 newline is a string controlling how universal newlines works (it only 153 applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works 154 as follows: 155 156 * On input, if newline is None, universal newlines mode is 157 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and 158 these are translated into '\n' before being returned to the 159 caller. If it is '', universal newline mode is enabled, but line 160 endings are returned to the caller untranslated. If it has any of 161 the other legal values, input lines are only terminated by the given 162 string, and the line ending is returned to the caller untranslated. 163 164 * On output, if newline is None, any '\n' characters written are 165 translated to the system default line separator, os.linesep. If 166 newline is '', no translation takes place. If newline is any of the 167 other legal values, any '\n' characters written are translated to 168 the given string. 169 170 closedfd is a bool. If closefd is False, the underlying file descriptor will 171 be kept open when the file is closed. This does not work when a file name is 172 given and must be True in that case. 173 174 The newly created file is non-inheritable. 175 176 A custom opener can be used by passing a callable as *opener*. The 177 underlying file descriptor for the file object is then obtained by calling 178 *opener* with (*file*, *flags*). *opener* must return an open file 179 descriptor (passing os.open as *opener* results in functionality similar to 180 passing None). 181 182 open() returns a file object whose type depends on the mode, and 183 through which the standard file operations such as reading and writing 184 are performed. When open() is used to open a file in a text mode ('w', 185 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open 186 a file in a binary mode, the returned class varies: in read binary 187 mode, it returns a BufferedReader; in write binary and append binary 188 modes, it returns a BufferedWriter, and in read/write mode, it returns 189 a BufferedRandom. 190 191 It is also possible to use a string or bytearray as a file for both 192 reading and writing. For strings StringIO can be used like a file 193 opened in a text mode, and for bytes a BytesIO can be used like a file 194 opened in a binary mode. 195 """ 196 if not isinstance(file, int): 197 file = os.fspath(file) 198 if not isinstance(file, (str, bytes, int)): 199 raise TypeError("invalid file: %r" % file) 200 if not isinstance(mode, str): 201 raise TypeError("invalid mode: %r" % mode) 202 if not isinstance(buffering, int): 203 raise TypeError("invalid buffering: %r" % buffering) 204 if encoding is not None and not isinstance(encoding, str): 205 raise TypeError("invalid encoding: %r" % encoding) 206 if errors is not None and not isinstance(errors, str): 207 raise TypeError("invalid errors: %r" % errors) 208 modes = set(mode) 209 if modes - set("axrwb+tU") or len(mode) > len(modes): 210 raise ValueError("invalid mode: %r" % mode) 211 creating = "x" in modes 212 reading = "r" in modes 213 writing = "w" in modes 214 appending = "a" in modes 215 updating = "+" in modes 216 text = "t" in modes 217 binary = "b" in modes 218 if "U" in modes: 219 if creating or writing or appending or updating: 220 raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'") 221 import warnings 222 warnings.warn("'U' mode is deprecated", 223 DeprecationWarning, 2) 224 reading = True 225 if text and binary: 226 raise ValueError("can't have text and binary mode at once") 227 if creating + reading + writing + appending > 1: 228 raise ValueError("can't have read/write/append mode at once") 229 if not (creating or reading or writing or appending): 230 raise ValueError("must have exactly one of read/write/append mode") 231 if binary and encoding is not None: 232 raise ValueError("binary mode doesn't take an encoding argument") 233 if binary and errors is not None: 234 raise ValueError("binary mode doesn't take an errors argument") 235 if binary and newline is not None: 236 raise ValueError("binary mode doesn't take a newline argument") 237 if binary and buffering == 1: 238 import warnings 239 warnings.warn("line buffering (buffering=1) isn't supported in binary " 240 "mode, the default buffer size will be used", 241 RuntimeWarning, 2) 242 raw = FileIO(file, 243 (creating and "x" or "") + 244 (reading and "r" or "") + 245 (writing and "w" or "") + 246 (appending and "a" or "") + 247 (updating and "+" or ""), 248 closefd, opener=opener) 249 result = raw 250 try: 251 line_buffering = False 252 if buffering == 1 or buffering < 0 and raw.isatty(): 253 buffering = -1 254 line_buffering = True 255 if buffering < 0: 256 buffering = DEFAULT_BUFFER_SIZE 257 try: 258 bs = os.fstat(raw.fileno()).st_blksize 259 except (OSError, AttributeError): 260 pass 261 else: 262 if bs > 1: 263 buffering = bs 264 if buffering < 0: 265 raise ValueError("invalid buffering size") 266 if buffering == 0: 267 if binary: 268 return result 269 raise ValueError("can't have unbuffered text I/O") 270 if updating: 271 buffer = BufferedRandom(raw, buffering) 272 elif creating or writing or appending: 273 buffer = BufferedWriter(raw, buffering) 274 elif reading: 275 buffer = BufferedReader(raw, buffering) 276 else: 277 raise ValueError("unknown mode: %r" % mode) 278 result = buffer 279 if binary: 280 return result 281 encoding = text_encoding(encoding) 282 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering) 283 result = text 284 text.mode = mode 285 return result 286 except: 287 result.close() 288 raise 289 290# Define a default pure-Python implementation for open_code() 291# that does not allow hooks. Warn on first use. Defined for tests. 292def _open_code_with_warning(path): 293 """Opens the provided file with mode ``'rb'``. This function 294 should be used when the intent is to treat the contents as 295 executable code. 296 297 ``path`` should be an absolute path. 298 299 When supported by the runtime, this function can be hooked 300 in order to allow embedders more control over code files. 301 This functionality is not supported on the current runtime. 302 """ 303 import warnings 304 warnings.warn("_pyio.open_code() may not be using hooks", 305 RuntimeWarning, 2) 306 return open(path, "rb") 307 308try: 309 open_code = io.open_code 310except AttributeError: 311 open_code = _open_code_with_warning 312 313 314def __getattr__(name): 315 if name == "OpenWrapper": 316 # bpo-43680: Until Python 3.9, _pyio.open was not a static method and 317 # builtins.open was set to OpenWrapper to not become a bound method 318 # when set to a class variable. _io.open is a built-in function whereas 319 # _pyio.open is a Python function. In Python 3.10, _pyio.open() is now 320 # a static method, and builtins.open() is now io.open(). 321 import warnings 322 warnings.warn('OpenWrapper is deprecated, use open instead', 323 DeprecationWarning, stacklevel=2) 324 global OpenWrapper 325 OpenWrapper = open 326 return OpenWrapper 327 raise AttributeError(name) 328 329 330# In normal operation, both `UnsupportedOperation`s should be bound to the 331# same object. 332try: 333 UnsupportedOperation = io.UnsupportedOperation 334except AttributeError: 335 class UnsupportedOperation(OSError, ValueError): 336 pass 337 338 339class IOBase(metaclass=abc.ABCMeta): 340 341 """The abstract base class for all I/O classes. 342 343 This class provides dummy implementations for many methods that 344 derived classes can override selectively; the default implementations 345 represent a file that cannot be read, written or seeked. 346 347 Even though IOBase does not declare read or write because 348 their signatures will vary, implementations and clients should 349 consider those methods part of the interface. Also, implementations 350 may raise UnsupportedOperation when operations they do not support are 351 called. 352 353 The basic type used for binary data read from or written to a file is 354 bytes. Other bytes-like objects are accepted as method arguments too. 355 Text I/O classes work with str data. 356 357 Note that calling any method (even inquiries) on a closed stream is 358 undefined. Implementations may raise OSError in this case. 359 360 IOBase (and its subclasses) support the iterator protocol, meaning 361 that an IOBase object can be iterated over yielding the lines in a 362 stream. 363 364 IOBase also supports the :keyword:`with` statement. In this example, 365 fp is closed after the suite of the with statement is complete: 366 367 with open('spam.txt', 'r') as fp: 368 fp.write('Spam and eggs!') 369 """ 370 371 ### Internal ### 372 373 def _unsupported(self, name): 374 """Internal: raise an OSError exception for unsupported operations.""" 375 raise UnsupportedOperation("%s.%s() not supported" % 376 (self.__class__.__name__, name)) 377 378 ### Positioning ### 379 380 def seek(self, pos, whence=0): 381 """Change stream position. 382 383 Change the stream position to byte offset pos. Argument pos is 384 interpreted relative to the position indicated by whence. Values 385 for whence are ints: 386 387 * 0 -- start of stream (the default); offset should be zero or positive 388 * 1 -- current stream position; offset may be negative 389 * 2 -- end of stream; offset is usually negative 390 Some operating systems / file systems could provide additional values. 391 392 Return an int indicating the new absolute position. 393 """ 394 self._unsupported("seek") 395 396 def tell(self): 397 """Return an int indicating the current stream position.""" 398 return self.seek(0, 1) 399 400 def truncate(self, pos=None): 401 """Truncate file to size bytes. 402 403 Size defaults to the current IO position as reported by tell(). Return 404 the new size. 405 """ 406 self._unsupported("truncate") 407 408 ### Flush and close ### 409 410 def flush(self): 411 """Flush write buffers, if applicable. 412 413 This is not implemented for read-only and non-blocking streams. 414 """ 415 self._checkClosed() 416 # XXX Should this return the number of bytes written??? 417 418 __closed = False 419 420 def close(self): 421 """Flush and close the IO object. 422 423 This method has no effect if the file is already closed. 424 """ 425 if not self.__closed: 426 try: 427 self.flush() 428 finally: 429 self.__closed = True 430 431 def __del__(self): 432 """Destructor. Calls close().""" 433 try: 434 closed = self.closed 435 except AttributeError: 436 # If getting closed fails, then the object is probably 437 # in an unusable state, so ignore. 438 return 439 440 if closed: 441 return 442 443 if _IOBASE_EMITS_UNRAISABLE: 444 self.close() 445 else: 446 # The try/except block is in case this is called at program 447 # exit time, when it's possible that globals have already been 448 # deleted, and then the close() call might fail. Since 449 # there's nothing we can do about such failures and they annoy 450 # the end users, we suppress the traceback. 451 try: 452 self.close() 453 except: 454 pass 455 456 ### Inquiries ### 457 458 def seekable(self): 459 """Return a bool indicating whether object supports random access. 460 461 If False, seek(), tell() and truncate() will raise OSError. 462 This method may need to do a test seek(). 463 """ 464 return False 465 466 def _checkSeekable(self, msg=None): 467 """Internal: raise UnsupportedOperation if file is not seekable 468 """ 469 if not self.seekable(): 470 raise UnsupportedOperation("File or stream is not seekable." 471 if msg is None else msg) 472 473 def readable(self): 474 """Return a bool indicating whether object was opened for reading. 475 476 If False, read() will raise OSError. 477 """ 478 return False 479 480 def _checkReadable(self, msg=None): 481 """Internal: raise UnsupportedOperation if file is not readable 482 """ 483 if not self.readable(): 484 raise UnsupportedOperation("File or stream is not readable." 485 if msg is None else msg) 486 487 def writable(self): 488 """Return a bool indicating whether object was opened for writing. 489 490 If False, write() and truncate() will raise OSError. 491 """ 492 return False 493 494 def _checkWritable(self, msg=None): 495 """Internal: raise UnsupportedOperation if file is not writable 496 """ 497 if not self.writable(): 498 raise UnsupportedOperation("File or stream is not writable." 499 if msg is None else msg) 500 501 @property 502 def closed(self): 503 """closed: bool. True iff the file has been closed. 504 505 For backwards compatibility, this is a property, not a predicate. 506 """ 507 return self.__closed 508 509 def _checkClosed(self, msg=None): 510 """Internal: raise a ValueError if file is closed 511 """ 512 if self.closed: 513 raise ValueError("I/O operation on closed file." 514 if msg is None else msg) 515 516 ### Context manager ### 517 518 def __enter__(self): # That's a forward reference 519 """Context management protocol. Returns self (an instance of IOBase).""" 520 self._checkClosed() 521 return self 522 523 def __exit__(self, *args): 524 """Context management protocol. Calls close()""" 525 self.close() 526 527 ### Lower-level APIs ### 528 529 # XXX Should these be present even if unimplemented? 530 531 def fileno(self): 532 """Returns underlying file descriptor (an int) if one exists. 533 534 An OSError is raised if the IO object does not use a file descriptor. 535 """ 536 self._unsupported("fileno") 537 538 def isatty(self): 539 """Return a bool indicating whether this is an 'interactive' stream. 540 541 Return False if it can't be determined. 542 """ 543 self._checkClosed() 544 return False 545 546 ### Readline[s] and writelines ### 547 548 def readline(self, size=-1): 549 r"""Read and return a line of bytes from the stream. 550 551 If size is specified, at most size bytes will be read. 552 Size should be an int. 553 554 The line terminator is always b'\n' for binary files; for text 555 files, the newlines argument to open can be used to select the line 556 terminator(s) recognized. 557 """ 558 # For backwards compatibility, a (slowish) readline(). 559 if hasattr(self, "peek"): 560 def nreadahead(): 561 readahead = self.peek(1) 562 if not readahead: 563 return 1 564 n = (readahead.find(b"\n") + 1) or len(readahead) 565 if size >= 0: 566 n = min(n, size) 567 return n 568 else: 569 def nreadahead(): 570 return 1 571 if size is None: 572 size = -1 573 else: 574 try: 575 size_index = size.__index__ 576 except AttributeError: 577 raise TypeError(f"{size!r} is not an integer") 578 else: 579 size = size_index() 580 res = bytearray() 581 while size < 0 or len(res) < size: 582 b = self.read(nreadahead()) 583 if not b: 584 break 585 res += b 586 if res.endswith(b"\n"): 587 break 588 return bytes(res) 589 590 def __iter__(self): 591 self._checkClosed() 592 return self 593 594 def __next__(self): 595 line = self.readline() 596 if not line: 597 raise StopIteration 598 return line 599 600 def readlines(self, hint=None): 601 """Return a list of lines from the stream. 602 603 hint can be specified to control the number of lines read: no more 604 lines will be read if the total size (in bytes/characters) of all 605 lines so far exceeds hint. 606 """ 607 if hint is None or hint <= 0: 608 return list(self) 609 n = 0 610 lines = [] 611 for line in self: 612 lines.append(line) 613 n += len(line) 614 if n >= hint: 615 break 616 return lines 617 618 def writelines(self, lines): 619 """Write a list of lines to the stream. 620 621 Line separators are not added, so it is usual for each of the lines 622 provided to have a line separator at the end. 623 """ 624 self._checkClosed() 625 for line in lines: 626 self.write(line) 627 628io.IOBase.register(IOBase) 629 630 631class RawIOBase(IOBase): 632 633 """Base class for raw binary I/O.""" 634 635 # The read() method is implemented by calling readinto(); derived 636 # classes that want to support read() only need to implement 637 # readinto() as a primitive operation. In general, readinto() can be 638 # more efficient than read(). 639 640 # (It would be tempting to also provide an implementation of 641 # readinto() in terms of read(), in case the latter is a more suitable 642 # primitive operation, but that would lead to nasty recursion in case 643 # a subclass doesn't implement either.) 644 645 def read(self, size=-1): 646 """Read and return up to size bytes, where size is an int. 647 648 Returns an empty bytes object on EOF, or None if the object is 649 set not to block and has no data to read. 650 """ 651 if size is None: 652 size = -1 653 if size < 0: 654 return self.readall() 655 b = bytearray(size.__index__()) 656 n = self.readinto(b) 657 if n is None: 658 return None 659 del b[n:] 660 return bytes(b) 661 662 def readall(self): 663 """Read until EOF, using multiple read() call.""" 664 res = bytearray() 665 while True: 666 data = self.read(DEFAULT_BUFFER_SIZE) 667 if not data: 668 break 669 res += data 670 if res: 671 return bytes(res) 672 else: 673 # b'' or None 674 return data 675 676 def readinto(self, b): 677 """Read bytes into a pre-allocated bytes-like object b. 678 679 Returns an int representing the number of bytes read (0 for EOF), or 680 None if the object is set not to block and has no data to read. 681 """ 682 self._unsupported("readinto") 683 684 def write(self, b): 685 """Write the given buffer to the IO stream. 686 687 Returns the number of bytes written, which may be less than the 688 length of b in bytes. 689 """ 690 self._unsupported("write") 691 692io.RawIOBase.register(RawIOBase) 693from _io import FileIO 694RawIOBase.register(FileIO) 695 696 697class BufferedIOBase(IOBase): 698 699 """Base class for buffered IO objects. 700 701 The main difference with RawIOBase is that the read() method 702 supports omitting the size argument, and does not have a default 703 implementation that defers to readinto(). 704 705 In addition, read(), readinto() and write() may raise 706 BlockingIOError if the underlying raw stream is in non-blocking 707 mode and not ready; unlike their raw counterparts, they will never 708 return None. 709 710 A typical implementation should not inherit from a RawIOBase 711 implementation, but wrap one. 712 """ 713 714 def read(self, size=-1): 715 """Read and return up to size bytes, where size is an int. 716 717 If the argument is omitted, None, or negative, reads and 718 returns all data until EOF. 719 720 If the argument is positive, and the underlying raw stream is 721 not 'interactive', multiple raw reads may be issued to satisfy 722 the byte count (unless EOF is reached first). But for 723 interactive raw streams (XXX and for pipes?), at most one raw 724 read will be issued, and a short result does not imply that 725 EOF is imminent. 726 727 Returns an empty bytes array on EOF. 728 729 Raises BlockingIOError if the underlying raw stream has no 730 data at the moment. 731 """ 732 self._unsupported("read") 733 734 def read1(self, size=-1): 735 """Read up to size bytes with at most one read() system call, 736 where size is an int. 737 """ 738 self._unsupported("read1") 739 740 def readinto(self, b): 741 """Read bytes into a pre-allocated bytes-like object b. 742 743 Like read(), this may issue multiple reads to the underlying raw 744 stream, unless the latter is 'interactive'. 745 746 Returns an int representing the number of bytes read (0 for EOF). 747 748 Raises BlockingIOError if the underlying raw stream has no 749 data at the moment. 750 """ 751 752 return self._readinto(b, read1=False) 753 754 def readinto1(self, b): 755 """Read bytes into buffer *b*, using at most one system call 756 757 Returns an int representing the number of bytes read (0 for EOF). 758 759 Raises BlockingIOError if the underlying raw stream has no 760 data at the moment. 761 """ 762 763 return self._readinto(b, read1=True) 764 765 def _readinto(self, b, read1): 766 if not isinstance(b, memoryview): 767 b = memoryview(b) 768 b = b.cast('B') 769 770 if read1: 771 data = self.read1(len(b)) 772 else: 773 data = self.read(len(b)) 774 n = len(data) 775 776 b[:n] = data 777 778 return n 779 780 def write(self, b): 781 """Write the given bytes buffer to the IO stream. 782 783 Return the number of bytes written, which is always the length of b 784 in bytes. 785 786 Raises BlockingIOError if the buffer is full and the 787 underlying raw stream cannot accept more data at the moment. 788 """ 789 self._unsupported("write") 790 791 def detach(self): 792 """ 793 Separate the underlying raw stream from the buffer and return it. 794 795 After the raw stream has been detached, the buffer is in an unusable 796 state. 797 """ 798 self._unsupported("detach") 799 800io.BufferedIOBase.register(BufferedIOBase) 801 802 803class _BufferedIOMixin(BufferedIOBase): 804 805 """A mixin implementation of BufferedIOBase with an underlying raw stream. 806 807 This passes most requests on to the underlying raw stream. It 808 does *not* provide implementations of read(), readinto() or 809 write(). 810 """ 811 812 def __init__(self, raw): 813 self._raw = raw 814 815 ### Positioning ### 816 817 def seek(self, pos, whence=0): 818 new_position = self.raw.seek(pos, whence) 819 if new_position < 0: 820 raise OSError("seek() returned an invalid position") 821 return new_position 822 823 def tell(self): 824 pos = self.raw.tell() 825 if pos < 0: 826 raise OSError("tell() returned an invalid position") 827 return pos 828 829 def truncate(self, pos=None): 830 self._checkClosed() 831 self._checkWritable() 832 833 # Flush the stream. We're mixing buffered I/O with lower-level I/O, 834 # and a flush may be necessary to synch both views of the current 835 # file state. 836 self.flush() 837 838 if pos is None: 839 pos = self.tell() 840 # XXX: Should seek() be used, instead of passing the position 841 # XXX directly to truncate? 842 return self.raw.truncate(pos) 843 844 ### Flush and close ### 845 846 def flush(self): 847 if self.closed: 848 raise ValueError("flush on closed file") 849 self.raw.flush() 850 851 def close(self): 852 if self.raw is not None and not self.closed: 853 try: 854 # may raise BlockingIOError or BrokenPipeError etc 855 self.flush() 856 finally: 857 self.raw.close() 858 859 def detach(self): 860 if self.raw is None: 861 raise ValueError("raw stream already detached") 862 self.flush() 863 raw = self._raw 864 self._raw = None 865 return raw 866 867 ### Inquiries ### 868 869 def seekable(self): 870 return self.raw.seekable() 871 872 @property 873 def raw(self): 874 return self._raw 875 876 @property 877 def closed(self): 878 return self.raw.closed 879 880 @property 881 def name(self): 882 return self.raw.name 883 884 @property 885 def mode(self): 886 return self.raw.mode 887 888 def __getstate__(self): 889 raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") 890 891 def __repr__(self): 892 modname = self.__class__.__module__ 893 clsname = self.__class__.__qualname__ 894 try: 895 name = self.name 896 except AttributeError: 897 return "<{}.{}>".format(modname, clsname) 898 else: 899 return "<{}.{} name={!r}>".format(modname, clsname, name) 900 901 ### Lower-level APIs ### 902 903 def fileno(self): 904 return self.raw.fileno() 905 906 def isatty(self): 907 return self.raw.isatty() 908 909 910class BytesIO(BufferedIOBase): 911 912 """Buffered I/O implementation using an in-memory bytes buffer.""" 913 914 # Initialize _buffer as soon as possible since it's used by __del__() 915 # which calls close() 916 _buffer = None 917 918 def __init__(self, initial_bytes=None): 919 buf = bytearray() 920 if initial_bytes is not None: 921 buf += initial_bytes 922 self._buffer = buf 923 self._pos = 0 924 925 def __getstate__(self): 926 if self.closed: 927 raise ValueError("__getstate__ on closed file") 928 return self.__dict__.copy() 929 930 def getvalue(self): 931 """Return the bytes value (contents) of the buffer 932 """ 933 if self.closed: 934 raise ValueError("getvalue on closed file") 935 return bytes(self._buffer) 936 937 def getbuffer(self): 938 """Return a readable and writable view of the buffer. 939 """ 940 if self.closed: 941 raise ValueError("getbuffer on closed file") 942 return memoryview(self._buffer) 943 944 def close(self): 945 if self._buffer is not None: 946 self._buffer.clear() 947 super().close() 948 949 def read(self, size=-1): 950 if self.closed: 951 raise ValueError("read from closed file") 952 if size is None: 953 size = -1 954 else: 955 try: 956 size_index = size.__index__ 957 except AttributeError: 958 raise TypeError(f"{size!r} is not an integer") 959 else: 960 size = size_index() 961 if size < 0: 962 size = len(self._buffer) 963 if len(self._buffer) <= self._pos: 964 return b"" 965 newpos = min(len(self._buffer), self._pos + size) 966 b = self._buffer[self._pos : newpos] 967 self._pos = newpos 968 return bytes(b) 969 970 def read1(self, size=-1): 971 """This is the same as read. 972 """ 973 return self.read(size) 974 975 def write(self, b): 976 if self.closed: 977 raise ValueError("write to closed file") 978 if isinstance(b, str): 979 raise TypeError("can't write str to binary stream") 980 with memoryview(b) as view: 981 n = view.nbytes # Size of any bytes-like object 982 if n == 0: 983 return 0 984 pos = self._pos 985 if pos > len(self._buffer): 986 # Inserts null bytes between the current end of the file 987 # and the new write position. 988 padding = b'\x00' * (pos - len(self._buffer)) 989 self._buffer += padding 990 self._buffer[pos:pos + n] = b 991 self._pos += n 992 return n 993 994 def seek(self, pos, whence=0): 995 if self.closed: 996 raise ValueError("seek on closed file") 997 try: 998 pos_index = pos.__index__ 999 except AttributeError: 1000 raise TypeError(f"{pos!r} is not an integer") 1001 else: 1002 pos = pos_index() 1003 if whence == 0: 1004 if pos < 0: 1005 raise ValueError("negative seek position %r" % (pos,)) 1006 self._pos = pos 1007 elif whence == 1: 1008 self._pos = max(0, self._pos + pos) 1009 elif whence == 2: 1010 self._pos = max(0, len(self._buffer) + pos) 1011 else: 1012 raise ValueError("unsupported whence value") 1013 return self._pos 1014 1015 def tell(self): 1016 if self.closed: 1017 raise ValueError("tell on closed file") 1018 return self._pos 1019 1020 def truncate(self, pos=None): 1021 if self.closed: 1022 raise ValueError("truncate on closed file") 1023 if pos is None: 1024 pos = self._pos 1025 else: 1026 try: 1027 pos_index = pos.__index__ 1028 except AttributeError: 1029 raise TypeError(f"{pos!r} is not an integer") 1030 else: 1031 pos = pos_index() 1032 if pos < 0: 1033 raise ValueError("negative truncate position %r" % (pos,)) 1034 del self._buffer[pos:] 1035 return pos 1036 1037 def readable(self): 1038 if self.closed: 1039 raise ValueError("I/O operation on closed file.") 1040 return True 1041 1042 def writable(self): 1043 if self.closed: 1044 raise ValueError("I/O operation on closed file.") 1045 return True 1046 1047 def seekable(self): 1048 if self.closed: 1049 raise ValueError("I/O operation on closed file.") 1050 return True 1051 1052 1053class BufferedReader(_BufferedIOMixin): 1054 1055 """BufferedReader(raw[, buffer_size]) 1056 1057 A buffer for a readable, sequential BaseRawIO object. 1058 1059 The constructor creates a BufferedReader for the given readable raw 1060 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE 1061 is used. 1062 """ 1063 1064 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 1065 """Create a new buffered reader using the given readable raw IO object. 1066 """ 1067 if not raw.readable(): 1068 raise OSError('"raw" argument must be readable.') 1069 1070 _BufferedIOMixin.__init__(self, raw) 1071 if buffer_size <= 0: 1072 raise ValueError("invalid buffer size") 1073 self.buffer_size = buffer_size 1074 self._reset_read_buf() 1075 self._read_lock = Lock() 1076 1077 def readable(self): 1078 return self.raw.readable() 1079 1080 def _reset_read_buf(self): 1081 self._read_buf = b"" 1082 self._read_pos = 0 1083 1084 def read(self, size=None): 1085 """Read size bytes. 1086 1087 Returns exactly size bytes of data unless the underlying raw IO 1088 stream reaches EOF or if the call would block in non-blocking 1089 mode. If size is negative, read until EOF or until read() would 1090 block. 1091 """ 1092 if size is not None and size < -1: 1093 raise ValueError("invalid number of bytes to read") 1094 with self._read_lock: 1095 return self._read_unlocked(size) 1096 1097 def _read_unlocked(self, n=None): 1098 nodata_val = b"" 1099 empty_values = (b"", None) 1100 buf = self._read_buf 1101 pos = self._read_pos 1102 1103 # Special case for when the number of bytes to read is unspecified. 1104 if n is None or n == -1: 1105 self._reset_read_buf() 1106 if hasattr(self.raw, 'readall'): 1107 chunk = self.raw.readall() 1108 if chunk is None: 1109 return buf[pos:] or None 1110 else: 1111 return buf[pos:] + chunk 1112 chunks = [buf[pos:]] # Strip the consumed bytes. 1113 current_size = 0 1114 while True: 1115 # Read until EOF or until read() would block. 1116 chunk = self.raw.read() 1117 if chunk in empty_values: 1118 nodata_val = chunk 1119 break 1120 current_size += len(chunk) 1121 chunks.append(chunk) 1122 return b"".join(chunks) or nodata_val 1123 1124 # The number of bytes to read is specified, return at most n bytes. 1125 avail = len(buf) - pos # Length of the available buffered data. 1126 if n <= avail: 1127 # Fast path: the data to read is fully buffered. 1128 self._read_pos += n 1129 return buf[pos:pos+n] 1130 # Slow path: read from the stream until enough bytes are read, 1131 # or until an EOF occurs or until read() would block. 1132 chunks = [buf[pos:]] 1133 wanted = max(self.buffer_size, n) 1134 while avail < n: 1135 chunk = self.raw.read(wanted) 1136 if chunk in empty_values: 1137 nodata_val = chunk 1138 break 1139 avail += len(chunk) 1140 chunks.append(chunk) 1141 # n is more than avail only when an EOF occurred or when 1142 # read() would have blocked. 1143 n = min(n, avail) 1144 out = b"".join(chunks) 1145 self._read_buf = out[n:] # Save the extra data in the buffer. 1146 self._read_pos = 0 1147 return out[:n] if out else nodata_val 1148 1149 def peek(self, size=0): 1150 """Returns buffered bytes without advancing the position. 1151 1152 The argument indicates a desired minimal number of bytes; we 1153 do at most one raw read to satisfy it. We never return more 1154 than self.buffer_size. 1155 """ 1156 with self._read_lock: 1157 return self._peek_unlocked(size) 1158 1159 def _peek_unlocked(self, n=0): 1160 want = min(n, self.buffer_size) 1161 have = len(self._read_buf) - self._read_pos 1162 if have < want or have <= 0: 1163 to_read = self.buffer_size - have 1164 current = self.raw.read(to_read) 1165 if current: 1166 self._read_buf = self._read_buf[self._read_pos:] + current 1167 self._read_pos = 0 1168 return self._read_buf[self._read_pos:] 1169 1170 def read1(self, size=-1): 1171 """Reads up to size bytes, with at most one read() system call.""" 1172 # Returns up to size bytes. If at least one byte is buffered, we 1173 # only return buffered bytes. Otherwise, we do one raw read. 1174 if size < 0: 1175 size = self.buffer_size 1176 if size == 0: 1177 return b"" 1178 with self._read_lock: 1179 self._peek_unlocked(1) 1180 return self._read_unlocked( 1181 min(size, len(self._read_buf) - self._read_pos)) 1182 1183 # Implementing readinto() and readinto1() is not strictly necessary (we 1184 # could rely on the base class that provides an implementation in terms of 1185 # read() and read1()). We do it anyway to keep the _pyio implementation 1186 # similar to the io implementation (which implements the methods for 1187 # performance reasons). 1188 def _readinto(self, buf, read1): 1189 """Read data into *buf* with at most one system call.""" 1190 1191 # Need to create a memoryview object of type 'b', otherwise 1192 # we may not be able to assign bytes to it, and slicing it 1193 # would create a new object. 1194 if not isinstance(buf, memoryview): 1195 buf = memoryview(buf) 1196 if buf.nbytes == 0: 1197 return 0 1198 buf = buf.cast('B') 1199 1200 written = 0 1201 with self._read_lock: 1202 while written < len(buf): 1203 1204 # First try to read from internal buffer 1205 avail = min(len(self._read_buf) - self._read_pos, len(buf)) 1206 if avail: 1207 buf[written:written+avail] = \ 1208 self._read_buf[self._read_pos:self._read_pos+avail] 1209 self._read_pos += avail 1210 written += avail 1211 if written == len(buf): 1212 break 1213 1214 # If remaining space in callers buffer is larger than 1215 # internal buffer, read directly into callers buffer 1216 if len(buf) - written > self.buffer_size: 1217 n = self.raw.readinto(buf[written:]) 1218 if not n: 1219 break # eof 1220 written += n 1221 1222 # Otherwise refill internal buffer - unless we're 1223 # in read1 mode and already got some data 1224 elif not (read1 and written): 1225 if not self._peek_unlocked(1): 1226 break # eof 1227 1228 # In readinto1 mode, return as soon as we have some data 1229 if read1 and written: 1230 break 1231 1232 return written 1233 1234 def tell(self): 1235 return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos 1236 1237 def seek(self, pos, whence=0): 1238 if whence not in valid_seek_flags: 1239 raise ValueError("invalid whence value") 1240 with self._read_lock: 1241 if whence == 1: 1242 pos -= len(self._read_buf) - self._read_pos 1243 pos = _BufferedIOMixin.seek(self, pos, whence) 1244 self._reset_read_buf() 1245 return pos 1246 1247class BufferedWriter(_BufferedIOMixin): 1248 1249 """A buffer for a writeable sequential RawIO object. 1250 1251 The constructor creates a BufferedWriter for the given writeable raw 1252 stream. If the buffer_size is not given, it defaults to 1253 DEFAULT_BUFFER_SIZE. 1254 """ 1255 1256 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 1257 if not raw.writable(): 1258 raise OSError('"raw" argument must be writable.') 1259 1260 _BufferedIOMixin.__init__(self, raw) 1261 if buffer_size <= 0: 1262 raise ValueError("invalid buffer size") 1263 self.buffer_size = buffer_size 1264 self._write_buf = bytearray() 1265 self._write_lock = Lock() 1266 1267 def writable(self): 1268 return self.raw.writable() 1269 1270 def write(self, b): 1271 if isinstance(b, str): 1272 raise TypeError("can't write str to binary stream") 1273 with self._write_lock: 1274 if self.closed: 1275 raise ValueError("write to closed file") 1276 # XXX we can implement some more tricks to try and avoid 1277 # partial writes 1278 if len(self._write_buf) > self.buffer_size: 1279 # We're full, so let's pre-flush the buffer. (This may 1280 # raise BlockingIOError with characters_written == 0.) 1281 self._flush_unlocked() 1282 before = len(self._write_buf) 1283 self._write_buf.extend(b) 1284 written = len(self._write_buf) - before 1285 if len(self._write_buf) > self.buffer_size: 1286 try: 1287 self._flush_unlocked() 1288 except BlockingIOError as e: 1289 if len(self._write_buf) > self.buffer_size: 1290 # We've hit the buffer_size. We have to accept a partial 1291 # write and cut back our buffer. 1292 overage = len(self._write_buf) - self.buffer_size 1293 written -= overage 1294 self._write_buf = self._write_buf[:self.buffer_size] 1295 raise BlockingIOError(e.errno, e.strerror, written) 1296 return written 1297 1298 def truncate(self, pos=None): 1299 with self._write_lock: 1300 self._flush_unlocked() 1301 if pos is None: 1302 pos = self.raw.tell() 1303 return self.raw.truncate(pos) 1304 1305 def flush(self): 1306 with self._write_lock: 1307 self._flush_unlocked() 1308 1309 def _flush_unlocked(self): 1310 if self.closed: 1311 raise ValueError("flush on closed file") 1312 while self._write_buf: 1313 try: 1314 n = self.raw.write(self._write_buf) 1315 except BlockingIOError: 1316 raise RuntimeError("self.raw should implement RawIOBase: it " 1317 "should not raise BlockingIOError") 1318 if n is None: 1319 raise BlockingIOError( 1320 errno.EAGAIN, 1321 "write could not complete without blocking", 0) 1322 if n > len(self._write_buf) or n < 0: 1323 raise OSError("write() returned incorrect number of bytes") 1324 del self._write_buf[:n] 1325 1326 def tell(self): 1327 return _BufferedIOMixin.tell(self) + len(self._write_buf) 1328 1329 def seek(self, pos, whence=0): 1330 if whence not in valid_seek_flags: 1331 raise ValueError("invalid whence value") 1332 with self._write_lock: 1333 self._flush_unlocked() 1334 return _BufferedIOMixin.seek(self, pos, whence) 1335 1336 def close(self): 1337 with self._write_lock: 1338 if self.raw is None or self.closed: 1339 return 1340 # We have to release the lock and call self.flush() (which will 1341 # probably just re-take the lock) in case flush has been overridden in 1342 # a subclass or the user set self.flush to something. This is the same 1343 # behavior as the C implementation. 1344 try: 1345 # may raise BlockingIOError or BrokenPipeError etc 1346 self.flush() 1347 finally: 1348 with self._write_lock: 1349 self.raw.close() 1350 1351 1352class BufferedRWPair(BufferedIOBase): 1353 1354 """A buffered reader and writer object together. 1355 1356 A buffered reader object and buffered writer object put together to 1357 form a sequential IO object that can read and write. This is typically 1358 used with a socket or two-way pipe. 1359 1360 reader and writer are RawIOBase objects that are readable and 1361 writeable respectively. If the buffer_size is omitted it defaults to 1362 DEFAULT_BUFFER_SIZE. 1363 """ 1364 1365 # XXX The usefulness of this (compared to having two separate IO 1366 # objects) is questionable. 1367 1368 def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE): 1369 """Constructor. 1370 1371 The arguments are two RawIO instances. 1372 """ 1373 if not reader.readable(): 1374 raise OSError('"reader" argument must be readable.') 1375 1376 if not writer.writable(): 1377 raise OSError('"writer" argument must be writable.') 1378 1379 self.reader = BufferedReader(reader, buffer_size) 1380 self.writer = BufferedWriter(writer, buffer_size) 1381 1382 def read(self, size=-1): 1383 if size is None: 1384 size = -1 1385 return self.reader.read(size) 1386 1387 def readinto(self, b): 1388 return self.reader.readinto(b) 1389 1390 def write(self, b): 1391 return self.writer.write(b) 1392 1393 def peek(self, size=0): 1394 return self.reader.peek(size) 1395 1396 def read1(self, size=-1): 1397 return self.reader.read1(size) 1398 1399 def readinto1(self, b): 1400 return self.reader.readinto1(b) 1401 1402 def readable(self): 1403 return self.reader.readable() 1404 1405 def writable(self): 1406 return self.writer.writable() 1407 1408 def flush(self): 1409 return self.writer.flush() 1410 1411 def close(self): 1412 try: 1413 self.writer.close() 1414 finally: 1415 self.reader.close() 1416 1417 def isatty(self): 1418 return self.reader.isatty() or self.writer.isatty() 1419 1420 @property 1421 def closed(self): 1422 return self.writer.closed 1423 1424 1425class BufferedRandom(BufferedWriter, BufferedReader): 1426 1427 """A buffered interface to random access streams. 1428 1429 The constructor creates a reader and writer for a seekable stream, 1430 raw, given in the first argument. If the buffer_size is omitted it 1431 defaults to DEFAULT_BUFFER_SIZE. 1432 """ 1433 1434 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 1435 raw._checkSeekable() 1436 BufferedReader.__init__(self, raw, buffer_size) 1437 BufferedWriter.__init__(self, raw, buffer_size) 1438 1439 def seek(self, pos, whence=0): 1440 if whence not in valid_seek_flags: 1441 raise ValueError("invalid whence value") 1442 self.flush() 1443 if self._read_buf: 1444 # Undo read ahead. 1445 with self._read_lock: 1446 self.raw.seek(self._read_pos - len(self._read_buf), 1) 1447 # First do the raw seek, then empty the read buffer, so that 1448 # if the raw seek fails, we don't lose buffered data forever. 1449 pos = self.raw.seek(pos, whence) 1450 with self._read_lock: 1451 self._reset_read_buf() 1452 if pos < 0: 1453 raise OSError("seek() returned invalid position") 1454 return pos 1455 1456 def tell(self): 1457 if self._write_buf: 1458 return BufferedWriter.tell(self) 1459 else: 1460 return BufferedReader.tell(self) 1461 1462 def truncate(self, pos=None): 1463 if pos is None: 1464 pos = self.tell() 1465 # Use seek to flush the read buffer. 1466 return BufferedWriter.truncate(self, pos) 1467 1468 def read(self, size=None): 1469 if size is None: 1470 size = -1 1471 self.flush() 1472 return BufferedReader.read(self, size) 1473 1474 def readinto(self, b): 1475 self.flush() 1476 return BufferedReader.readinto(self, b) 1477 1478 def peek(self, size=0): 1479 self.flush() 1480 return BufferedReader.peek(self, size) 1481 1482 def read1(self, size=-1): 1483 self.flush() 1484 return BufferedReader.read1(self, size) 1485 1486 def readinto1(self, b): 1487 self.flush() 1488 return BufferedReader.readinto1(self, b) 1489 1490 def write(self, b): 1491 if self._read_buf: 1492 # Undo readahead 1493 with self._read_lock: 1494 self.raw.seek(self._read_pos - len(self._read_buf), 1) 1495 self._reset_read_buf() 1496 return BufferedWriter.write(self, b) 1497 1498 1499class FileIO(RawIOBase): 1500 _fd = -1 1501 _created = False 1502 _readable = False 1503 _writable = False 1504 _appending = False 1505 _seekable = None 1506 _closefd = True 1507 1508 def __init__(self, file, mode='r', closefd=True, opener=None): 1509 """Open a file. The mode can be 'r' (default), 'w', 'x' or 'a' for reading, 1510 writing, exclusive creation or appending. The file will be created if it 1511 doesn't exist when opened for writing or appending; it will be truncated 1512 when opened for writing. A FileExistsError will be raised if it already 1513 exists when opened for creating. Opening a file for creating implies 1514 writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode 1515 to allow simultaneous reading and writing. A custom opener can be used by 1516 passing a callable as *opener*. The underlying file descriptor for the file 1517 object is then obtained by calling opener with (*name*, *flags*). 1518 *opener* must return an open file descriptor (passing os.open as *opener* 1519 results in functionality similar to passing None). 1520 """ 1521 if self._fd >= 0: 1522 # Have to close the existing file first. 1523 try: 1524 if self._closefd: 1525 os.close(self._fd) 1526 finally: 1527 self._fd = -1 1528 1529 if isinstance(file, float): 1530 raise TypeError('integer argument expected, got float') 1531 if isinstance(file, int): 1532 fd = file 1533 if fd < 0: 1534 raise ValueError('negative file descriptor') 1535 else: 1536 fd = -1 1537 1538 if not isinstance(mode, str): 1539 raise TypeError('invalid mode: %s' % (mode,)) 1540 if not set(mode) <= set('xrwab+'): 1541 raise ValueError('invalid mode: %s' % (mode,)) 1542 if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1: 1543 raise ValueError('Must have exactly one of create/read/write/append ' 1544 'mode and at most one plus') 1545 1546 if 'x' in mode: 1547 self._created = True 1548 self._writable = True 1549 flags = os.O_EXCL | os.O_CREAT 1550 elif 'r' in mode: 1551 self._readable = True 1552 flags = 0 1553 elif 'w' in mode: 1554 self._writable = True 1555 flags = os.O_CREAT | os.O_TRUNC 1556 elif 'a' in mode: 1557 self._writable = True 1558 self._appending = True 1559 flags = os.O_APPEND | os.O_CREAT 1560 1561 if '+' in mode: 1562 self._readable = True 1563 self._writable = True 1564 1565 if self._readable and self._writable: 1566 flags |= os.O_RDWR 1567 elif self._readable: 1568 flags |= os.O_RDONLY 1569 else: 1570 flags |= os.O_WRONLY 1571 1572 flags |= getattr(os, 'O_BINARY', 0) 1573 1574 noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or 1575 getattr(os, 'O_CLOEXEC', 0)) 1576 flags |= noinherit_flag 1577 1578 owned_fd = None 1579 try: 1580 if fd < 0: 1581 if not closefd: 1582 raise ValueError('Cannot use closefd=False with file name') 1583 if opener is None: 1584 fd = os.open(file, flags, 0o666) 1585 else: 1586 fd = opener(file, flags) 1587 if not isinstance(fd, int): 1588 raise TypeError('expected integer from opener') 1589 if fd < 0: 1590 raise OSError('Negative file descriptor') 1591 owned_fd = fd 1592 if not noinherit_flag: 1593 os.set_inheritable(fd, False) 1594 1595 self._closefd = closefd 1596 fdfstat = os.fstat(fd) 1597 try: 1598 if stat.S_ISDIR(fdfstat.st_mode): 1599 raise IsADirectoryError(errno.EISDIR, 1600 os.strerror(errno.EISDIR), file) 1601 except AttributeError: 1602 # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR 1603 # don't exist. 1604 pass 1605 self._blksize = getattr(fdfstat, 'st_blksize', 0) 1606 if self._blksize <= 1: 1607 self._blksize = DEFAULT_BUFFER_SIZE 1608 1609 if _setmode: 1610 # don't translate newlines (\r\n <=> \n) 1611 _setmode(fd, os.O_BINARY) 1612 1613 self.name = file 1614 if self._appending: 1615 # For consistent behaviour, we explicitly seek to the 1616 # end of file (otherwise, it might be done only on the 1617 # first write()). 1618 try: 1619 os.lseek(fd, 0, SEEK_END) 1620 except OSError as e: 1621 if e.errno != errno.ESPIPE: 1622 raise 1623 except: 1624 if owned_fd is not None: 1625 os.close(owned_fd) 1626 raise 1627 self._fd = fd 1628 1629 def __del__(self): 1630 if self._fd >= 0 and self._closefd and not self.closed: 1631 import warnings 1632 warnings.warn('unclosed file %r' % (self,), ResourceWarning, 1633 stacklevel=2, source=self) 1634 self.close() 1635 1636 def __getstate__(self): 1637 raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") 1638 1639 def __repr__(self): 1640 class_name = '%s.%s' % (self.__class__.__module__, 1641 self.__class__.__qualname__) 1642 if self.closed: 1643 return '<%s [closed]>' % class_name 1644 try: 1645 name = self.name 1646 except AttributeError: 1647 return ('<%s fd=%d mode=%r closefd=%r>' % 1648 (class_name, self._fd, self.mode, self._closefd)) 1649 else: 1650 return ('<%s name=%r mode=%r closefd=%r>' % 1651 (class_name, name, self.mode, self._closefd)) 1652 1653 def _checkReadable(self): 1654 if not self._readable: 1655 raise UnsupportedOperation('File not open for reading') 1656 1657 def _checkWritable(self, msg=None): 1658 if not self._writable: 1659 raise UnsupportedOperation('File not open for writing') 1660 1661 def read(self, size=None): 1662 """Read at most size bytes, returned as bytes. 1663 1664 Only makes one system call, so less data may be returned than requested 1665 In non-blocking mode, returns None if no data is available. 1666 Return an empty bytes object at EOF. 1667 """ 1668 self._checkClosed() 1669 self._checkReadable() 1670 if size is None or size < 0: 1671 return self.readall() 1672 try: 1673 return os.read(self._fd, size) 1674 except BlockingIOError: 1675 return None 1676 1677 def readall(self): 1678 """Read all data from the file, returned as bytes. 1679 1680 In non-blocking mode, returns as much as is immediately available, 1681 or None if no data is available. Return an empty bytes object at EOF. 1682 """ 1683 self._checkClosed() 1684 self._checkReadable() 1685 bufsize = DEFAULT_BUFFER_SIZE 1686 try: 1687 pos = os.lseek(self._fd, 0, SEEK_CUR) 1688 end = os.fstat(self._fd).st_size 1689 if end >= pos: 1690 bufsize = end - pos + 1 1691 except OSError: 1692 pass 1693 1694 result = bytearray() 1695 while True: 1696 if len(result) >= bufsize: 1697 bufsize = len(result) 1698 bufsize += max(bufsize, DEFAULT_BUFFER_SIZE) 1699 n = bufsize - len(result) 1700 try: 1701 chunk = os.read(self._fd, n) 1702 except BlockingIOError: 1703 if result: 1704 break 1705 return None 1706 if not chunk: # reached the end of the file 1707 break 1708 result += chunk 1709 1710 return bytes(result) 1711 1712 def readinto(self, b): 1713 """Same as RawIOBase.readinto().""" 1714 m = memoryview(b).cast('B') 1715 data = self.read(len(m)) 1716 n = len(data) 1717 m[:n] = data 1718 return n 1719 1720 def write(self, b): 1721 """Write bytes b to file, return number written. 1722 1723 Only makes one system call, so not all of the data may be written. 1724 The number of bytes actually written is returned. In non-blocking mode, 1725 returns None if the write would block. 1726 """ 1727 self._checkClosed() 1728 self._checkWritable() 1729 try: 1730 return os.write(self._fd, b) 1731 except BlockingIOError: 1732 return None 1733 1734 def seek(self, pos, whence=SEEK_SET): 1735 """Move to new file position. 1736 1737 Argument offset is a byte count. Optional argument whence defaults to 1738 SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values 1739 are SEEK_CUR or 1 (move relative to current position, positive or negative), 1740 and SEEK_END or 2 (move relative to end of file, usually negative, although 1741 many platforms allow seeking beyond the end of a file). 1742 1743 Note that not all file objects are seekable. 1744 """ 1745 if isinstance(pos, float): 1746 raise TypeError('an integer is required') 1747 self._checkClosed() 1748 return os.lseek(self._fd, pos, whence) 1749 1750 def tell(self): 1751 """tell() -> int. Current file position. 1752 1753 Can raise OSError for non seekable files.""" 1754 self._checkClosed() 1755 return os.lseek(self._fd, 0, SEEK_CUR) 1756 1757 def truncate(self, size=None): 1758 """Truncate the file to at most size bytes. 1759 1760 Size defaults to the current file position, as returned by tell(). 1761 The current file position is changed to the value of size. 1762 """ 1763 self._checkClosed() 1764 self._checkWritable() 1765 if size is None: 1766 size = self.tell() 1767 os.ftruncate(self._fd, size) 1768 return size 1769 1770 def close(self): 1771 """Close the file. 1772 1773 A closed file cannot be used for further I/O operations. close() may be 1774 called more than once without error. 1775 """ 1776 if not self.closed: 1777 try: 1778 if self._closefd: 1779 os.close(self._fd) 1780 finally: 1781 super().close() 1782 1783 def seekable(self): 1784 """True if file supports random-access.""" 1785 self._checkClosed() 1786 if self._seekable is None: 1787 try: 1788 self.tell() 1789 except OSError: 1790 self._seekable = False 1791 else: 1792 self._seekable = True 1793 return self._seekable 1794 1795 def readable(self): 1796 """True if file was opened in a read mode.""" 1797 self._checkClosed() 1798 return self._readable 1799 1800 def writable(self): 1801 """True if file was opened in a write mode.""" 1802 self._checkClosed() 1803 return self._writable 1804 1805 def fileno(self): 1806 """Return the underlying file descriptor (an integer).""" 1807 self._checkClosed() 1808 return self._fd 1809 1810 def isatty(self): 1811 """True if the file is connected to a TTY device.""" 1812 self._checkClosed() 1813 return os.isatty(self._fd) 1814 1815 @property 1816 def closefd(self): 1817 """True if the file descriptor will be closed by close().""" 1818 return self._closefd 1819 1820 @property 1821 def mode(self): 1822 """String giving the file mode""" 1823 if self._created: 1824 if self._readable: 1825 return 'xb+' 1826 else: 1827 return 'xb' 1828 elif self._appending: 1829 if self._readable: 1830 return 'ab+' 1831 else: 1832 return 'ab' 1833 elif self._readable: 1834 if self._writable: 1835 return 'rb+' 1836 else: 1837 return 'rb' 1838 else: 1839 return 'wb' 1840 1841 1842class TextIOBase(IOBase): 1843 1844 """Base class for text I/O. 1845 1846 This class provides a character and line based interface to stream 1847 I/O. 1848 """ 1849 1850 def read(self, size=-1): 1851 """Read at most size characters from stream, where size is an int. 1852 1853 Read from underlying buffer until we have size characters or we hit EOF. 1854 If size is negative or omitted, read until EOF. 1855 1856 Returns a string. 1857 """ 1858 self._unsupported("read") 1859 1860 def write(self, s): 1861 """Write string s to stream and returning an int.""" 1862 self._unsupported("write") 1863 1864 def truncate(self, pos=None): 1865 """Truncate size to pos, where pos is an int.""" 1866 self._unsupported("truncate") 1867 1868 def readline(self): 1869 """Read until newline or EOF. 1870 1871 Returns an empty string if EOF is hit immediately. 1872 """ 1873 self._unsupported("readline") 1874 1875 def detach(self): 1876 """ 1877 Separate the underlying buffer from the TextIOBase and return it. 1878 1879 After the underlying buffer has been detached, the TextIO is in an 1880 unusable state. 1881 """ 1882 self._unsupported("detach") 1883 1884 @property 1885 def encoding(self): 1886 """Subclasses should override.""" 1887 return None 1888 1889 @property 1890 def newlines(self): 1891 """Line endings translated so far. 1892 1893 Only line endings translated during reading are considered. 1894 1895 Subclasses should override. 1896 """ 1897 return None 1898 1899 @property 1900 def errors(self): 1901 """Error setting of the decoder or encoder. 1902 1903 Subclasses should override.""" 1904 return None 1905 1906io.TextIOBase.register(TextIOBase) 1907 1908 1909class IncrementalNewlineDecoder(codecs.IncrementalDecoder): 1910 r"""Codec used when reading a file in universal newlines mode. It wraps 1911 another incremental decoder, translating \r\n and \r into \n. It also 1912 records the types of newlines encountered. When used with 1913 translate=False, it ensures that the newline sequence is returned in 1914 one piece. 1915 """ 1916 def __init__(self, decoder, translate, errors='strict'): 1917 codecs.IncrementalDecoder.__init__(self, errors=errors) 1918 self.translate = translate 1919 self.decoder = decoder 1920 self.seennl = 0 1921 self.pendingcr = False 1922 1923 def decode(self, input, final=False): 1924 # decode input (with the eventual \r from a previous pass) 1925 if self.decoder is None: 1926 output = input 1927 else: 1928 output = self.decoder.decode(input, final=final) 1929 if self.pendingcr and (output or final): 1930 output = "\r" + output 1931 self.pendingcr = False 1932 1933 # retain last \r even when not translating data: 1934 # then readline() is sure to get \r\n in one pass 1935 if output.endswith("\r") and not final: 1936 output = output[:-1] 1937 self.pendingcr = True 1938 1939 # Record which newlines are read 1940 crlf = output.count('\r\n') 1941 cr = output.count('\r') - crlf 1942 lf = output.count('\n') - crlf 1943 self.seennl |= (lf and self._LF) | (cr and self._CR) \ 1944 | (crlf and self._CRLF) 1945 1946 if self.translate: 1947 if crlf: 1948 output = output.replace("\r\n", "\n") 1949 if cr: 1950 output = output.replace("\r", "\n") 1951 1952 return output 1953 1954 def getstate(self): 1955 if self.decoder is None: 1956 buf = b"" 1957 flag = 0 1958 else: 1959 buf, flag = self.decoder.getstate() 1960 flag <<= 1 1961 if self.pendingcr: 1962 flag |= 1 1963 return buf, flag 1964 1965 def setstate(self, state): 1966 buf, flag = state 1967 self.pendingcr = bool(flag & 1) 1968 if self.decoder is not None: 1969 self.decoder.setstate((buf, flag >> 1)) 1970 1971 def reset(self): 1972 self.seennl = 0 1973 self.pendingcr = False 1974 if self.decoder is not None: 1975 self.decoder.reset() 1976 1977 _LF = 1 1978 _CR = 2 1979 _CRLF = 4 1980 1981 @property 1982 def newlines(self): 1983 return (None, 1984 "\n", 1985 "\r", 1986 ("\r", "\n"), 1987 "\r\n", 1988 ("\n", "\r\n"), 1989 ("\r", "\r\n"), 1990 ("\r", "\n", "\r\n") 1991 )[self.seennl] 1992 1993 1994class TextIOWrapper(TextIOBase): 1995 1996 r"""Character and line based layer over a BufferedIOBase object, buffer. 1997 1998 encoding gives the name of the encoding that the stream will be 1999 decoded or encoded with. It defaults to locale.getpreferredencoding(False). 2000 2001 errors determines the strictness of encoding and decoding (see the 2002 codecs.register) and defaults to "strict". 2003 2004 newline can be None, '', '\n', '\r', or '\r\n'. It controls the 2005 handling of line endings. If it is None, universal newlines is 2006 enabled. With this enabled, on input, the lines endings '\n', '\r', 2007 or '\r\n' are translated to '\n' before being returned to the 2008 caller. Conversely, on output, '\n' is translated to the system 2009 default line separator, os.linesep. If newline is any other of its 2010 legal values, that newline becomes the newline when the file is read 2011 and it is returned untranslated. On output, '\n' is converted to the 2012 newline. 2013 2014 If line_buffering is True, a call to flush is implied when a call to 2015 write contains a newline character. 2016 """ 2017 2018 _CHUNK_SIZE = 2048 2019 2020 # Initialize _buffer as soon as possible since it's used by __del__() 2021 # which calls close() 2022 _buffer = None 2023 2024 # The write_through argument has no effect here since this 2025 # implementation always writes through. The argument is present only 2026 # so that the signature can match the signature of the C version. 2027 def __init__(self, buffer, encoding=None, errors=None, newline=None, 2028 line_buffering=False, write_through=False): 2029 self._check_newline(newline) 2030 encoding = text_encoding(encoding) 2031 2032 if encoding == "locale": 2033 try: 2034 encoding = os.device_encoding(buffer.fileno()) or "locale" 2035 except (AttributeError, UnsupportedOperation): 2036 pass 2037 2038 if encoding == "locale": 2039 try: 2040 import locale 2041 except ImportError: 2042 # Importing locale may fail if Python is being built 2043 encoding = "utf-8" 2044 else: 2045 encoding = locale.getpreferredencoding(False) 2046 2047 if not isinstance(encoding, str): 2048 raise ValueError("invalid encoding: %r" % encoding) 2049 2050 if not codecs.lookup(encoding)._is_text_encoding: 2051 msg = ("%r is not a text encoding; " 2052 "use codecs.open() to handle arbitrary codecs") 2053 raise LookupError(msg % encoding) 2054 2055 if errors is None: 2056 errors = "strict" 2057 else: 2058 if not isinstance(errors, str): 2059 raise ValueError("invalid errors: %r" % errors) 2060 if _CHECK_ERRORS: 2061 codecs.lookup_error(errors) 2062 2063 self._buffer = buffer 2064 self._decoded_chars = '' # buffer for text returned from decoder 2065 self._decoded_chars_used = 0 # offset into _decoded_chars for read() 2066 self._snapshot = None # info for reconstructing decoder state 2067 self._seekable = self._telling = self.buffer.seekable() 2068 self._has_read1 = hasattr(self.buffer, 'read1') 2069 self._configure(encoding, errors, newline, 2070 line_buffering, write_through) 2071 2072 def _check_newline(self, newline): 2073 if newline is not None and not isinstance(newline, str): 2074 raise TypeError("illegal newline type: %r" % (type(newline),)) 2075 if newline not in (None, "", "\n", "\r", "\r\n"): 2076 raise ValueError("illegal newline value: %r" % (newline,)) 2077 2078 def _configure(self, encoding=None, errors=None, newline=None, 2079 line_buffering=False, write_through=False): 2080 self._encoding = encoding 2081 self._errors = errors 2082 self._encoder = None 2083 self._decoder = None 2084 self._b2cratio = 0.0 2085 2086 self._readuniversal = not newline 2087 self._readtranslate = newline is None 2088 self._readnl = newline 2089 self._writetranslate = newline != '' 2090 self._writenl = newline or os.linesep 2091 2092 self._line_buffering = line_buffering 2093 self._write_through = write_through 2094 2095 # don't write a BOM in the middle of a file 2096 if self._seekable and self.writable(): 2097 position = self.buffer.tell() 2098 if position != 0: 2099 try: 2100 self._get_encoder().setstate(0) 2101 except LookupError: 2102 # Sometimes the encoder doesn't exist 2103 pass 2104 2105 # self._snapshot is either None, or a tuple (dec_flags, next_input) 2106 # where dec_flags is the second (integer) item of the decoder state 2107 # and next_input is the chunk of input bytes that comes next after the 2108 # snapshot point. We use this to reconstruct decoder states in tell(). 2109 2110 # Naming convention: 2111 # - "bytes_..." for integer variables that count input bytes 2112 # - "chars_..." for integer variables that count decoded characters 2113 2114 def __repr__(self): 2115 result = "<{}.{}".format(self.__class__.__module__, 2116 self.__class__.__qualname__) 2117 try: 2118 name = self.name 2119 except AttributeError: 2120 pass 2121 else: 2122 result += " name={0!r}".format(name) 2123 try: 2124 mode = self.mode 2125 except AttributeError: 2126 pass 2127 else: 2128 result += " mode={0!r}".format(mode) 2129 return result + " encoding={0!r}>".format(self.encoding) 2130 2131 @property 2132 def encoding(self): 2133 return self._encoding 2134 2135 @property 2136 def errors(self): 2137 return self._errors 2138 2139 @property 2140 def line_buffering(self): 2141 return self._line_buffering 2142 2143 @property 2144 def write_through(self): 2145 return self._write_through 2146 2147 @property 2148 def buffer(self): 2149 return self._buffer 2150 2151 def reconfigure(self, *, 2152 encoding=None, errors=None, newline=Ellipsis, 2153 line_buffering=None, write_through=None): 2154 """Reconfigure the text stream with new parameters. 2155 2156 This also flushes the stream. 2157 """ 2158 if (self._decoder is not None 2159 and (encoding is not None or errors is not None 2160 or newline is not Ellipsis)): 2161 raise UnsupportedOperation( 2162 "It is not possible to set the encoding or newline of stream " 2163 "after the first read") 2164 2165 if errors is None: 2166 if encoding is None: 2167 errors = self._errors 2168 else: 2169 errors = 'strict' 2170 elif not isinstance(errors, str): 2171 raise TypeError("invalid errors: %r" % errors) 2172 2173 if encoding is None: 2174 encoding = self._encoding 2175 else: 2176 if not isinstance(encoding, str): 2177 raise TypeError("invalid encoding: %r" % encoding) 2178 2179 if newline is Ellipsis: 2180 newline = self._readnl 2181 self._check_newline(newline) 2182 2183 if line_buffering is None: 2184 line_buffering = self.line_buffering 2185 if write_through is None: 2186 write_through = self.write_through 2187 2188 self.flush() 2189 self._configure(encoding, errors, newline, 2190 line_buffering, write_through) 2191 2192 def seekable(self): 2193 if self.closed: 2194 raise ValueError("I/O operation on closed file.") 2195 return self._seekable 2196 2197 def readable(self): 2198 return self.buffer.readable() 2199 2200 def writable(self): 2201 return self.buffer.writable() 2202 2203 def flush(self): 2204 self.buffer.flush() 2205 self._telling = self._seekable 2206 2207 def close(self): 2208 if self.buffer is not None and not self.closed: 2209 try: 2210 self.flush() 2211 finally: 2212 self.buffer.close() 2213 2214 @property 2215 def closed(self): 2216 return self.buffer.closed 2217 2218 @property 2219 def name(self): 2220 return self.buffer.name 2221 2222 def fileno(self): 2223 return self.buffer.fileno() 2224 2225 def isatty(self): 2226 return self.buffer.isatty() 2227 2228 def write(self, s): 2229 'Write data, where s is a str' 2230 if self.closed: 2231 raise ValueError("write to closed file") 2232 if not isinstance(s, str): 2233 raise TypeError("can't write %s to text stream" % 2234 s.__class__.__name__) 2235 length = len(s) 2236 haslf = (self._writetranslate or self._line_buffering) and "\n" in s 2237 if haslf and self._writetranslate and self._writenl != "\n": 2238 s = s.replace("\n", self._writenl) 2239 encoder = self._encoder or self._get_encoder() 2240 # XXX What if we were just reading? 2241 b = encoder.encode(s) 2242 self.buffer.write(b) 2243 if self._line_buffering and (haslf or "\r" in s): 2244 self.flush() 2245 self._set_decoded_chars('') 2246 self._snapshot = None 2247 if self._decoder: 2248 self._decoder.reset() 2249 return length 2250 2251 def _get_encoder(self): 2252 make_encoder = codecs.getincrementalencoder(self._encoding) 2253 self._encoder = make_encoder(self._errors) 2254 return self._encoder 2255 2256 def _get_decoder(self): 2257 make_decoder = codecs.getincrementaldecoder(self._encoding) 2258 decoder = make_decoder(self._errors) 2259 if self._readuniversal: 2260 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) 2261 self._decoder = decoder 2262 return decoder 2263 2264 # The following three methods implement an ADT for _decoded_chars. 2265 # Text returned from the decoder is buffered here until the client 2266 # requests it by calling our read() or readline() method. 2267 def _set_decoded_chars(self, chars): 2268 """Set the _decoded_chars buffer.""" 2269 self._decoded_chars = chars 2270 self._decoded_chars_used = 0 2271 2272 def _get_decoded_chars(self, n=None): 2273 """Advance into the _decoded_chars buffer.""" 2274 offset = self._decoded_chars_used 2275 if n is None: 2276 chars = self._decoded_chars[offset:] 2277 else: 2278 chars = self._decoded_chars[offset:offset + n] 2279 self._decoded_chars_used += len(chars) 2280 return chars 2281 2282 def _rewind_decoded_chars(self, n): 2283 """Rewind the _decoded_chars buffer.""" 2284 if self._decoded_chars_used < n: 2285 raise AssertionError("rewind decoded_chars out of bounds") 2286 self._decoded_chars_used -= n 2287 2288 def _read_chunk(self): 2289 """ 2290 Read and decode the next chunk of data from the BufferedReader. 2291 """ 2292 2293 # The return value is True unless EOF was reached. The decoded 2294 # string is placed in self._decoded_chars (replacing its previous 2295 # value). The entire input chunk is sent to the decoder, though 2296 # some of it may remain buffered in the decoder, yet to be 2297 # converted. 2298 2299 if self._decoder is None: 2300 raise ValueError("no decoder") 2301 2302 if self._telling: 2303 # To prepare for tell(), we need to snapshot a point in the 2304 # file where the decoder's input buffer is empty. 2305 2306 dec_buffer, dec_flags = self._decoder.getstate() 2307 # Given this, we know there was a valid snapshot point 2308 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). 2309 2310 # Read a chunk, decode it, and put the result in self._decoded_chars. 2311 if self._has_read1: 2312 input_chunk = self.buffer.read1(self._CHUNK_SIZE) 2313 else: 2314 input_chunk = self.buffer.read(self._CHUNK_SIZE) 2315 eof = not input_chunk 2316 decoded_chars = self._decoder.decode(input_chunk, eof) 2317 self._set_decoded_chars(decoded_chars) 2318 if decoded_chars: 2319 self._b2cratio = len(input_chunk) / len(self._decoded_chars) 2320 else: 2321 self._b2cratio = 0.0 2322 2323 if self._telling: 2324 # At the snapshot point, len(dec_buffer) bytes before the read, 2325 # the next input to be decoded is dec_buffer + input_chunk. 2326 self._snapshot = (dec_flags, dec_buffer + input_chunk) 2327 2328 return not eof 2329 2330 def _pack_cookie(self, position, dec_flags=0, 2331 bytes_to_feed=0, need_eof=False, chars_to_skip=0): 2332 # The meaning of a tell() cookie is: seek to position, set the 2333 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them 2334 # into the decoder with need_eof as the EOF flag, then skip 2335 # chars_to_skip characters of the decoded result. For most simple 2336 # decoders, tell() will often just give a byte offset in the file. 2337 return (position | (dec_flags<<64) | (bytes_to_feed<<128) | 2338 (chars_to_skip<<192) | bool(need_eof)<<256) 2339 2340 def _unpack_cookie(self, bigint): 2341 rest, position = divmod(bigint, 1<<64) 2342 rest, dec_flags = divmod(rest, 1<<64) 2343 rest, bytes_to_feed = divmod(rest, 1<<64) 2344 need_eof, chars_to_skip = divmod(rest, 1<<64) 2345 return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip 2346 2347 def tell(self): 2348 if not self._seekable: 2349 raise UnsupportedOperation("underlying stream is not seekable") 2350 if not self._telling: 2351 raise OSError("telling position disabled by next() call") 2352 self.flush() 2353 position = self.buffer.tell() 2354 decoder = self._decoder 2355 if decoder is None or self._snapshot is None: 2356 if self._decoded_chars: 2357 # This should never happen. 2358 raise AssertionError("pending decoded text") 2359 return position 2360 2361 # Skip backward to the snapshot point (see _read_chunk). 2362 dec_flags, next_input = self._snapshot 2363 position -= len(next_input) 2364 2365 # How many decoded characters have been used up since the snapshot? 2366 chars_to_skip = self._decoded_chars_used 2367 if chars_to_skip == 0: 2368 # We haven't moved from the snapshot point. 2369 return self._pack_cookie(position, dec_flags) 2370 2371 # Starting from the snapshot position, we will walk the decoder 2372 # forward until it gives us enough decoded characters. 2373 saved_state = decoder.getstate() 2374 try: 2375 # Fast search for an acceptable start point, close to our 2376 # current pos. 2377 # Rationale: calling decoder.decode() has a large overhead 2378 # regardless of chunk size; we want the number of such calls to 2379 # be O(1) in most situations (common decoders, sensible input). 2380 # Actually, it will be exactly 1 for fixed-size codecs (all 2381 # 8-bit codecs, also UTF-16 and UTF-32). 2382 skip_bytes = int(self._b2cratio * chars_to_skip) 2383 skip_back = 1 2384 assert skip_bytes <= len(next_input) 2385 while skip_bytes > 0: 2386 decoder.setstate((b'', dec_flags)) 2387 # Decode up to temptative start point 2388 n = len(decoder.decode(next_input[:skip_bytes])) 2389 if n <= chars_to_skip: 2390 b, d = decoder.getstate() 2391 if not b: 2392 # Before pos and no bytes buffered in decoder => OK 2393 dec_flags = d 2394 chars_to_skip -= n 2395 break 2396 # Skip back by buffered amount and reset heuristic 2397 skip_bytes -= len(b) 2398 skip_back = 1 2399 else: 2400 # We're too far ahead, skip back a bit 2401 skip_bytes -= skip_back 2402 skip_back = skip_back * 2 2403 else: 2404 skip_bytes = 0 2405 decoder.setstate((b'', dec_flags)) 2406 2407 # Note our initial start point. 2408 start_pos = position + skip_bytes 2409 start_flags = dec_flags 2410 if chars_to_skip == 0: 2411 # We haven't moved from the start point. 2412 return self._pack_cookie(start_pos, start_flags) 2413 2414 # Feed the decoder one byte at a time. As we go, note the 2415 # nearest "safe start point" before the current location 2416 # (a point where the decoder has nothing buffered, so seek() 2417 # can safely start from there and advance to this location). 2418 bytes_fed = 0 2419 need_eof = False 2420 # Chars decoded since `start_pos` 2421 chars_decoded = 0 2422 for i in range(skip_bytes, len(next_input)): 2423 bytes_fed += 1 2424 chars_decoded += len(decoder.decode(next_input[i:i+1])) 2425 dec_buffer, dec_flags = decoder.getstate() 2426 if not dec_buffer and chars_decoded <= chars_to_skip: 2427 # Decoder buffer is empty, so this is a safe start point. 2428 start_pos += bytes_fed 2429 chars_to_skip -= chars_decoded 2430 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 2431 if chars_decoded >= chars_to_skip: 2432 break 2433 else: 2434 # We didn't get enough decoded data; signal EOF to get more. 2435 chars_decoded += len(decoder.decode(b'', final=True)) 2436 need_eof = True 2437 if chars_decoded < chars_to_skip: 2438 raise OSError("can't reconstruct logical file position") 2439 2440 # The returned cookie corresponds to the last safe start point. 2441 return self._pack_cookie( 2442 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip) 2443 finally: 2444 decoder.setstate(saved_state) 2445 2446 def truncate(self, pos=None): 2447 self.flush() 2448 if pos is None: 2449 pos = self.tell() 2450 return self.buffer.truncate(pos) 2451 2452 def detach(self): 2453 if self.buffer is None: 2454 raise ValueError("buffer is already detached") 2455 self.flush() 2456 buffer = self._buffer 2457 self._buffer = None 2458 return buffer 2459 2460 def seek(self, cookie, whence=0): 2461 def _reset_encoder(position): 2462 """Reset the encoder (merely useful for proper BOM handling)""" 2463 try: 2464 encoder = self._encoder or self._get_encoder() 2465 except LookupError: 2466 # Sometimes the encoder doesn't exist 2467 pass 2468 else: 2469 if position != 0: 2470 encoder.setstate(0) 2471 else: 2472 encoder.reset() 2473 2474 if self.closed: 2475 raise ValueError("tell on closed file") 2476 if not self._seekable: 2477 raise UnsupportedOperation("underlying stream is not seekable") 2478 if whence == SEEK_CUR: 2479 if cookie != 0: 2480 raise UnsupportedOperation("can't do nonzero cur-relative seeks") 2481 # Seeking to the current position should attempt to 2482 # sync the underlying buffer with the current position. 2483 whence = 0 2484 cookie = self.tell() 2485 elif whence == SEEK_END: 2486 if cookie != 0: 2487 raise UnsupportedOperation("can't do nonzero end-relative seeks") 2488 self.flush() 2489 position = self.buffer.seek(0, whence) 2490 self._set_decoded_chars('') 2491 self._snapshot = None 2492 if self._decoder: 2493 self._decoder.reset() 2494 _reset_encoder(position) 2495 return position 2496 if whence != 0: 2497 raise ValueError("unsupported whence (%r)" % (whence,)) 2498 if cookie < 0: 2499 raise ValueError("negative seek position %r" % (cookie,)) 2500 self.flush() 2501 2502 # The strategy of seek() is to go back to the safe start point 2503 # and replay the effect of read(chars_to_skip) from there. 2504 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \ 2505 self._unpack_cookie(cookie) 2506 2507 # Seek back to the safe start point. 2508 self.buffer.seek(start_pos) 2509 self._set_decoded_chars('') 2510 self._snapshot = None 2511 2512 # Restore the decoder to its state from the safe start point. 2513 if cookie == 0 and self._decoder: 2514 self._decoder.reset() 2515 elif self._decoder or dec_flags or chars_to_skip: 2516 self._decoder = self._decoder or self._get_decoder() 2517 self._decoder.setstate((b'', dec_flags)) 2518 self._snapshot = (dec_flags, b'') 2519 2520 if chars_to_skip: 2521 # Just like _read_chunk, feed the decoder and save a snapshot. 2522 input_chunk = self.buffer.read(bytes_to_feed) 2523 self._set_decoded_chars( 2524 self._decoder.decode(input_chunk, need_eof)) 2525 self._snapshot = (dec_flags, input_chunk) 2526 2527 # Skip chars_to_skip of the decoded characters. 2528 if len(self._decoded_chars) < chars_to_skip: 2529 raise OSError("can't restore logical file position") 2530 self._decoded_chars_used = chars_to_skip 2531 2532 _reset_encoder(cookie) 2533 return cookie 2534 2535 def read(self, size=None): 2536 self._checkReadable() 2537 if size is None: 2538 size = -1 2539 else: 2540 try: 2541 size_index = size.__index__ 2542 except AttributeError: 2543 raise TypeError(f"{size!r} is not an integer") 2544 else: 2545 size = size_index() 2546 decoder = self._decoder or self._get_decoder() 2547 if size < 0: 2548 # Read everything. 2549 result = (self._get_decoded_chars() + 2550 decoder.decode(self.buffer.read(), final=True)) 2551 self._set_decoded_chars('') 2552 self._snapshot = None 2553 return result 2554 else: 2555 # Keep reading chunks until we have size characters to return. 2556 eof = False 2557 result = self._get_decoded_chars(size) 2558 while len(result) < size and not eof: 2559 eof = not self._read_chunk() 2560 result += self._get_decoded_chars(size - len(result)) 2561 return result 2562 2563 def __next__(self): 2564 self._telling = False 2565 line = self.readline() 2566 if not line: 2567 self._snapshot = None 2568 self._telling = self._seekable 2569 raise StopIteration 2570 return line 2571 2572 def readline(self, size=None): 2573 if self.closed: 2574 raise ValueError("read from closed file") 2575 if size is None: 2576 size = -1 2577 else: 2578 try: 2579 size_index = size.__index__ 2580 except AttributeError: 2581 raise TypeError(f"{size!r} is not an integer") 2582 else: 2583 size = size_index() 2584 2585 # Grab all the decoded text (we will rewind any extra bits later). 2586 line = self._get_decoded_chars() 2587 2588 start = 0 2589 # Make the decoder if it doesn't already exist. 2590 if not self._decoder: 2591 self._get_decoder() 2592 2593 pos = endpos = None 2594 while True: 2595 if self._readtranslate: 2596 # Newlines are already translated, only search for \n 2597 pos = line.find('\n', start) 2598 if pos >= 0: 2599 endpos = pos + 1 2600 break 2601 else: 2602 start = len(line) 2603 2604 elif self._readuniversal: 2605 # Universal newline search. Find any of \r, \r\n, \n 2606 # The decoder ensures that \r\n are not split in two pieces 2607 2608 # In C we'd look for these in parallel of course. 2609 nlpos = line.find("\n", start) 2610 crpos = line.find("\r", start) 2611 if crpos == -1: 2612 if nlpos == -1: 2613 # Nothing found 2614 start = len(line) 2615 else: 2616 # Found \n 2617 endpos = nlpos + 1 2618 break 2619 elif nlpos == -1: 2620 # Found lone \r 2621 endpos = crpos + 1 2622 break 2623 elif nlpos < crpos: 2624 # Found \n 2625 endpos = nlpos + 1 2626 break 2627 elif nlpos == crpos + 1: 2628 # Found \r\n 2629 endpos = crpos + 2 2630 break 2631 else: 2632 # Found \r 2633 endpos = crpos + 1 2634 break 2635 else: 2636 # non-universal 2637 pos = line.find(self._readnl) 2638 if pos >= 0: 2639 endpos = pos + len(self._readnl) 2640 break 2641 2642 if size >= 0 and len(line) >= size: 2643 endpos = size # reached length size 2644 break 2645 2646 # No line ending seen yet - get more data' 2647 while self._read_chunk(): 2648 if self._decoded_chars: 2649 break 2650 if self._decoded_chars: 2651 line += self._get_decoded_chars() 2652 else: 2653 # end of file 2654 self._set_decoded_chars('') 2655 self._snapshot = None 2656 return line 2657 2658 if size >= 0 and endpos > size: 2659 endpos = size # don't exceed size 2660 2661 # Rewind _decoded_chars to just after the line ending we found. 2662 self._rewind_decoded_chars(len(line) - endpos) 2663 return line[:endpos] 2664 2665 @property 2666 def newlines(self): 2667 return self._decoder.newlines if self._decoder else None 2668 2669 2670class StringIO(TextIOWrapper): 2671 """Text I/O implementation using an in-memory buffer. 2672 2673 The initial_value argument sets the value of object. The newline 2674 argument is like the one of TextIOWrapper's constructor. 2675 """ 2676 2677 def __init__(self, initial_value="", newline="\n"): 2678 super(StringIO, self).__init__(BytesIO(), 2679 encoding="utf-8", 2680 errors="surrogatepass", 2681 newline=newline) 2682 # Issue #5645: make universal newlines semantics the same as in the 2683 # C version, even under Windows. 2684 if newline is None: 2685 self._writetranslate = False 2686 if initial_value is not None: 2687 if not isinstance(initial_value, str): 2688 raise TypeError("initial_value must be str or None, not {0}" 2689 .format(type(initial_value).__name__)) 2690 self.write(initial_value) 2691 self.seek(0) 2692 2693 def getvalue(self): 2694 self.flush() 2695 decoder = self._decoder or self._get_decoder() 2696 old_state = decoder.getstate() 2697 decoder.reset() 2698 try: 2699 return decoder.decode(self.buffer.getvalue(), final=True) 2700 finally: 2701 decoder.setstate(old_state) 2702 2703 def __repr__(self): 2704 # TextIOWrapper tells the encoding in its repr. In StringIO, 2705 # that's an implementation detail. 2706 return object.__repr__(self) 2707 2708 @property 2709 def errors(self): 2710 return None 2711 2712 @property 2713 def encoding(self): 2714 return None 2715 2716 def detach(self): 2717 # This doesn't make sense on StringIO. 2718 self._unsupported("detach") 2719