1""" 2Python implementation of the io module. 3""" 4 5import os 6import abc 7import codecs 8import errno 9import stat 10import sys 11# Import _thread instead of threading to reduce startup cost 12from _thread import allocate_lock as Lock 13if sys.platform in {'win32', 'cygwin'}: 14 from msvcrt import setmode as _setmode 15else: 16 _setmode = None 17 18import io 19from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END) 20 21valid_seek_flags = {0, 1, 2} # Hardwired values 22if hasattr(os, 'SEEK_HOLE') : 23 valid_seek_flags.add(os.SEEK_HOLE) 24 valid_seek_flags.add(os.SEEK_DATA) 25 26# open() uses st_blksize whenever we can 27DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes 28 29# NOTE: Base classes defined here are registered with the "official" ABCs 30# defined in io.py. We don't use real inheritance though, because we don't want 31# to inherit the C implementations. 32 33# Rebind for compatibility 34BlockingIOError = BlockingIOError 35 36# Does open() check its 'errors' argument? 37_CHECK_ERRORS = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) 38 39 40def text_encoding(encoding, stacklevel=2): 41 """ 42 A helper function to choose the text encoding. 43 44 When encoding is not None, this function returns it. 45 Otherwise, this function returns the default text encoding 46 (i.e. "locale" or "utf-8" depends on UTF-8 mode). 47 48 This function emits an EncodingWarning if *encoding* is None and 49 sys.flags.warn_default_encoding is true. 50 51 This can be used in APIs with an encoding=None parameter 52 that pass it to TextIOWrapper or open. 53 However, please consider using encoding="utf-8" for new APIs. 54 """ 55 if encoding is None: 56 if sys.flags.utf8_mode: 57 encoding = "utf-8" 58 else: 59 encoding = "locale" 60 if sys.flags.warn_default_encoding: 61 import warnings 62 warnings.warn("'encoding' argument not specified.", 63 EncodingWarning, stacklevel + 1) 64 return encoding 65 66 67# Wrapper for builtins.open 68# 69# Trick so that open() won't become a bound method when stored 70# as a class variable (as dbm.dumb does). 71# 72# See init_set_builtins_open() in Python/pylifecycle.c. 73@staticmethod 74def open(file, mode="r", buffering=-1, encoding=None, errors=None, 75 newline=None, closefd=True, opener=None): 76 77 r"""Open file and return a stream. Raise OSError upon failure. 78 79 file is either a text or byte string giving the name (and the path 80 if the file isn't in the current working directory) of the file to 81 be opened or an integer file descriptor of the file to be 82 wrapped. (If a file descriptor is given, it is closed when the 83 returned I/O object is closed, unless closefd is set to False.) 84 85 mode is an optional string that specifies the mode in which the file is 86 opened. It defaults to 'r' which means open for reading in text mode. Other 87 common values are 'w' for writing (truncating the file if it already 88 exists), 'x' for exclusive creation of a new file, and 'a' for appending 89 (which on some Unix systems, means that all writes append to the end of the 90 file regardless of the current seek position). In text mode, if encoding is 91 not specified the encoding used is platform dependent. (For reading and 92 writing raw bytes use binary mode and leave encoding unspecified.) The 93 available modes are: 94 95 ========= =============================================================== 96 Character Meaning 97 --------- --------------------------------------------------------------- 98 'r' open for reading (default) 99 'w' open for writing, truncating the file first 100 'x' create a new file and open it for writing 101 'a' open for writing, appending to the end of the file if it exists 102 'b' binary mode 103 't' text mode (default) 104 '+' open a disk file for updating (reading and writing) 105 ========= =============================================================== 106 107 The default mode is 'rt' (open for reading text). For binary random 108 access, the mode 'w+b' opens and truncates the file to 0 bytes, while 109 'r+b' opens the file without truncation. The 'x' mode implies 'w' and 110 raises an `FileExistsError` if the file already exists. 111 112 Python distinguishes between files opened in binary and text modes, 113 even when the underlying operating system doesn't. Files opened in 114 binary mode (appending 'b' to the mode argument) return contents as 115 bytes objects without any decoding. In text mode (the default, or when 116 't' is appended to the mode argument), the contents of the file are 117 returned as strings, the bytes having been first decoded using a 118 platform-dependent encoding or using the specified encoding if given. 119 120 buffering is an optional integer used to set the buffering policy. 121 Pass 0 to switch buffering off (only allowed in binary mode), 1 to select 122 line buffering (only usable in text mode), and an integer > 1 to indicate 123 the size of a fixed-size chunk buffer. When no buffering argument is 124 given, the default buffering policy works as follows: 125 126 * Binary files are buffered in fixed-size chunks; the size of the buffer 127 is chosen using a heuristic trying to determine the underlying device's 128 "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`. 129 On many systems, the buffer will typically be 4096 or 8192 bytes long. 130 131 * "Interactive" text files (files for which isatty() returns True) 132 use line buffering. Other text files use the policy described above 133 for binary files. 134 135 encoding is the str name of the encoding used to decode or encode the 136 file. This should only be used in text mode. The default encoding is 137 platform dependent, but any encoding supported by Python can be 138 passed. See the codecs module for the list of supported encodings. 139 140 errors is an optional string that specifies how encoding errors are to 141 be handled---this argument should not be used in binary mode. Pass 142 'strict' to raise a ValueError exception if there is an encoding error 143 (the default of None has the same effect), or pass 'ignore' to ignore 144 errors. (Note that ignoring encoding errors can lead to data loss.) 145 See the documentation for codecs.register for a list of the permitted 146 encoding error strings. 147 148 newline is a string controlling how universal newlines works (it only 149 applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works 150 as follows: 151 152 * On input, if newline is None, universal newlines mode is 153 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and 154 these are translated into '\n' before being returned to the 155 caller. If it is '', universal newline mode is enabled, but line 156 endings are returned to the caller untranslated. If it has any of 157 the other legal values, input lines are only terminated by the given 158 string, and the line ending is returned to the caller untranslated. 159 160 * On output, if newline is None, any '\n' characters written are 161 translated to the system default line separator, os.linesep. If 162 newline is '', no translation takes place. If newline is any of the 163 other legal values, any '\n' characters written are translated to 164 the given string. 165 166 closedfd is a bool. If closefd is False, the underlying file descriptor will 167 be kept open when the file is closed. This does not work when a file name is 168 given and must be True in that case. 169 170 The newly created file is non-inheritable. 171 172 A custom opener can be used by passing a callable as *opener*. The 173 underlying file descriptor for the file object is then obtained by calling 174 *opener* with (*file*, *flags*). *opener* must return an open file 175 descriptor (passing os.open as *opener* results in functionality similar to 176 passing None). 177 178 open() returns a file object whose type depends on the mode, and 179 through which the standard file operations such as reading and writing 180 are performed. When open() is used to open a file in a text mode ('w', 181 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open 182 a file in a binary mode, the returned class varies: in read binary 183 mode, it returns a BufferedReader; in write binary and append binary 184 modes, it returns a BufferedWriter, and in read/write mode, it returns 185 a BufferedRandom. 186 187 It is also possible to use a string or bytearray as a file for both 188 reading and writing. For strings StringIO can be used like a file 189 opened in a text mode, and for bytes a BytesIO can be used like a file 190 opened in a binary mode. 191 """ 192 if not isinstance(file, int): 193 file = os.fspath(file) 194 if not isinstance(file, (str, bytes, int)): 195 raise TypeError("invalid file: %r" % file) 196 if not isinstance(mode, str): 197 raise TypeError("invalid mode: %r" % mode) 198 if not isinstance(buffering, int): 199 raise TypeError("invalid buffering: %r" % buffering) 200 if encoding is not None and not isinstance(encoding, str): 201 raise TypeError("invalid encoding: %r" % encoding) 202 if errors is not None and not isinstance(errors, str): 203 raise TypeError("invalid errors: %r" % errors) 204 modes = set(mode) 205 if modes - set("axrwb+t") or len(mode) > len(modes): 206 raise ValueError("invalid mode: %r" % mode) 207 creating = "x" in modes 208 reading = "r" in modes 209 writing = "w" in modes 210 appending = "a" in modes 211 updating = "+" in modes 212 text = "t" in modes 213 binary = "b" in modes 214 if text and binary: 215 raise ValueError("can't have text and binary mode at once") 216 if creating + reading + writing + appending > 1: 217 raise ValueError("can't have read/write/append mode at once") 218 if not (creating or reading or writing or appending): 219 raise ValueError("must have exactly one of read/write/append mode") 220 if binary and encoding is not None: 221 raise ValueError("binary mode doesn't take an encoding argument") 222 if binary and errors is not None: 223 raise ValueError("binary mode doesn't take an errors argument") 224 if binary and newline is not None: 225 raise ValueError("binary mode doesn't take a newline argument") 226 if binary and buffering == 1: 227 import warnings 228 warnings.warn("line buffering (buffering=1) isn't supported in binary " 229 "mode, the default buffer size will be used", 230 RuntimeWarning, 2) 231 raw = FileIO(file, 232 (creating and "x" or "") + 233 (reading and "r" or "") + 234 (writing and "w" or "") + 235 (appending and "a" or "") + 236 (updating and "+" or ""), 237 closefd, opener=opener) 238 result = raw 239 try: 240 line_buffering = False 241 if buffering == 1 or buffering < 0 and raw.isatty(): 242 buffering = -1 243 line_buffering = True 244 if buffering < 0: 245 buffering = DEFAULT_BUFFER_SIZE 246 try: 247 bs = os.fstat(raw.fileno()).st_blksize 248 except (OSError, AttributeError): 249 pass 250 else: 251 if bs > 1: 252 buffering = bs 253 if buffering < 0: 254 raise ValueError("invalid buffering size") 255 if buffering == 0: 256 if binary: 257 return result 258 raise ValueError("can't have unbuffered text I/O") 259 if updating: 260 buffer = BufferedRandom(raw, buffering) 261 elif creating or writing or appending: 262 buffer = BufferedWriter(raw, buffering) 263 elif reading: 264 buffer = BufferedReader(raw, buffering) 265 else: 266 raise ValueError("unknown mode: %r" % mode) 267 result = buffer 268 if binary: 269 return result 270 encoding = text_encoding(encoding) 271 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering) 272 result = text 273 text.mode = mode 274 return result 275 except: 276 result.close() 277 raise 278 279# Define a default pure-Python implementation for open_code() 280# that does not allow hooks. Warn on first use. Defined for tests. 281def _open_code_with_warning(path): 282 """Opens the provided file with mode ``'rb'``. This function 283 should be used when the intent is to treat the contents as 284 executable code. 285 286 ``path`` should be an absolute path. 287 288 When supported by the runtime, this function can be hooked 289 in order to allow embedders more control over code files. 290 This functionality is not supported on the current runtime. 291 """ 292 import warnings 293 warnings.warn("_pyio.open_code() may not be using hooks", 294 RuntimeWarning, 2) 295 return open(path, "rb") 296 297try: 298 open_code = io.open_code 299except AttributeError: 300 open_code = _open_code_with_warning 301 302 303# In normal operation, both `UnsupportedOperation`s should be bound to the 304# same object. 305try: 306 UnsupportedOperation = io.UnsupportedOperation 307except AttributeError: 308 class UnsupportedOperation(OSError, ValueError): 309 pass 310 311 312class IOBase(metaclass=abc.ABCMeta): 313 314 """The abstract base class for all I/O classes. 315 316 This class provides dummy implementations for many methods that 317 derived classes can override selectively; the default implementations 318 represent a file that cannot be read, written or seeked. 319 320 Even though IOBase does not declare read or write because 321 their signatures will vary, implementations and clients should 322 consider those methods part of the interface. Also, implementations 323 may raise UnsupportedOperation when operations they do not support are 324 called. 325 326 The basic type used for binary data read from or written to a file is 327 bytes. Other bytes-like objects are accepted as method arguments too. 328 Text I/O classes work with str data. 329 330 Note that calling any method (even inquiries) on a closed stream is 331 undefined. Implementations may raise OSError in this case. 332 333 IOBase (and its subclasses) support the iterator protocol, meaning 334 that an IOBase object can be iterated over yielding the lines in a 335 stream. 336 337 IOBase also supports the :keyword:`with` statement. In this example, 338 fp is closed after the suite of the with statement is complete: 339 340 with open('spam.txt', 'r') as fp: 341 fp.write('Spam and eggs!') 342 """ 343 344 ### Internal ### 345 346 def _unsupported(self, name): 347 """Internal: raise an OSError exception for unsupported operations.""" 348 raise UnsupportedOperation("%s.%s() not supported" % 349 (self.__class__.__name__, name)) 350 351 ### Positioning ### 352 353 def seek(self, pos, whence=0): 354 """Change stream position. 355 356 Change the stream position to byte offset pos. Argument pos is 357 interpreted relative to the position indicated by whence. Values 358 for whence are ints: 359 360 * 0 -- start of stream (the default); offset should be zero or positive 361 * 1 -- current stream position; offset may be negative 362 * 2 -- end of stream; offset is usually negative 363 Some operating systems / file systems could provide additional values. 364 365 Return an int indicating the new absolute position. 366 """ 367 self._unsupported("seek") 368 369 def tell(self): 370 """Return an int indicating the current stream position.""" 371 return self.seek(0, 1) 372 373 def truncate(self, pos=None): 374 """Truncate file to size bytes. 375 376 Size defaults to the current IO position as reported by tell(). Return 377 the new size. 378 """ 379 self._unsupported("truncate") 380 381 ### Flush and close ### 382 383 def flush(self): 384 """Flush write buffers, if applicable. 385 386 This is not implemented for read-only and non-blocking streams. 387 """ 388 self._checkClosed() 389 # XXX Should this return the number of bytes written??? 390 391 __closed = False 392 393 def close(self): 394 """Flush and close the IO object. 395 396 This method has no effect if the file is already closed. 397 """ 398 if not self.__closed: 399 try: 400 self.flush() 401 finally: 402 self.__closed = True 403 404 def __del__(self): 405 """Destructor. Calls close().""" 406 try: 407 closed = self.closed 408 except AttributeError: 409 # If getting closed fails, then the object is probably 410 # in an unusable state, so ignore. 411 return 412 413 if closed: 414 return 415 416 # If close() fails, the caller logs the exception with 417 # sys.unraisablehook. close() must be called at the end at __del__(). 418 self.close() 419 420 ### Inquiries ### 421 422 def seekable(self): 423 """Return a bool indicating whether object supports random access. 424 425 If False, seek(), tell() and truncate() will raise OSError. 426 This method may need to do a test seek(). 427 """ 428 return False 429 430 def _checkSeekable(self, msg=None): 431 """Internal: raise UnsupportedOperation if file is not seekable 432 """ 433 if not self.seekable(): 434 raise UnsupportedOperation("File or stream is not seekable." 435 if msg is None else msg) 436 437 def readable(self): 438 """Return a bool indicating whether object was opened for reading. 439 440 If False, read() will raise OSError. 441 """ 442 return False 443 444 def _checkReadable(self, msg=None): 445 """Internal: raise UnsupportedOperation if file is not readable 446 """ 447 if not self.readable(): 448 raise UnsupportedOperation("File or stream is not readable." 449 if msg is None else msg) 450 451 def writable(self): 452 """Return a bool indicating whether object was opened for writing. 453 454 If False, write() and truncate() will raise OSError. 455 """ 456 return False 457 458 def _checkWritable(self, msg=None): 459 """Internal: raise UnsupportedOperation if file is not writable 460 """ 461 if not self.writable(): 462 raise UnsupportedOperation("File or stream is not writable." 463 if msg is None else msg) 464 465 @property 466 def closed(self): 467 """closed: bool. True iff the file has been closed. 468 469 For backwards compatibility, this is a property, not a predicate. 470 """ 471 return self.__closed 472 473 def _checkClosed(self, msg=None): 474 """Internal: raise a ValueError if file is closed 475 """ 476 if self.closed: 477 raise ValueError("I/O operation on closed file." 478 if msg is None else msg) 479 480 ### Context manager ### 481 482 def __enter__(self): # That's a forward reference 483 """Context management protocol. Returns self (an instance of IOBase).""" 484 self._checkClosed() 485 return self 486 487 def __exit__(self, *args): 488 """Context management protocol. Calls close()""" 489 self.close() 490 491 ### Lower-level APIs ### 492 493 # XXX Should these be present even if unimplemented? 494 495 def fileno(self): 496 """Returns underlying file descriptor (an int) if one exists. 497 498 An OSError is raised if the IO object does not use a file descriptor. 499 """ 500 self._unsupported("fileno") 501 502 def isatty(self): 503 """Return a bool indicating whether this is an 'interactive' stream. 504 505 Return False if it can't be determined. 506 """ 507 self._checkClosed() 508 return False 509 510 ### Readline[s] and writelines ### 511 512 def readline(self, size=-1): 513 r"""Read and return a line of bytes from the stream. 514 515 If size is specified, at most size bytes will be read. 516 Size should be an int. 517 518 The line terminator is always b'\n' for binary files; for text 519 files, the newlines argument to open can be used to select the line 520 terminator(s) recognized. 521 """ 522 # For backwards compatibility, a (slowish) readline(). 523 if hasattr(self, "peek"): 524 def nreadahead(): 525 readahead = self.peek(1) 526 if not readahead: 527 return 1 528 n = (readahead.find(b"\n") + 1) or len(readahead) 529 if size >= 0: 530 n = min(n, size) 531 return n 532 else: 533 def nreadahead(): 534 return 1 535 if size is None: 536 size = -1 537 else: 538 try: 539 size_index = size.__index__ 540 except AttributeError: 541 raise TypeError(f"{size!r} is not an integer") 542 else: 543 size = size_index() 544 res = bytearray() 545 while size < 0 or len(res) < size: 546 b = self.read(nreadahead()) 547 if not b: 548 break 549 res += b 550 if res.endswith(b"\n"): 551 break 552 return bytes(res) 553 554 def __iter__(self): 555 self._checkClosed() 556 return self 557 558 def __next__(self): 559 line = self.readline() 560 if not line: 561 raise StopIteration 562 return line 563 564 def readlines(self, hint=None): 565 """Return a list of lines from the stream. 566 567 hint can be specified to control the number of lines read: no more 568 lines will be read if the total size (in bytes/characters) of all 569 lines so far exceeds hint. 570 """ 571 if hint is None or hint <= 0: 572 return list(self) 573 n = 0 574 lines = [] 575 for line in self: 576 lines.append(line) 577 n += len(line) 578 if n >= hint: 579 break 580 return lines 581 582 def writelines(self, lines): 583 """Write a list of lines to the stream. 584 585 Line separators are not added, so it is usual for each of the lines 586 provided to have a line separator at the end. 587 """ 588 self._checkClosed() 589 for line in lines: 590 self.write(line) 591 592io.IOBase.register(IOBase) 593 594 595class RawIOBase(IOBase): 596 597 """Base class for raw binary I/O.""" 598 599 # The read() method is implemented by calling readinto(); derived 600 # classes that want to support read() only need to implement 601 # readinto() as a primitive operation. In general, readinto() can be 602 # more efficient than read(). 603 604 # (It would be tempting to also provide an implementation of 605 # readinto() in terms of read(), in case the latter is a more suitable 606 # primitive operation, but that would lead to nasty recursion in case 607 # a subclass doesn't implement either.) 608 609 def read(self, size=-1): 610 """Read and return up to size bytes, where size is an int. 611 612 Returns an empty bytes object on EOF, or None if the object is 613 set not to block and has no data to read. 614 """ 615 if size is None: 616 size = -1 617 if size < 0: 618 return self.readall() 619 b = bytearray(size.__index__()) 620 n = self.readinto(b) 621 if n is None: 622 return None 623 del b[n:] 624 return bytes(b) 625 626 def readall(self): 627 """Read until EOF, using multiple read() call.""" 628 res = bytearray() 629 while data := self.read(DEFAULT_BUFFER_SIZE): 630 res += data 631 if res: 632 return bytes(res) 633 else: 634 # b'' or None 635 return data 636 637 def readinto(self, b): 638 """Read bytes into a pre-allocated bytes-like object b. 639 640 Returns an int representing the number of bytes read (0 for EOF), or 641 None if the object is set not to block and has no data to read. 642 """ 643 self._unsupported("readinto") 644 645 def write(self, b): 646 """Write the given buffer to the IO stream. 647 648 Returns the number of bytes written, which may be less than the 649 length of b in bytes. 650 """ 651 self._unsupported("write") 652 653io.RawIOBase.register(RawIOBase) 654from _io import FileIO 655RawIOBase.register(FileIO) 656 657 658class BufferedIOBase(IOBase): 659 660 """Base class for buffered IO objects. 661 662 The main difference with RawIOBase is that the read() method 663 supports omitting the size argument, and does not have a default 664 implementation that defers to readinto(). 665 666 In addition, read(), readinto() and write() may raise 667 BlockingIOError if the underlying raw stream is in non-blocking 668 mode and not ready; unlike their raw counterparts, they will never 669 return None. 670 671 A typical implementation should not inherit from a RawIOBase 672 implementation, but wrap one. 673 """ 674 675 def read(self, size=-1): 676 """Read and return up to size bytes, where size is an int. 677 678 If the argument is omitted, None, or negative, reads and 679 returns all data until EOF. 680 681 If the argument is positive, and the underlying raw stream is 682 not 'interactive', multiple raw reads may be issued to satisfy 683 the byte count (unless EOF is reached first). But for 684 interactive raw streams (XXX and for pipes?), at most one raw 685 read will be issued, and a short result does not imply that 686 EOF is imminent. 687 688 Returns an empty bytes array on EOF. 689 690 Raises BlockingIOError if the underlying raw stream has no 691 data at the moment. 692 """ 693 self._unsupported("read") 694 695 def read1(self, size=-1): 696 """Read up to size bytes with at most one read() system call, 697 where size is an int. 698 """ 699 self._unsupported("read1") 700 701 def readinto(self, b): 702 """Read bytes into a pre-allocated bytes-like object b. 703 704 Like read(), this may issue multiple reads to the underlying raw 705 stream, unless the latter is 'interactive'. 706 707 Returns an int representing the number of bytes read (0 for EOF). 708 709 Raises BlockingIOError if the underlying raw stream has no 710 data at the moment. 711 """ 712 713 return self._readinto(b, read1=False) 714 715 def readinto1(self, b): 716 """Read bytes into buffer *b*, using at most one system call 717 718 Returns an int representing the number of bytes read (0 for EOF). 719 720 Raises BlockingIOError if the underlying raw stream has no 721 data at the moment. 722 """ 723 724 return self._readinto(b, read1=True) 725 726 def _readinto(self, b, read1): 727 if not isinstance(b, memoryview): 728 b = memoryview(b) 729 b = b.cast('B') 730 731 if read1: 732 data = self.read1(len(b)) 733 else: 734 data = self.read(len(b)) 735 n = len(data) 736 737 b[:n] = data 738 739 return n 740 741 def write(self, b): 742 """Write the given bytes buffer to the IO stream. 743 744 Return the number of bytes written, which is always the length of b 745 in bytes. 746 747 Raises BlockingIOError if the buffer is full and the 748 underlying raw stream cannot accept more data at the moment. 749 """ 750 self._unsupported("write") 751 752 def detach(self): 753 """ 754 Separate the underlying raw stream from the buffer and return it. 755 756 After the raw stream has been detached, the buffer is in an unusable 757 state. 758 """ 759 self._unsupported("detach") 760 761io.BufferedIOBase.register(BufferedIOBase) 762 763 764class _BufferedIOMixin(BufferedIOBase): 765 766 """A mixin implementation of BufferedIOBase with an underlying raw stream. 767 768 This passes most requests on to the underlying raw stream. It 769 does *not* provide implementations of read(), readinto() or 770 write(). 771 """ 772 773 def __init__(self, raw): 774 self._raw = raw 775 776 ### Positioning ### 777 778 def seek(self, pos, whence=0): 779 new_position = self.raw.seek(pos, whence) 780 if new_position < 0: 781 raise OSError("seek() returned an invalid position") 782 return new_position 783 784 def tell(self): 785 pos = self.raw.tell() 786 if pos < 0: 787 raise OSError("tell() returned an invalid position") 788 return pos 789 790 def truncate(self, pos=None): 791 self._checkClosed() 792 self._checkWritable() 793 794 # Flush the stream. We're mixing buffered I/O with lower-level I/O, 795 # and a flush may be necessary to synch both views of the current 796 # file state. 797 self.flush() 798 799 if pos is None: 800 pos = self.tell() 801 # XXX: Should seek() be used, instead of passing the position 802 # XXX directly to truncate? 803 return self.raw.truncate(pos) 804 805 ### Flush and close ### 806 807 def flush(self): 808 if self.closed: 809 raise ValueError("flush on closed file") 810 self.raw.flush() 811 812 def close(self): 813 if self.raw is not None and not self.closed: 814 try: 815 # may raise BlockingIOError or BrokenPipeError etc 816 self.flush() 817 finally: 818 self.raw.close() 819 820 def detach(self): 821 if self.raw is None: 822 raise ValueError("raw stream already detached") 823 self.flush() 824 raw = self._raw 825 self._raw = None 826 return raw 827 828 ### Inquiries ### 829 830 def seekable(self): 831 return self.raw.seekable() 832 833 @property 834 def raw(self): 835 return self._raw 836 837 @property 838 def closed(self): 839 return self.raw.closed 840 841 @property 842 def name(self): 843 return self.raw.name 844 845 @property 846 def mode(self): 847 return self.raw.mode 848 849 def __getstate__(self): 850 raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") 851 852 def __repr__(self): 853 modname = self.__class__.__module__ 854 clsname = self.__class__.__qualname__ 855 try: 856 name = self.name 857 except AttributeError: 858 return "<{}.{}>".format(modname, clsname) 859 else: 860 return "<{}.{} name={!r}>".format(modname, clsname, name) 861 862 ### Lower-level APIs ### 863 864 def fileno(self): 865 return self.raw.fileno() 866 867 def isatty(self): 868 return self.raw.isatty() 869 870 871class BytesIO(BufferedIOBase): 872 873 """Buffered I/O implementation using an in-memory bytes buffer.""" 874 875 # Initialize _buffer as soon as possible since it's used by __del__() 876 # which calls close() 877 _buffer = None 878 879 def __init__(self, initial_bytes=None): 880 buf = bytearray() 881 if initial_bytes is not None: 882 buf += initial_bytes 883 self._buffer = buf 884 self._pos = 0 885 886 def __getstate__(self): 887 if self.closed: 888 raise ValueError("__getstate__ on closed file") 889 return self.__dict__.copy() 890 891 def getvalue(self): 892 """Return the bytes value (contents) of the buffer 893 """ 894 if self.closed: 895 raise ValueError("getvalue on closed file") 896 return bytes(self._buffer) 897 898 def getbuffer(self): 899 """Return a readable and writable view of the buffer. 900 """ 901 if self.closed: 902 raise ValueError("getbuffer on closed file") 903 return memoryview(self._buffer) 904 905 def close(self): 906 if self._buffer is not None: 907 self._buffer.clear() 908 super().close() 909 910 def read(self, size=-1): 911 if self.closed: 912 raise ValueError("read from closed file") 913 if size is None: 914 size = -1 915 else: 916 try: 917 size_index = size.__index__ 918 except AttributeError: 919 raise TypeError(f"{size!r} is not an integer") 920 else: 921 size = size_index() 922 if size < 0: 923 size = len(self._buffer) 924 if len(self._buffer) <= self._pos: 925 return b"" 926 newpos = min(len(self._buffer), self._pos + size) 927 b = self._buffer[self._pos : newpos] 928 self._pos = newpos 929 return bytes(b) 930 931 def read1(self, size=-1): 932 """This is the same as read. 933 """ 934 return self.read(size) 935 936 def write(self, b): 937 if self.closed: 938 raise ValueError("write to closed file") 939 if isinstance(b, str): 940 raise TypeError("can't write str to binary stream") 941 with memoryview(b) as view: 942 n = view.nbytes # Size of any bytes-like object 943 if n == 0: 944 return 0 945 pos = self._pos 946 if pos > len(self._buffer): 947 # Inserts null bytes between the current end of the file 948 # and the new write position. 949 padding = b'\x00' * (pos - len(self._buffer)) 950 self._buffer += padding 951 self._buffer[pos:pos + n] = b 952 self._pos += n 953 return n 954 955 def seek(self, pos, whence=0): 956 if self.closed: 957 raise ValueError("seek on closed file") 958 try: 959 pos_index = pos.__index__ 960 except AttributeError: 961 raise TypeError(f"{pos!r} is not an integer") 962 else: 963 pos = pos_index() 964 if whence == 0: 965 if pos < 0: 966 raise ValueError("negative seek position %r" % (pos,)) 967 self._pos = pos 968 elif whence == 1: 969 self._pos = max(0, self._pos + pos) 970 elif whence == 2: 971 self._pos = max(0, len(self._buffer) + pos) 972 else: 973 raise ValueError("unsupported whence value") 974 return self._pos 975 976 def tell(self): 977 if self.closed: 978 raise ValueError("tell on closed file") 979 return self._pos 980 981 def truncate(self, pos=None): 982 if self.closed: 983 raise ValueError("truncate on closed file") 984 if pos is None: 985 pos = self._pos 986 else: 987 try: 988 pos_index = pos.__index__ 989 except AttributeError: 990 raise TypeError(f"{pos!r} is not an integer") 991 else: 992 pos = pos_index() 993 if pos < 0: 994 raise ValueError("negative truncate position %r" % (pos,)) 995 del self._buffer[pos:] 996 return pos 997 998 def readable(self): 999 if self.closed: 1000 raise ValueError("I/O operation on closed file.") 1001 return True 1002 1003 def writable(self): 1004 if self.closed: 1005 raise ValueError("I/O operation on closed file.") 1006 return True 1007 1008 def seekable(self): 1009 if self.closed: 1010 raise ValueError("I/O operation on closed file.") 1011 return True 1012 1013 1014class BufferedReader(_BufferedIOMixin): 1015 1016 """BufferedReader(raw[, buffer_size]) 1017 1018 A buffer for a readable, sequential BaseRawIO object. 1019 1020 The constructor creates a BufferedReader for the given readable raw 1021 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE 1022 is used. 1023 """ 1024 1025 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 1026 """Create a new buffered reader using the given readable raw IO object. 1027 """ 1028 if not raw.readable(): 1029 raise OSError('"raw" argument must be readable.') 1030 1031 _BufferedIOMixin.__init__(self, raw) 1032 if buffer_size <= 0: 1033 raise ValueError("invalid buffer size") 1034 self.buffer_size = buffer_size 1035 self._reset_read_buf() 1036 self._read_lock = Lock() 1037 1038 def readable(self): 1039 return self.raw.readable() 1040 1041 def _reset_read_buf(self): 1042 self._read_buf = b"" 1043 self._read_pos = 0 1044 1045 def read(self, size=None): 1046 """Read size bytes. 1047 1048 Returns exactly size bytes of data unless the underlying raw IO 1049 stream reaches EOF or if the call would block in non-blocking 1050 mode. If size is negative, read until EOF or until read() would 1051 block. 1052 """ 1053 if size is not None and size < -1: 1054 raise ValueError("invalid number of bytes to read") 1055 with self._read_lock: 1056 return self._read_unlocked(size) 1057 1058 def _read_unlocked(self, n=None): 1059 nodata_val = b"" 1060 empty_values = (b"", None) 1061 buf = self._read_buf 1062 pos = self._read_pos 1063 1064 # Special case for when the number of bytes to read is unspecified. 1065 if n is None or n == -1: 1066 self._reset_read_buf() 1067 if hasattr(self.raw, 'readall'): 1068 chunk = self.raw.readall() 1069 if chunk is None: 1070 return buf[pos:] or None 1071 else: 1072 return buf[pos:] + chunk 1073 chunks = [buf[pos:]] # Strip the consumed bytes. 1074 current_size = 0 1075 while True: 1076 # Read until EOF or until read() would block. 1077 chunk = self.raw.read() 1078 if chunk in empty_values: 1079 nodata_val = chunk 1080 break 1081 current_size += len(chunk) 1082 chunks.append(chunk) 1083 return b"".join(chunks) or nodata_val 1084 1085 # The number of bytes to read is specified, return at most n bytes. 1086 avail = len(buf) - pos # Length of the available buffered data. 1087 if n <= avail: 1088 # Fast path: the data to read is fully buffered. 1089 self._read_pos += n 1090 return buf[pos:pos+n] 1091 # Slow path: read from the stream until enough bytes are read, 1092 # or until an EOF occurs or until read() would block. 1093 chunks = [buf[pos:]] 1094 wanted = max(self.buffer_size, n) 1095 while avail < n: 1096 chunk = self.raw.read(wanted) 1097 if chunk in empty_values: 1098 nodata_val = chunk 1099 break 1100 avail += len(chunk) 1101 chunks.append(chunk) 1102 # n is more than avail only when an EOF occurred or when 1103 # read() would have blocked. 1104 n = min(n, avail) 1105 out = b"".join(chunks) 1106 self._read_buf = out[n:] # Save the extra data in the buffer. 1107 self._read_pos = 0 1108 return out[:n] if out else nodata_val 1109 1110 def peek(self, size=0): 1111 """Returns buffered bytes without advancing the position. 1112 1113 The argument indicates a desired minimal number of bytes; we 1114 do at most one raw read to satisfy it. We never return more 1115 than self.buffer_size. 1116 """ 1117 self._checkClosed("peek of closed file") 1118 with self._read_lock: 1119 return self._peek_unlocked(size) 1120 1121 def _peek_unlocked(self, n=0): 1122 want = min(n, self.buffer_size) 1123 have = len(self._read_buf) - self._read_pos 1124 if have < want or have <= 0: 1125 to_read = self.buffer_size - have 1126 current = self.raw.read(to_read) 1127 if current: 1128 self._read_buf = self._read_buf[self._read_pos:] + current 1129 self._read_pos = 0 1130 return self._read_buf[self._read_pos:] 1131 1132 def read1(self, size=-1): 1133 """Reads up to size bytes, with at most one read() system call.""" 1134 # Returns up to size bytes. If at least one byte is buffered, we 1135 # only return buffered bytes. Otherwise, we do one raw read. 1136 self._checkClosed("read of closed file") 1137 if size < 0: 1138 size = self.buffer_size 1139 if size == 0: 1140 return b"" 1141 with self._read_lock: 1142 self._peek_unlocked(1) 1143 return self._read_unlocked( 1144 min(size, len(self._read_buf) - self._read_pos)) 1145 1146 # Implementing readinto() and readinto1() is not strictly necessary (we 1147 # could rely on the base class that provides an implementation in terms of 1148 # read() and read1()). We do it anyway to keep the _pyio implementation 1149 # similar to the io implementation (which implements the methods for 1150 # performance reasons). 1151 def _readinto(self, buf, read1): 1152 """Read data into *buf* with at most one system call.""" 1153 1154 self._checkClosed("readinto of closed file") 1155 1156 # Need to create a memoryview object of type 'b', otherwise 1157 # we may not be able to assign bytes to it, and slicing it 1158 # would create a new object. 1159 if not isinstance(buf, memoryview): 1160 buf = memoryview(buf) 1161 if buf.nbytes == 0: 1162 return 0 1163 buf = buf.cast('B') 1164 1165 written = 0 1166 with self._read_lock: 1167 while written < len(buf): 1168 1169 # First try to read from internal buffer 1170 avail = min(len(self._read_buf) - self._read_pos, len(buf)) 1171 if avail: 1172 buf[written:written+avail] = \ 1173 self._read_buf[self._read_pos:self._read_pos+avail] 1174 self._read_pos += avail 1175 written += avail 1176 if written == len(buf): 1177 break 1178 1179 # If remaining space in callers buffer is larger than 1180 # internal buffer, read directly into callers buffer 1181 if len(buf) - written > self.buffer_size: 1182 n = self.raw.readinto(buf[written:]) 1183 if not n: 1184 break # eof 1185 written += n 1186 1187 # Otherwise refill internal buffer - unless we're 1188 # in read1 mode and already got some data 1189 elif not (read1 and written): 1190 if not self._peek_unlocked(1): 1191 break # eof 1192 1193 # In readinto1 mode, return as soon as we have some data 1194 if read1 and written: 1195 break 1196 1197 return written 1198 1199 def tell(self): 1200 # GH-95782: Keep return value non-negative 1201 return max(_BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos, 0) 1202 1203 def seek(self, pos, whence=0): 1204 if whence not in valid_seek_flags: 1205 raise ValueError("invalid whence value") 1206 self._checkClosed("seek of closed file") 1207 with self._read_lock: 1208 if whence == 1: 1209 pos -= len(self._read_buf) - self._read_pos 1210 pos = _BufferedIOMixin.seek(self, pos, whence) 1211 self._reset_read_buf() 1212 return pos 1213 1214class BufferedWriter(_BufferedIOMixin): 1215 1216 """A buffer for a writeable sequential RawIO object. 1217 1218 The constructor creates a BufferedWriter for the given writeable raw 1219 stream. If the buffer_size is not given, it defaults to 1220 DEFAULT_BUFFER_SIZE. 1221 """ 1222 1223 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 1224 if not raw.writable(): 1225 raise OSError('"raw" argument must be writable.') 1226 1227 _BufferedIOMixin.__init__(self, raw) 1228 if buffer_size <= 0: 1229 raise ValueError("invalid buffer size") 1230 self.buffer_size = buffer_size 1231 self._write_buf = bytearray() 1232 self._write_lock = Lock() 1233 1234 def writable(self): 1235 return self.raw.writable() 1236 1237 def write(self, b): 1238 if isinstance(b, str): 1239 raise TypeError("can't write str to binary stream") 1240 with self._write_lock: 1241 if self.closed: 1242 raise ValueError("write to closed file") 1243 # XXX we can implement some more tricks to try and avoid 1244 # partial writes 1245 if len(self._write_buf) > self.buffer_size: 1246 # We're full, so let's pre-flush the buffer. (This may 1247 # raise BlockingIOError with characters_written == 0.) 1248 self._flush_unlocked() 1249 before = len(self._write_buf) 1250 self._write_buf.extend(b) 1251 written = len(self._write_buf) - before 1252 if len(self._write_buf) > self.buffer_size: 1253 try: 1254 self._flush_unlocked() 1255 except BlockingIOError as e: 1256 if len(self._write_buf) > self.buffer_size: 1257 # We've hit the buffer_size. We have to accept a partial 1258 # write and cut back our buffer. 1259 overage = len(self._write_buf) - self.buffer_size 1260 written -= overage 1261 self._write_buf = self._write_buf[:self.buffer_size] 1262 raise BlockingIOError(e.errno, e.strerror, written) 1263 return written 1264 1265 def truncate(self, pos=None): 1266 with self._write_lock: 1267 self._flush_unlocked() 1268 if pos is None: 1269 pos = self.raw.tell() 1270 return self.raw.truncate(pos) 1271 1272 def flush(self): 1273 with self._write_lock: 1274 self._flush_unlocked() 1275 1276 def _flush_unlocked(self): 1277 if self.closed: 1278 raise ValueError("flush on closed file") 1279 while self._write_buf: 1280 try: 1281 n = self.raw.write(self._write_buf) 1282 except BlockingIOError: 1283 raise RuntimeError("self.raw should implement RawIOBase: it " 1284 "should not raise BlockingIOError") 1285 if n is None: 1286 raise BlockingIOError( 1287 errno.EAGAIN, 1288 "write could not complete without blocking", 0) 1289 if n > len(self._write_buf) or n < 0: 1290 raise OSError("write() returned incorrect number of bytes") 1291 del self._write_buf[:n] 1292 1293 def tell(self): 1294 return _BufferedIOMixin.tell(self) + len(self._write_buf) 1295 1296 def seek(self, pos, whence=0): 1297 if whence not in valid_seek_flags: 1298 raise ValueError("invalid whence value") 1299 with self._write_lock: 1300 self._flush_unlocked() 1301 return _BufferedIOMixin.seek(self, pos, whence) 1302 1303 def close(self): 1304 with self._write_lock: 1305 if self.raw is None or self.closed: 1306 return 1307 # We have to release the lock and call self.flush() (which will 1308 # probably just re-take the lock) in case flush has been overridden in 1309 # a subclass or the user set self.flush to something. This is the same 1310 # behavior as the C implementation. 1311 try: 1312 # may raise BlockingIOError or BrokenPipeError etc 1313 self.flush() 1314 finally: 1315 with self._write_lock: 1316 self.raw.close() 1317 1318 1319class BufferedRWPair(BufferedIOBase): 1320 1321 """A buffered reader and writer object together. 1322 1323 A buffered reader object and buffered writer object put together to 1324 form a sequential IO object that can read and write. This is typically 1325 used with a socket or two-way pipe. 1326 1327 reader and writer are RawIOBase objects that are readable and 1328 writeable respectively. If the buffer_size is omitted it defaults to 1329 DEFAULT_BUFFER_SIZE. 1330 """ 1331 1332 # XXX The usefulness of this (compared to having two separate IO 1333 # objects) is questionable. 1334 1335 def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE): 1336 """Constructor. 1337 1338 The arguments are two RawIO instances. 1339 """ 1340 if not reader.readable(): 1341 raise OSError('"reader" argument must be readable.') 1342 1343 if not writer.writable(): 1344 raise OSError('"writer" argument must be writable.') 1345 1346 self.reader = BufferedReader(reader, buffer_size) 1347 self.writer = BufferedWriter(writer, buffer_size) 1348 1349 def read(self, size=-1): 1350 if size is None: 1351 size = -1 1352 return self.reader.read(size) 1353 1354 def readinto(self, b): 1355 return self.reader.readinto(b) 1356 1357 def write(self, b): 1358 return self.writer.write(b) 1359 1360 def peek(self, size=0): 1361 return self.reader.peek(size) 1362 1363 def read1(self, size=-1): 1364 return self.reader.read1(size) 1365 1366 def readinto1(self, b): 1367 return self.reader.readinto1(b) 1368 1369 def readable(self): 1370 return self.reader.readable() 1371 1372 def writable(self): 1373 return self.writer.writable() 1374 1375 def flush(self): 1376 return self.writer.flush() 1377 1378 def close(self): 1379 try: 1380 self.writer.close() 1381 finally: 1382 self.reader.close() 1383 1384 def isatty(self): 1385 return self.reader.isatty() or self.writer.isatty() 1386 1387 @property 1388 def closed(self): 1389 return self.writer.closed 1390 1391 1392class BufferedRandom(BufferedWriter, BufferedReader): 1393 1394 """A buffered interface to random access streams. 1395 1396 The constructor creates a reader and writer for a seekable stream, 1397 raw, given in the first argument. If the buffer_size is omitted it 1398 defaults to DEFAULT_BUFFER_SIZE. 1399 """ 1400 1401 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 1402 raw._checkSeekable() 1403 BufferedReader.__init__(self, raw, buffer_size) 1404 BufferedWriter.__init__(self, raw, buffer_size) 1405 1406 def seek(self, pos, whence=0): 1407 if whence not in valid_seek_flags: 1408 raise ValueError("invalid whence value") 1409 self.flush() 1410 if self._read_buf: 1411 # Undo read ahead. 1412 with self._read_lock: 1413 self.raw.seek(self._read_pos - len(self._read_buf), 1) 1414 # First do the raw seek, then empty the read buffer, so that 1415 # if the raw seek fails, we don't lose buffered data forever. 1416 pos = self.raw.seek(pos, whence) 1417 with self._read_lock: 1418 self._reset_read_buf() 1419 if pos < 0: 1420 raise OSError("seek() returned invalid position") 1421 return pos 1422 1423 def tell(self): 1424 if self._write_buf: 1425 return BufferedWriter.tell(self) 1426 else: 1427 return BufferedReader.tell(self) 1428 1429 def truncate(self, pos=None): 1430 if pos is None: 1431 pos = self.tell() 1432 # Use seek to flush the read buffer. 1433 return BufferedWriter.truncate(self, pos) 1434 1435 def read(self, size=None): 1436 if size is None: 1437 size = -1 1438 self.flush() 1439 return BufferedReader.read(self, size) 1440 1441 def readinto(self, b): 1442 self.flush() 1443 return BufferedReader.readinto(self, b) 1444 1445 def peek(self, size=0): 1446 self.flush() 1447 return BufferedReader.peek(self, size) 1448 1449 def read1(self, size=-1): 1450 self.flush() 1451 return BufferedReader.read1(self, size) 1452 1453 def readinto1(self, b): 1454 self.flush() 1455 return BufferedReader.readinto1(self, b) 1456 1457 def write(self, b): 1458 if self._read_buf: 1459 # Undo readahead 1460 with self._read_lock: 1461 self.raw.seek(self._read_pos - len(self._read_buf), 1) 1462 self._reset_read_buf() 1463 return BufferedWriter.write(self, b) 1464 1465 1466class FileIO(RawIOBase): 1467 _fd = -1 1468 _created = False 1469 _readable = False 1470 _writable = False 1471 _appending = False 1472 _seekable = None 1473 _closefd = True 1474 1475 def __init__(self, file, mode='r', closefd=True, opener=None): 1476 """Open a file. The mode can be 'r' (default), 'w', 'x' or 'a' for reading, 1477 writing, exclusive creation or appending. The file will be created if it 1478 doesn't exist when opened for writing or appending; it will be truncated 1479 when opened for writing. A FileExistsError will be raised if it already 1480 exists when opened for creating. Opening a file for creating implies 1481 writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode 1482 to allow simultaneous reading and writing. A custom opener can be used by 1483 passing a callable as *opener*. The underlying file descriptor for the file 1484 object is then obtained by calling opener with (*name*, *flags*). 1485 *opener* must return an open file descriptor (passing os.open as *opener* 1486 results in functionality similar to passing None). 1487 """ 1488 if self._fd >= 0: 1489 # Have to close the existing file first. 1490 try: 1491 if self._closefd: 1492 os.close(self._fd) 1493 finally: 1494 self._fd = -1 1495 1496 if isinstance(file, float): 1497 raise TypeError('integer argument expected, got float') 1498 if isinstance(file, int): 1499 if isinstance(file, bool): 1500 import warnings 1501 warnings.warn("bool is used as a file descriptor", 1502 RuntimeWarning, stacklevel=2) 1503 file = int(file) 1504 fd = file 1505 if fd < 0: 1506 raise ValueError('negative file descriptor') 1507 else: 1508 fd = -1 1509 1510 if not isinstance(mode, str): 1511 raise TypeError('invalid mode: %s' % (mode,)) 1512 if not set(mode) <= set('xrwab+'): 1513 raise ValueError('invalid mode: %s' % (mode,)) 1514 if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1: 1515 raise ValueError('Must have exactly one of create/read/write/append ' 1516 'mode and at most one plus') 1517 1518 if 'x' in mode: 1519 self._created = True 1520 self._writable = True 1521 flags = os.O_EXCL | os.O_CREAT 1522 elif 'r' in mode: 1523 self._readable = True 1524 flags = 0 1525 elif 'w' in mode: 1526 self._writable = True 1527 flags = os.O_CREAT | os.O_TRUNC 1528 elif 'a' in mode: 1529 self._writable = True 1530 self._appending = True 1531 flags = os.O_APPEND | os.O_CREAT 1532 1533 if '+' in mode: 1534 self._readable = True 1535 self._writable = True 1536 1537 if self._readable and self._writable: 1538 flags |= os.O_RDWR 1539 elif self._readable: 1540 flags |= os.O_RDONLY 1541 else: 1542 flags |= os.O_WRONLY 1543 1544 flags |= getattr(os, 'O_BINARY', 0) 1545 1546 noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or 1547 getattr(os, 'O_CLOEXEC', 0)) 1548 flags |= noinherit_flag 1549 1550 owned_fd = None 1551 try: 1552 if fd < 0: 1553 if not closefd: 1554 raise ValueError('Cannot use closefd=False with file name') 1555 if opener is None: 1556 fd = os.open(file, flags, 0o666) 1557 else: 1558 fd = opener(file, flags) 1559 if not isinstance(fd, int): 1560 raise TypeError('expected integer from opener') 1561 if fd < 0: 1562 raise OSError('Negative file descriptor') 1563 owned_fd = fd 1564 if not noinherit_flag: 1565 os.set_inheritable(fd, False) 1566 1567 self._closefd = closefd 1568 fdfstat = os.fstat(fd) 1569 try: 1570 if stat.S_ISDIR(fdfstat.st_mode): 1571 raise IsADirectoryError(errno.EISDIR, 1572 os.strerror(errno.EISDIR), file) 1573 except AttributeError: 1574 # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR 1575 # don't exist. 1576 pass 1577 self._blksize = getattr(fdfstat, 'st_blksize', 0) 1578 if self._blksize <= 1: 1579 self._blksize = DEFAULT_BUFFER_SIZE 1580 1581 if _setmode: 1582 # don't translate newlines (\r\n <=> \n) 1583 _setmode(fd, os.O_BINARY) 1584 1585 self.name = file 1586 if self._appending: 1587 # For consistent behaviour, we explicitly seek to the 1588 # end of file (otherwise, it might be done only on the 1589 # first write()). 1590 try: 1591 os.lseek(fd, 0, SEEK_END) 1592 except OSError as e: 1593 if e.errno != errno.ESPIPE: 1594 raise 1595 except: 1596 if owned_fd is not None: 1597 os.close(owned_fd) 1598 raise 1599 self._fd = fd 1600 1601 def __del__(self): 1602 if self._fd >= 0 and self._closefd and not self.closed: 1603 import warnings 1604 warnings.warn('unclosed file %r' % (self,), ResourceWarning, 1605 stacklevel=2, source=self) 1606 self.close() 1607 1608 def __getstate__(self): 1609 raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") 1610 1611 def __repr__(self): 1612 class_name = '%s.%s' % (self.__class__.__module__, 1613 self.__class__.__qualname__) 1614 if self.closed: 1615 return '<%s [closed]>' % class_name 1616 try: 1617 name = self.name 1618 except AttributeError: 1619 return ('<%s fd=%d mode=%r closefd=%r>' % 1620 (class_name, self._fd, self.mode, self._closefd)) 1621 else: 1622 return ('<%s name=%r mode=%r closefd=%r>' % 1623 (class_name, name, self.mode, self._closefd)) 1624 1625 def _checkReadable(self): 1626 if not self._readable: 1627 raise UnsupportedOperation('File not open for reading') 1628 1629 def _checkWritable(self, msg=None): 1630 if not self._writable: 1631 raise UnsupportedOperation('File not open for writing') 1632 1633 def read(self, size=None): 1634 """Read at most size bytes, returned as bytes. 1635 1636 Only makes one system call, so less data may be returned than requested 1637 In non-blocking mode, returns None if no data is available. 1638 Return an empty bytes object at EOF. 1639 """ 1640 self._checkClosed() 1641 self._checkReadable() 1642 if size is None or size < 0: 1643 return self.readall() 1644 try: 1645 return os.read(self._fd, size) 1646 except BlockingIOError: 1647 return None 1648 1649 def readall(self): 1650 """Read all data from the file, returned as bytes. 1651 1652 In non-blocking mode, returns as much as is immediately available, 1653 or None if no data is available. Return an empty bytes object at EOF. 1654 """ 1655 self._checkClosed() 1656 self._checkReadable() 1657 bufsize = DEFAULT_BUFFER_SIZE 1658 try: 1659 pos = os.lseek(self._fd, 0, SEEK_CUR) 1660 end = os.fstat(self._fd).st_size 1661 if end >= pos: 1662 bufsize = end - pos + 1 1663 except OSError: 1664 pass 1665 1666 result = bytearray() 1667 while True: 1668 if len(result) >= bufsize: 1669 bufsize = len(result) 1670 bufsize += max(bufsize, DEFAULT_BUFFER_SIZE) 1671 n = bufsize - len(result) 1672 try: 1673 chunk = os.read(self._fd, n) 1674 except BlockingIOError: 1675 if result: 1676 break 1677 return None 1678 if not chunk: # reached the end of the file 1679 break 1680 result += chunk 1681 1682 return bytes(result) 1683 1684 def readinto(self, b): 1685 """Same as RawIOBase.readinto().""" 1686 m = memoryview(b).cast('B') 1687 data = self.read(len(m)) 1688 n = len(data) 1689 m[:n] = data 1690 return n 1691 1692 def write(self, b): 1693 """Write bytes b to file, return number written. 1694 1695 Only makes one system call, so not all of the data may be written. 1696 The number of bytes actually written is returned. In non-blocking mode, 1697 returns None if the write would block. 1698 """ 1699 self._checkClosed() 1700 self._checkWritable() 1701 try: 1702 return os.write(self._fd, b) 1703 except BlockingIOError: 1704 return None 1705 1706 def seek(self, pos, whence=SEEK_SET): 1707 """Move to new file position. 1708 1709 Argument offset is a byte count. Optional argument whence defaults to 1710 SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values 1711 are SEEK_CUR or 1 (move relative to current position, positive or negative), 1712 and SEEK_END or 2 (move relative to end of file, usually negative, although 1713 many platforms allow seeking beyond the end of a file). 1714 1715 Note that not all file objects are seekable. 1716 """ 1717 if isinstance(pos, float): 1718 raise TypeError('an integer is required') 1719 self._checkClosed() 1720 return os.lseek(self._fd, pos, whence) 1721 1722 def tell(self): 1723 """tell() -> int. Current file position. 1724 1725 Can raise OSError for non seekable files.""" 1726 self._checkClosed() 1727 return os.lseek(self._fd, 0, SEEK_CUR) 1728 1729 def truncate(self, size=None): 1730 """Truncate the file to at most size bytes. 1731 1732 Size defaults to the current file position, as returned by tell(). 1733 The current file position is changed to the value of size. 1734 """ 1735 self._checkClosed() 1736 self._checkWritable() 1737 if size is None: 1738 size = self.tell() 1739 os.ftruncate(self._fd, size) 1740 return size 1741 1742 def close(self): 1743 """Close the file. 1744 1745 A closed file cannot be used for further I/O operations. close() may be 1746 called more than once without error. 1747 """ 1748 if not self.closed: 1749 try: 1750 if self._closefd: 1751 os.close(self._fd) 1752 finally: 1753 super().close() 1754 1755 def seekable(self): 1756 """True if file supports random-access.""" 1757 self._checkClosed() 1758 if self._seekable is None: 1759 try: 1760 self.tell() 1761 except OSError: 1762 self._seekable = False 1763 else: 1764 self._seekable = True 1765 return self._seekable 1766 1767 def readable(self): 1768 """True if file was opened in a read mode.""" 1769 self._checkClosed() 1770 return self._readable 1771 1772 def writable(self): 1773 """True if file was opened in a write mode.""" 1774 self._checkClosed() 1775 return self._writable 1776 1777 def fileno(self): 1778 """Return the underlying file descriptor (an integer).""" 1779 self._checkClosed() 1780 return self._fd 1781 1782 def isatty(self): 1783 """True if the file is connected to a TTY device.""" 1784 self._checkClosed() 1785 return os.isatty(self._fd) 1786 1787 @property 1788 def closefd(self): 1789 """True if the file descriptor will be closed by close().""" 1790 return self._closefd 1791 1792 @property 1793 def mode(self): 1794 """String giving the file mode""" 1795 if self._created: 1796 if self._readable: 1797 return 'xb+' 1798 else: 1799 return 'xb' 1800 elif self._appending: 1801 if self._readable: 1802 return 'ab+' 1803 else: 1804 return 'ab' 1805 elif self._readable: 1806 if self._writable: 1807 return 'rb+' 1808 else: 1809 return 'rb' 1810 else: 1811 return 'wb' 1812 1813 1814class TextIOBase(IOBase): 1815 1816 """Base class for text I/O. 1817 1818 This class provides a character and line based interface to stream 1819 I/O. 1820 """ 1821 1822 def read(self, size=-1): 1823 """Read at most size characters from stream, where size is an int. 1824 1825 Read from underlying buffer until we have size characters or we hit EOF. 1826 If size is negative or omitted, read until EOF. 1827 1828 Returns a string. 1829 """ 1830 self._unsupported("read") 1831 1832 def write(self, s): 1833 """Write string s to stream and returning an int.""" 1834 self._unsupported("write") 1835 1836 def truncate(self, pos=None): 1837 """Truncate size to pos, where pos is an int.""" 1838 self._unsupported("truncate") 1839 1840 def readline(self): 1841 """Read until newline or EOF. 1842 1843 Returns an empty string if EOF is hit immediately. 1844 """ 1845 self._unsupported("readline") 1846 1847 def detach(self): 1848 """ 1849 Separate the underlying buffer from the TextIOBase and return it. 1850 1851 After the underlying buffer has been detached, the TextIO is in an 1852 unusable state. 1853 """ 1854 self._unsupported("detach") 1855 1856 @property 1857 def encoding(self): 1858 """Subclasses should override.""" 1859 return None 1860 1861 @property 1862 def newlines(self): 1863 """Line endings translated so far. 1864 1865 Only line endings translated during reading are considered. 1866 1867 Subclasses should override. 1868 """ 1869 return None 1870 1871 @property 1872 def errors(self): 1873 """Error setting of the decoder or encoder. 1874 1875 Subclasses should override.""" 1876 return None 1877 1878io.TextIOBase.register(TextIOBase) 1879 1880 1881class IncrementalNewlineDecoder(codecs.IncrementalDecoder): 1882 r"""Codec used when reading a file in universal newlines mode. It wraps 1883 another incremental decoder, translating \r\n and \r into \n. It also 1884 records the types of newlines encountered. When used with 1885 translate=False, it ensures that the newline sequence is returned in 1886 one piece. 1887 """ 1888 def __init__(self, decoder, translate, errors='strict'): 1889 codecs.IncrementalDecoder.__init__(self, errors=errors) 1890 self.translate = translate 1891 self.decoder = decoder 1892 self.seennl = 0 1893 self.pendingcr = False 1894 1895 def decode(self, input, final=False): 1896 # decode input (with the eventual \r from a previous pass) 1897 if self.decoder is None: 1898 output = input 1899 else: 1900 output = self.decoder.decode(input, final=final) 1901 if self.pendingcr and (output or final): 1902 output = "\r" + output 1903 self.pendingcr = False 1904 1905 # retain last \r even when not translating data: 1906 # then readline() is sure to get \r\n in one pass 1907 if output.endswith("\r") and not final: 1908 output = output[:-1] 1909 self.pendingcr = True 1910 1911 # Record which newlines are read 1912 crlf = output.count('\r\n') 1913 cr = output.count('\r') - crlf 1914 lf = output.count('\n') - crlf 1915 self.seennl |= (lf and self._LF) | (cr and self._CR) \ 1916 | (crlf and self._CRLF) 1917 1918 if self.translate: 1919 if crlf: 1920 output = output.replace("\r\n", "\n") 1921 if cr: 1922 output = output.replace("\r", "\n") 1923 1924 return output 1925 1926 def getstate(self): 1927 if self.decoder is None: 1928 buf = b"" 1929 flag = 0 1930 else: 1931 buf, flag = self.decoder.getstate() 1932 flag <<= 1 1933 if self.pendingcr: 1934 flag |= 1 1935 return buf, flag 1936 1937 def setstate(self, state): 1938 buf, flag = state 1939 self.pendingcr = bool(flag & 1) 1940 if self.decoder is not None: 1941 self.decoder.setstate((buf, flag >> 1)) 1942 1943 def reset(self): 1944 self.seennl = 0 1945 self.pendingcr = False 1946 if self.decoder is not None: 1947 self.decoder.reset() 1948 1949 _LF = 1 1950 _CR = 2 1951 _CRLF = 4 1952 1953 @property 1954 def newlines(self): 1955 return (None, 1956 "\n", 1957 "\r", 1958 ("\r", "\n"), 1959 "\r\n", 1960 ("\n", "\r\n"), 1961 ("\r", "\r\n"), 1962 ("\r", "\n", "\r\n") 1963 )[self.seennl] 1964 1965 1966class TextIOWrapper(TextIOBase): 1967 1968 r"""Character and line based layer over a BufferedIOBase object, buffer. 1969 1970 encoding gives the name of the encoding that the stream will be 1971 decoded or encoded with. It defaults to locale.getencoding(). 1972 1973 errors determines the strictness of encoding and decoding (see the 1974 codecs.register) and defaults to "strict". 1975 1976 newline can be None, '', '\n', '\r', or '\r\n'. It controls the 1977 handling of line endings. If it is None, universal newlines is 1978 enabled. With this enabled, on input, the lines endings '\n', '\r', 1979 or '\r\n' are translated to '\n' before being returned to the 1980 caller. Conversely, on output, '\n' is translated to the system 1981 default line separator, os.linesep. If newline is any other of its 1982 legal values, that newline becomes the newline when the file is read 1983 and it is returned untranslated. On output, '\n' is converted to the 1984 newline. 1985 1986 If line_buffering is True, a call to flush is implied when a call to 1987 write contains a newline character. 1988 """ 1989 1990 _CHUNK_SIZE = 2048 1991 1992 # Initialize _buffer as soon as possible since it's used by __del__() 1993 # which calls close() 1994 _buffer = None 1995 1996 # The write_through argument has no effect here since this 1997 # implementation always writes through. The argument is present only 1998 # so that the signature can match the signature of the C version. 1999 def __init__(self, buffer, encoding=None, errors=None, newline=None, 2000 line_buffering=False, write_through=False): 2001 self._check_newline(newline) 2002 encoding = text_encoding(encoding) 2003 2004 if encoding == "locale": 2005 encoding = self._get_locale_encoding() 2006 2007 if not isinstance(encoding, str): 2008 raise ValueError("invalid encoding: %r" % encoding) 2009 2010 if not codecs.lookup(encoding)._is_text_encoding: 2011 msg = ("%r is not a text encoding; " 2012 "use codecs.open() to handle arbitrary codecs") 2013 raise LookupError(msg % encoding) 2014 2015 if errors is None: 2016 errors = "strict" 2017 else: 2018 if not isinstance(errors, str): 2019 raise ValueError("invalid errors: %r" % errors) 2020 if _CHECK_ERRORS: 2021 codecs.lookup_error(errors) 2022 2023 self._buffer = buffer 2024 self._decoded_chars = '' # buffer for text returned from decoder 2025 self._decoded_chars_used = 0 # offset into _decoded_chars for read() 2026 self._snapshot = None # info for reconstructing decoder state 2027 self._seekable = self._telling = self.buffer.seekable() 2028 self._has_read1 = hasattr(self.buffer, 'read1') 2029 self._configure(encoding, errors, newline, 2030 line_buffering, write_through) 2031 2032 def _check_newline(self, newline): 2033 if newline is not None and not isinstance(newline, str): 2034 raise TypeError("illegal newline type: %r" % (type(newline),)) 2035 if newline not in (None, "", "\n", "\r", "\r\n"): 2036 raise ValueError("illegal newline value: %r" % (newline,)) 2037 2038 def _configure(self, encoding=None, errors=None, newline=None, 2039 line_buffering=False, write_through=False): 2040 self._encoding = encoding 2041 self._errors = errors 2042 self._encoder = None 2043 self._decoder = None 2044 self._b2cratio = 0.0 2045 2046 self._readuniversal = not newline 2047 self._readtranslate = newline is None 2048 self._readnl = newline 2049 self._writetranslate = newline != '' 2050 self._writenl = newline or os.linesep 2051 2052 self._line_buffering = line_buffering 2053 self._write_through = write_through 2054 2055 # don't write a BOM in the middle of a file 2056 if self._seekable and self.writable(): 2057 position = self.buffer.tell() 2058 if position != 0: 2059 try: 2060 self._get_encoder().setstate(0) 2061 except LookupError: 2062 # Sometimes the encoder doesn't exist 2063 pass 2064 2065 # self._snapshot is either None, or a tuple (dec_flags, next_input) 2066 # where dec_flags is the second (integer) item of the decoder state 2067 # and next_input is the chunk of input bytes that comes next after the 2068 # snapshot point. We use this to reconstruct decoder states in tell(). 2069 2070 # Naming convention: 2071 # - "bytes_..." for integer variables that count input bytes 2072 # - "chars_..." for integer variables that count decoded characters 2073 2074 def __repr__(self): 2075 result = "<{}.{}".format(self.__class__.__module__, 2076 self.__class__.__qualname__) 2077 try: 2078 name = self.name 2079 except AttributeError: 2080 pass 2081 else: 2082 result += " name={0!r}".format(name) 2083 try: 2084 mode = self.mode 2085 except AttributeError: 2086 pass 2087 else: 2088 result += " mode={0!r}".format(mode) 2089 return result + " encoding={0!r}>".format(self.encoding) 2090 2091 @property 2092 def encoding(self): 2093 return self._encoding 2094 2095 @property 2096 def errors(self): 2097 return self._errors 2098 2099 @property 2100 def line_buffering(self): 2101 return self._line_buffering 2102 2103 @property 2104 def write_through(self): 2105 return self._write_through 2106 2107 @property 2108 def buffer(self): 2109 return self._buffer 2110 2111 def reconfigure(self, *, 2112 encoding=None, errors=None, newline=Ellipsis, 2113 line_buffering=None, write_through=None): 2114 """Reconfigure the text stream with new parameters. 2115 2116 This also flushes the stream. 2117 """ 2118 if (self._decoder is not None 2119 and (encoding is not None or errors is not None 2120 or newline is not Ellipsis)): 2121 raise UnsupportedOperation( 2122 "It is not possible to set the encoding or newline of stream " 2123 "after the first read") 2124 2125 if errors is None: 2126 if encoding is None: 2127 errors = self._errors 2128 else: 2129 errors = 'strict' 2130 elif not isinstance(errors, str): 2131 raise TypeError("invalid errors: %r" % errors) 2132 2133 if encoding is None: 2134 encoding = self._encoding 2135 else: 2136 if not isinstance(encoding, str): 2137 raise TypeError("invalid encoding: %r" % encoding) 2138 if encoding == "locale": 2139 encoding = self._get_locale_encoding() 2140 2141 if newline is Ellipsis: 2142 newline = self._readnl 2143 self._check_newline(newline) 2144 2145 if line_buffering is None: 2146 line_buffering = self.line_buffering 2147 if write_through is None: 2148 write_through = self.write_through 2149 2150 self.flush() 2151 self._configure(encoding, errors, newline, 2152 line_buffering, write_through) 2153 2154 def seekable(self): 2155 if self.closed: 2156 raise ValueError("I/O operation on closed file.") 2157 return self._seekable 2158 2159 def readable(self): 2160 return self.buffer.readable() 2161 2162 def writable(self): 2163 return self.buffer.writable() 2164 2165 def flush(self): 2166 self.buffer.flush() 2167 self._telling = self._seekable 2168 2169 def close(self): 2170 if self.buffer is not None and not self.closed: 2171 try: 2172 self.flush() 2173 finally: 2174 self.buffer.close() 2175 2176 @property 2177 def closed(self): 2178 return self.buffer.closed 2179 2180 @property 2181 def name(self): 2182 return self.buffer.name 2183 2184 def fileno(self): 2185 return self.buffer.fileno() 2186 2187 def isatty(self): 2188 return self.buffer.isatty() 2189 2190 def write(self, s): 2191 'Write data, where s is a str' 2192 if self.closed: 2193 raise ValueError("write to closed file") 2194 if not isinstance(s, str): 2195 raise TypeError("can't write %s to text stream" % 2196 s.__class__.__name__) 2197 length = len(s) 2198 haslf = (self._writetranslate or self._line_buffering) and "\n" in s 2199 if haslf and self._writetranslate and self._writenl != "\n": 2200 s = s.replace("\n", self._writenl) 2201 encoder = self._encoder or self._get_encoder() 2202 # XXX What if we were just reading? 2203 b = encoder.encode(s) 2204 self.buffer.write(b) 2205 if self._line_buffering and (haslf or "\r" in s): 2206 self.flush() 2207 if self._snapshot is not None: 2208 self._set_decoded_chars('') 2209 self._snapshot = None 2210 if self._decoder: 2211 self._decoder.reset() 2212 return length 2213 2214 def _get_encoder(self): 2215 make_encoder = codecs.getincrementalencoder(self._encoding) 2216 self._encoder = make_encoder(self._errors) 2217 return self._encoder 2218 2219 def _get_decoder(self): 2220 make_decoder = codecs.getincrementaldecoder(self._encoding) 2221 decoder = make_decoder(self._errors) 2222 if self._readuniversal: 2223 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) 2224 self._decoder = decoder 2225 return decoder 2226 2227 # The following three methods implement an ADT for _decoded_chars. 2228 # Text returned from the decoder is buffered here until the client 2229 # requests it by calling our read() or readline() method. 2230 def _set_decoded_chars(self, chars): 2231 """Set the _decoded_chars buffer.""" 2232 self._decoded_chars = chars 2233 self._decoded_chars_used = 0 2234 2235 def _get_decoded_chars(self, n=None): 2236 """Advance into the _decoded_chars buffer.""" 2237 offset = self._decoded_chars_used 2238 if n is None: 2239 chars = self._decoded_chars[offset:] 2240 else: 2241 chars = self._decoded_chars[offset:offset + n] 2242 self._decoded_chars_used += len(chars) 2243 return chars 2244 2245 def _get_locale_encoding(self): 2246 try: 2247 import locale 2248 except ImportError: 2249 # Importing locale may fail if Python is being built 2250 return "utf-8" 2251 else: 2252 return locale.getencoding() 2253 2254 def _rewind_decoded_chars(self, n): 2255 """Rewind the _decoded_chars buffer.""" 2256 if self._decoded_chars_used < n: 2257 raise AssertionError("rewind decoded_chars out of bounds") 2258 self._decoded_chars_used -= n 2259 2260 def _read_chunk(self): 2261 """ 2262 Read and decode the next chunk of data from the BufferedReader. 2263 """ 2264 2265 # The return value is True unless EOF was reached. The decoded 2266 # string is placed in self._decoded_chars (replacing its previous 2267 # value). The entire input chunk is sent to the decoder, though 2268 # some of it may remain buffered in the decoder, yet to be 2269 # converted. 2270 2271 if self._decoder is None: 2272 raise ValueError("no decoder") 2273 2274 if self._telling: 2275 # To prepare for tell(), we need to snapshot a point in the 2276 # file where the decoder's input buffer is empty. 2277 2278 dec_buffer, dec_flags = self._decoder.getstate() 2279 # Given this, we know there was a valid snapshot point 2280 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). 2281 2282 # Read a chunk, decode it, and put the result in self._decoded_chars. 2283 if self._has_read1: 2284 input_chunk = self.buffer.read1(self._CHUNK_SIZE) 2285 else: 2286 input_chunk = self.buffer.read(self._CHUNK_SIZE) 2287 eof = not input_chunk 2288 decoded_chars = self._decoder.decode(input_chunk, eof) 2289 self._set_decoded_chars(decoded_chars) 2290 if decoded_chars: 2291 self._b2cratio = len(input_chunk) / len(self._decoded_chars) 2292 else: 2293 self._b2cratio = 0.0 2294 2295 if self._telling: 2296 # At the snapshot point, len(dec_buffer) bytes before the read, 2297 # the next input to be decoded is dec_buffer + input_chunk. 2298 self._snapshot = (dec_flags, dec_buffer + input_chunk) 2299 2300 return not eof 2301 2302 def _pack_cookie(self, position, dec_flags=0, 2303 bytes_to_feed=0, need_eof=False, chars_to_skip=0): 2304 # The meaning of a tell() cookie is: seek to position, set the 2305 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them 2306 # into the decoder with need_eof as the EOF flag, then skip 2307 # chars_to_skip characters of the decoded result. For most simple 2308 # decoders, tell() will often just give a byte offset in the file. 2309 return (position | (dec_flags<<64) | (bytes_to_feed<<128) | 2310 (chars_to_skip<<192) | bool(need_eof)<<256) 2311 2312 def _unpack_cookie(self, bigint): 2313 rest, position = divmod(bigint, 1<<64) 2314 rest, dec_flags = divmod(rest, 1<<64) 2315 rest, bytes_to_feed = divmod(rest, 1<<64) 2316 need_eof, chars_to_skip = divmod(rest, 1<<64) 2317 return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip 2318 2319 def tell(self): 2320 if not self._seekable: 2321 raise UnsupportedOperation("underlying stream is not seekable") 2322 if not self._telling: 2323 raise OSError("telling position disabled by next() call") 2324 self.flush() 2325 position = self.buffer.tell() 2326 decoder = self._decoder 2327 if decoder is None or self._snapshot is None: 2328 if self._decoded_chars: 2329 # This should never happen. 2330 raise AssertionError("pending decoded text") 2331 return position 2332 2333 # Skip backward to the snapshot point (see _read_chunk). 2334 dec_flags, next_input = self._snapshot 2335 position -= len(next_input) 2336 2337 # How many decoded characters have been used up since the snapshot? 2338 chars_to_skip = self._decoded_chars_used 2339 if chars_to_skip == 0: 2340 # We haven't moved from the snapshot point. 2341 return self._pack_cookie(position, dec_flags) 2342 2343 # Starting from the snapshot position, we will walk the decoder 2344 # forward until it gives us enough decoded characters. 2345 saved_state = decoder.getstate() 2346 try: 2347 # Fast search for an acceptable start point, close to our 2348 # current pos. 2349 # Rationale: calling decoder.decode() has a large overhead 2350 # regardless of chunk size; we want the number of such calls to 2351 # be O(1) in most situations (common decoders, sensible input). 2352 # Actually, it will be exactly 1 for fixed-size codecs (all 2353 # 8-bit codecs, also UTF-16 and UTF-32). 2354 skip_bytes = int(self._b2cratio * chars_to_skip) 2355 skip_back = 1 2356 assert skip_bytes <= len(next_input) 2357 while skip_bytes > 0: 2358 decoder.setstate((b'', dec_flags)) 2359 # Decode up to temptative start point 2360 n = len(decoder.decode(next_input[:skip_bytes])) 2361 if n <= chars_to_skip: 2362 b, d = decoder.getstate() 2363 if not b: 2364 # Before pos and no bytes buffered in decoder => OK 2365 dec_flags = d 2366 chars_to_skip -= n 2367 break 2368 # Skip back by buffered amount and reset heuristic 2369 skip_bytes -= len(b) 2370 skip_back = 1 2371 else: 2372 # We're too far ahead, skip back a bit 2373 skip_bytes -= skip_back 2374 skip_back = skip_back * 2 2375 else: 2376 skip_bytes = 0 2377 decoder.setstate((b'', dec_flags)) 2378 2379 # Note our initial start point. 2380 start_pos = position + skip_bytes 2381 start_flags = dec_flags 2382 if chars_to_skip == 0: 2383 # We haven't moved from the start point. 2384 return self._pack_cookie(start_pos, start_flags) 2385 2386 # Feed the decoder one byte at a time. As we go, note the 2387 # nearest "safe start point" before the current location 2388 # (a point where the decoder has nothing buffered, so seek() 2389 # can safely start from there and advance to this location). 2390 bytes_fed = 0 2391 need_eof = False 2392 # Chars decoded since `start_pos` 2393 chars_decoded = 0 2394 for i in range(skip_bytes, len(next_input)): 2395 bytes_fed += 1 2396 chars_decoded += len(decoder.decode(next_input[i:i+1])) 2397 dec_buffer, dec_flags = decoder.getstate() 2398 if not dec_buffer and chars_decoded <= chars_to_skip: 2399 # Decoder buffer is empty, so this is a safe start point. 2400 start_pos += bytes_fed 2401 chars_to_skip -= chars_decoded 2402 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 2403 if chars_decoded >= chars_to_skip: 2404 break 2405 else: 2406 # We didn't get enough decoded data; signal EOF to get more. 2407 chars_decoded += len(decoder.decode(b'', final=True)) 2408 need_eof = True 2409 if chars_decoded < chars_to_skip: 2410 raise OSError("can't reconstruct logical file position") 2411 2412 # The returned cookie corresponds to the last safe start point. 2413 return self._pack_cookie( 2414 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip) 2415 finally: 2416 decoder.setstate(saved_state) 2417 2418 def truncate(self, pos=None): 2419 self.flush() 2420 if pos is None: 2421 pos = self.tell() 2422 return self.buffer.truncate(pos) 2423 2424 def detach(self): 2425 if self.buffer is None: 2426 raise ValueError("buffer is already detached") 2427 self.flush() 2428 buffer = self._buffer 2429 self._buffer = None 2430 return buffer 2431 2432 def seek(self, cookie, whence=0): 2433 def _reset_encoder(position): 2434 """Reset the encoder (merely useful for proper BOM handling)""" 2435 try: 2436 encoder = self._encoder or self._get_encoder() 2437 except LookupError: 2438 # Sometimes the encoder doesn't exist 2439 pass 2440 else: 2441 if position != 0: 2442 encoder.setstate(0) 2443 else: 2444 encoder.reset() 2445 2446 if self.closed: 2447 raise ValueError("tell on closed file") 2448 if not self._seekable: 2449 raise UnsupportedOperation("underlying stream is not seekable") 2450 if whence == SEEK_CUR: 2451 if cookie != 0: 2452 raise UnsupportedOperation("can't do nonzero cur-relative seeks") 2453 # Seeking to the current position should attempt to 2454 # sync the underlying buffer with the current position. 2455 whence = 0 2456 cookie = self.tell() 2457 elif whence == SEEK_END: 2458 if cookie != 0: 2459 raise UnsupportedOperation("can't do nonzero end-relative seeks") 2460 self.flush() 2461 position = self.buffer.seek(0, whence) 2462 self._set_decoded_chars('') 2463 self._snapshot = None 2464 if self._decoder: 2465 self._decoder.reset() 2466 _reset_encoder(position) 2467 return position 2468 if whence != 0: 2469 raise ValueError("unsupported whence (%r)" % (whence,)) 2470 if cookie < 0: 2471 raise ValueError("negative seek position %r" % (cookie,)) 2472 self.flush() 2473 2474 # The strategy of seek() is to go back to the safe start point 2475 # and replay the effect of read(chars_to_skip) from there. 2476 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \ 2477 self._unpack_cookie(cookie) 2478 2479 # Seek back to the safe start point. 2480 self.buffer.seek(start_pos) 2481 self._set_decoded_chars('') 2482 self._snapshot = None 2483 2484 # Restore the decoder to its state from the safe start point. 2485 if cookie == 0 and self._decoder: 2486 self._decoder.reset() 2487 elif self._decoder or dec_flags or chars_to_skip: 2488 self._decoder = self._decoder or self._get_decoder() 2489 self._decoder.setstate((b'', dec_flags)) 2490 self._snapshot = (dec_flags, b'') 2491 2492 if chars_to_skip: 2493 # Just like _read_chunk, feed the decoder and save a snapshot. 2494 input_chunk = self.buffer.read(bytes_to_feed) 2495 self._set_decoded_chars( 2496 self._decoder.decode(input_chunk, need_eof)) 2497 self._snapshot = (dec_flags, input_chunk) 2498 2499 # Skip chars_to_skip of the decoded characters. 2500 if len(self._decoded_chars) < chars_to_skip: 2501 raise OSError("can't restore logical file position") 2502 self._decoded_chars_used = chars_to_skip 2503 2504 _reset_encoder(cookie) 2505 return cookie 2506 2507 def read(self, size=None): 2508 self._checkReadable() 2509 if size is None: 2510 size = -1 2511 else: 2512 try: 2513 size_index = size.__index__ 2514 except AttributeError: 2515 raise TypeError(f"{size!r} is not an integer") 2516 else: 2517 size = size_index() 2518 decoder = self._decoder or self._get_decoder() 2519 if size < 0: 2520 # Read everything. 2521 result = (self._get_decoded_chars() + 2522 decoder.decode(self.buffer.read(), final=True)) 2523 if self._snapshot is not None: 2524 self._set_decoded_chars('') 2525 self._snapshot = None 2526 return result 2527 else: 2528 # Keep reading chunks until we have size characters to return. 2529 eof = False 2530 result = self._get_decoded_chars(size) 2531 while len(result) < size and not eof: 2532 eof = not self._read_chunk() 2533 result += self._get_decoded_chars(size - len(result)) 2534 return result 2535 2536 def __next__(self): 2537 self._telling = False 2538 line = self.readline() 2539 if not line: 2540 self._snapshot = None 2541 self._telling = self._seekable 2542 raise StopIteration 2543 return line 2544 2545 def readline(self, size=None): 2546 if self.closed: 2547 raise ValueError("read from closed file") 2548 if size is None: 2549 size = -1 2550 else: 2551 try: 2552 size_index = size.__index__ 2553 except AttributeError: 2554 raise TypeError(f"{size!r} is not an integer") 2555 else: 2556 size = size_index() 2557 2558 # Grab all the decoded text (we will rewind any extra bits later). 2559 line = self._get_decoded_chars() 2560 2561 start = 0 2562 # Make the decoder if it doesn't already exist. 2563 if not self._decoder: 2564 self._get_decoder() 2565 2566 pos = endpos = None 2567 while True: 2568 if self._readtranslate: 2569 # Newlines are already translated, only search for \n 2570 pos = line.find('\n', start) 2571 if pos >= 0: 2572 endpos = pos + 1 2573 break 2574 else: 2575 start = len(line) 2576 2577 elif self._readuniversal: 2578 # Universal newline search. Find any of \r, \r\n, \n 2579 # The decoder ensures that \r\n are not split in two pieces 2580 2581 # In C we'd look for these in parallel of course. 2582 nlpos = line.find("\n", start) 2583 crpos = line.find("\r", start) 2584 if crpos == -1: 2585 if nlpos == -1: 2586 # Nothing found 2587 start = len(line) 2588 else: 2589 # Found \n 2590 endpos = nlpos + 1 2591 break 2592 elif nlpos == -1: 2593 # Found lone \r 2594 endpos = crpos + 1 2595 break 2596 elif nlpos < crpos: 2597 # Found \n 2598 endpos = nlpos + 1 2599 break 2600 elif nlpos == crpos + 1: 2601 # Found \r\n 2602 endpos = crpos + 2 2603 break 2604 else: 2605 # Found \r 2606 endpos = crpos + 1 2607 break 2608 else: 2609 # non-universal 2610 pos = line.find(self._readnl) 2611 if pos >= 0: 2612 endpos = pos + len(self._readnl) 2613 break 2614 2615 if size >= 0 and len(line) >= size: 2616 endpos = size # reached length size 2617 break 2618 2619 # No line ending seen yet - get more data' 2620 while self._read_chunk(): 2621 if self._decoded_chars: 2622 break 2623 if self._decoded_chars: 2624 line += self._get_decoded_chars() 2625 else: 2626 # end of file 2627 self._set_decoded_chars('') 2628 self._snapshot = None 2629 return line 2630 2631 if size >= 0 and endpos > size: 2632 endpos = size # don't exceed size 2633 2634 # Rewind _decoded_chars to just after the line ending we found. 2635 self._rewind_decoded_chars(len(line) - endpos) 2636 return line[:endpos] 2637 2638 @property 2639 def newlines(self): 2640 return self._decoder.newlines if self._decoder else None 2641 2642 2643class StringIO(TextIOWrapper): 2644 """Text I/O implementation using an in-memory buffer. 2645 2646 The initial_value argument sets the value of object. The newline 2647 argument is like the one of TextIOWrapper's constructor. 2648 """ 2649 2650 def __init__(self, initial_value="", newline="\n"): 2651 super(StringIO, self).__init__(BytesIO(), 2652 encoding="utf-8", 2653 errors="surrogatepass", 2654 newline=newline) 2655 # Issue #5645: make universal newlines semantics the same as in the 2656 # C version, even under Windows. 2657 if newline is None: 2658 self._writetranslate = False 2659 if initial_value is not None: 2660 if not isinstance(initial_value, str): 2661 raise TypeError("initial_value must be str or None, not {0}" 2662 .format(type(initial_value).__name__)) 2663 self.write(initial_value) 2664 self.seek(0) 2665 2666 def getvalue(self): 2667 self.flush() 2668 decoder = self._decoder or self._get_decoder() 2669 old_state = decoder.getstate() 2670 decoder.reset() 2671 try: 2672 return decoder.decode(self.buffer.getvalue(), final=True) 2673 finally: 2674 decoder.setstate(old_state) 2675 2676 def __repr__(self): 2677 # TextIOWrapper tells the encoding in its repr. In StringIO, 2678 # that's an implementation detail. 2679 return object.__repr__(self) 2680 2681 @property 2682 def errors(self): 2683 return None 2684 2685 @property 2686 def encoding(self): 2687 return None 2688 2689 def detach(self): 2690 # This doesn't make sense on StringIO. 2691 self._unsupported("detach") 2692