"""
Python implementation of the io module.
"""

import os
import abc
import codecs
import errno
import stat
import sys
# Import _thread instead of threading to reduce startup cost
try:
    from _thread import allocate_lock as Lock
except ImportError:
    from _dummy_thread import allocate_lock as Lock
if sys.platform in {'win32', 'cygwin'}:
    from msvcrt import setmode as _setmode
else:
    _setmode = None

import io
from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)

valid_seek_flags = {0, 1, 2}  # Hardwired values
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags.add(os.SEEK_HOLE)
    valid_seek_flags.add(os.SEEK_DATA)

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError


def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python.  It has no effect in Python 3.  Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool.  If closefd is False, the underlying file descriptor will
    be kept open when the file is closed.  This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).
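
    For example, a custom opener can open files relative to a directory
    file descriptor (an illustrative sketch; the directory name and the
    helper's name are arbitrary)::

        import os
        dir_fd = os.open('somedir', os.O_RDONLY)
        def opener(path, flags):
            return os.open(path, flags, dir_fd=dir_fd)
        with open('spamspam.txt', 'w', opener=opener) as f:
            f.write('written to somedir/spamspam.txt\n')
        os.close(dir_fd)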

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    if not isinstance(file, int):
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)
    modes = set(mode)
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending or updating:
            raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw = FileIO(file,
                 (creating and "x" or "") +
                 (reading and "r" or "") +
                 (writing and "w" or "") +
                 (appending and "a" or "") +
                 (updating and "+" or ""),
                 closefd, opener=opener)
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        result = text
        text.mode = mode
        return result
    except:
        result.close()
        raise


class DocDescriptor:
    """Helper for builtins.open.__doc__
    """
    def __get__(self, obj, typ):
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True)\n\n" +
            open.__doc__)

class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pylifecycle.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        return open(*args, **kwargs)


# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
try:
    UnsupportedOperation = io.UnsupportedOperation
except AttributeError:
    class UnsupportedOperation(OSError, ValueError):
        pass


class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too. In
    some cases (such as readinto), a writable object is required. Text I/O
    classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an OSError exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell(). Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    n = min(n, size)
                return n
        else:
            def nreadahead():
                return 1
        if size is None:
            size = -1
        elif not isinstance(size, int):
            raise TypeError("size must be an integer")
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)


class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None:
            size = -1
        if size < 0:
            return self.readall()
        b = bytearray(size.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        else:
            # b'' or None
            return data

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)


class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, size=None):
        """Read and return up to size bytes, where size is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, size=None):
        """Read up to size bytes with at most one read() system call,
        where size is an int.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """

        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Read bytes into buffer *b*, using at most one system call

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """

        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        if not isinstance(b, memoryview):
            b = memoryview(b)
        b = b.cast('B')

        if read1:
            data = self.read1(len(b))
        else:
            data = self.read(len(b))
        n = len(data)

        b[:n] = data

        return n

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is always the length of b
        in bytes.

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)


class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise OSError("seek() returned an invalid position")
        return new_position

    def tell(self):
        pos = self.raw.tell()
        if pos < 0:
            raise OSError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        if self.raw is not None and not self.closed:
            try:
                # may raise BlockingIOError or BrokenPipeError etc
                self.flush()
            finally:
                self.raw.close()

    def detach(self):
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self._raw
        self._raw = None
        return raw

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        raise TypeError("can not serialize a '{0}' object"
                        .format(self.__class__.__name__))

    def __repr__(self):
        modname = self.__class__.__module__
        clsname = self.__class__.__qualname__
        try:
            name = self.name
        except Exception:
            return "<{}.{}>".format(modname, clsname)
        else:
            return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()


class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
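
        For example (an illustrative sketch; changes made through the view
        are reflected in the buffer)::

            b = BytesIO(b"abcdef")
            with b.getbuffer() as view:
                view[:3] = b"xyz"      # b.getvalue() is now b"xyzdef"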
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        self._buffer.clear()
        super().close()

    def read(self, size=None):
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        if size < 0:
            size = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + size)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, size):
        """This is the same as read.
        """
        return self.read(size)

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            n = view.nbytes  # Size of any bytes-like object
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True


class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size.  If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        return self.raw.readable()

    def _reset_read_buf(self):
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            current_size = 0
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                current_size += len(chunk)
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            raise ValueError("number of bytes to read must be positive")
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* with at most one system call."""

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer
                avail = min(len(self._read_buf) - self._read_pos, len(buf))
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break  # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break  # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos

class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream.  If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer.  (This may
                # raise BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                n = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if n is None:
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if n > len(self._write_buf) or n < 0:
                raise OSError("write() returned incorrect number of bytes")
            del self._write_buf[:n]

    def tell(self):
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)


class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.
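
        A small illustrative sketch (assuming an os.pipe() pair within one
        process; the names are arbitrary)::

            import os
            r, w = os.pipe()
            pair = BufferedRWPair(FileIO(r, 'r'), FileIO(w, 'w'))
            pair.write(b'ping')
            pair.flush()
            data = pair.read(4)    # b'ping'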
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, size=None):
        if size is None:
            size = -1
        return self.reader.read(size)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        return self.writer.closed


class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise OSError("seek() returned invalid position")
        return pos

    def tell(self):
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, pos)

    def read(self, size=None):
        if size is None:
            size = -1
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)


class FileIO(RawIOBase):
    _fd = -1
    _created = False
    _readable = False
    _writable = False
    _appending = False
    _seekable = None
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating.  Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'.  Add a '+' to the mode
        to allow simultaneous reading and writing.  A custom opener can be used by
        passing a callable as *opener*.  The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                os.lseek(fd, 0, SEEK_END)
        except:
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        raise TypeError("cannot serialize '%s' object" % self.__class__.__name__)

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self):
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested.
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk:  # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto()."""
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument offset is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        The current file position is changed to the value of size.
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'


class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O.  There is no readinto method because Python's character strings
    are immutable.  There is no public constructor.
    """

    def read(self, size=-1):
        """Read at most size characters from stream, where size is an int.

        Read from underlying buffer until we have size characters or we hit EOF.
        If size is negative or omitted, read until EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream, returning an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None

io.TextIOBase.register(TextIOBase)


class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """
    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        # decode input (with the eventual \r from a previous pass)
        if self.decoder is None:
            output = input
        else:
            output = self.decoder.decode(input, final=final)
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if output.endswith("\r") and not final:
            output = output[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        self.seennl |= (lf and self._LF) | (cr and self._CR) \
                    | (crlf and self._CRLF)

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        if self.decoder is None:
            buf = b""
            flag = 0
        else:
            buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
)[self.seennl] 1880 1881 1882class TextIOWrapper(TextIOBase): 1883 1884 r"""Character and line based layer over a BufferedIOBase object, buffer. 1885 1886 encoding gives the name of the encoding that the stream will be 1887 decoded or encoded with. It defaults to locale.getpreferredencoding(False). 1888 1889 errors determines the strictness of encoding and decoding (see the 1890 codecs.register) and defaults to "strict". 1891 1892 newline can be None, '', '\n', '\r', or '\r\n'. It controls the 1893 handling of line endings. If it is None, universal newlines is 1894 enabled. With this enabled, on input, the lines endings '\n', '\r', 1895 or '\r\n' are translated to '\n' before being returned to the 1896 caller. Conversely, on output, '\n' is translated to the system 1897 default line separator, os.linesep. If newline is any other of its 1898 legal values, that newline becomes the newline when the file is read 1899 and it is returned untranslated. On output, '\n' is converted to the 1900 newline. 1901 1902 If line_buffering is True, a call to flush is implied when a call to 1903 write contains a newline character. 1904 """ 1905 1906 _CHUNK_SIZE = 2048 1907 1908 # The write_through argument has no effect here since this 1909 # implementation always writes through. The argument is present only 1910 # so that the signature can match the signature of the C version. 1911 def __init__(self, buffer, encoding=None, errors=None, newline=None, 1912 line_buffering=False, write_through=False): 1913 if newline is not None and not isinstance(newline, str): 1914 raise TypeError("illegal newline type: %r" % (type(newline),)) 1915 if newline not in (None, "", "\n", "\r", "\r\n"): 1916 raise ValueError("illegal newline value: %r" % (newline,)) 1917 if encoding is None: 1918 try: 1919 encoding = os.device_encoding(buffer.fileno()) 1920 except (AttributeError, UnsupportedOperation): 1921 pass 1922 if encoding is None: 1923 try: 1924 import locale 1925 except ImportError: 1926 # Importing locale may fail if Python is being built 1927 encoding = "ascii" 1928 else: 1929 encoding = locale.getpreferredencoding(False) 1930 1931 if not isinstance(encoding, str): 1932 raise ValueError("invalid encoding: %r" % encoding) 1933 1934 if not codecs.lookup(encoding)._is_text_encoding: 1935 msg = ("%r is not a text encoding; " 1936 "use codecs.open() to handle arbitrary codecs") 1937 raise LookupError(msg % encoding) 1938 1939 if errors is None: 1940 errors = "strict" 1941 else: 1942 if not isinstance(errors, str): 1943 raise ValueError("invalid errors: %r" % errors) 1944 1945 self._buffer = buffer 1946 self._line_buffering = line_buffering 1947 self._encoding = encoding 1948 self._errors = errors 1949 self._readuniversal = not newline 1950 self._readtranslate = newline is None 1951 self._readnl = newline 1952 self._writetranslate = newline != '' 1953 self._writenl = newline or os.linesep 1954 self._encoder = None 1955 self._decoder = None 1956 self._decoded_chars = '' # buffer for text returned from decoder 1957 self._decoded_chars_used = 0 # offset into _decoded_chars for read() 1958 self._snapshot = None # info for reconstructing decoder state 1959 self._seekable = self._telling = self.buffer.seekable() 1960 self._has_read1 = hasattr(self.buffer, 'read1') 1961 self._b2cratio = 0.0 1962 1963 if self._seekable and self.writable(): 1964 position = self.buffer.tell() 1965 if position != 0: 1966 try: 1967 self._get_encoder().setstate(0) 1968 except LookupError: 1969 # Sometimes the encoder doesn't exist 1970 pass 1971 1972 # 
self._snapshot is either None, or a tuple (dec_flags, next_input) 1973 # where dec_flags is the second (integer) item of the decoder state 1974 # and next_input is the chunk of input bytes that comes next after the 1975 # snapshot point. We use this to reconstruct decoder states in tell(). 1976 1977 # Naming convention: 1978 # - "bytes_..." for integer variables that count input bytes 1979 # - "chars_..." for integer variables that count decoded characters 1980 1981 def __repr__(self): 1982 result = "<{}.{}".format(self.__class__.__module__, 1983 self.__class__.__qualname__) 1984 try: 1985 name = self.name 1986 except Exception: 1987 pass 1988 else: 1989 result += " name={0!r}".format(name) 1990 try: 1991 mode = self.mode 1992 except Exception: 1993 pass 1994 else: 1995 result += " mode={0!r}".format(mode) 1996 return result + " encoding={0!r}>".format(self.encoding) 1997 1998 @property 1999 def encoding(self): 2000 return self._encoding 2001 2002 @property 2003 def errors(self): 2004 return self._errors 2005 2006 @property 2007 def line_buffering(self): 2008 return self._line_buffering 2009 2010 @property 2011 def buffer(self): 2012 return self._buffer 2013 2014 def seekable(self): 2015 if self.closed: 2016 raise ValueError("I/O operation on closed file.") 2017 return self._seekable 2018 2019 def readable(self): 2020 return self.buffer.readable() 2021 2022 def writable(self): 2023 return self.buffer.writable() 2024 2025 def flush(self): 2026 self.buffer.flush() 2027 self._telling = self._seekable 2028 2029 def close(self): 2030 if self.buffer is not None and not self.closed: 2031 try: 2032 self.flush() 2033 finally: 2034 self.buffer.close() 2035 2036 @property 2037 def closed(self): 2038 return self.buffer.closed 2039 2040 @property 2041 def name(self): 2042 return self.buffer.name 2043 2044 def fileno(self): 2045 return self.buffer.fileno() 2046 2047 def isatty(self): 2048 return self.buffer.isatty() 2049 2050 def write(self, s): 2051 'Write data, where s is a str' 2052 if self.closed: 2053 raise ValueError("write to closed file") 2054 if not isinstance(s, str): 2055 raise TypeError("can't write %s to text stream" % 2056 s.__class__.__name__) 2057 length = len(s) 2058 haslf = (self._writetranslate or self._line_buffering) and "\n" in s 2059 if haslf and self._writetranslate and self._writenl != "\n": 2060 s = s.replace("\n", self._writenl) 2061 encoder = self._encoder or self._get_encoder() 2062 # XXX What if we were just reading? 2063 b = encoder.encode(s) 2064 self.buffer.write(b) 2065 if self._line_buffering and (haslf or "\r" in s): 2066 self.flush() 2067 self._snapshot = None 2068 if self._decoder: 2069 self._decoder.reset() 2070 return length 2071 2072 def _get_encoder(self): 2073 make_encoder = codecs.getincrementalencoder(self._encoding) 2074 self._encoder = make_encoder(self._errors) 2075 return self._encoder 2076 2077 def _get_decoder(self): 2078 make_decoder = codecs.getincrementaldecoder(self._encoding) 2079 decoder = make_decoder(self._errors) 2080 if self._readuniversal: 2081 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) 2082 self._decoder = decoder 2083 return decoder 2084 2085 # The following three methods implement an ADT for _decoded_chars. 2086 # Text returned from the decoder is buffered here until the client 2087 # requests it by calling our read() or readline() method. 
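    # For example (illustrative): after _set_decoded_chars("hello world"),
    # _get_decoded_chars(6) returns "hello " and leaves _decoded_chars_used
    # at 6; a subsequent _rewind_decoded_chars(1) moves the offset back to 5
    # so the space can be handed out again by a later read.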
2088 def _set_decoded_chars(self, chars): 2089 """Set the _decoded_chars buffer.""" 2090 self._decoded_chars = chars 2091 self._decoded_chars_used = 0 2092 2093 def _get_decoded_chars(self, n=None): 2094 """Advance into the _decoded_chars buffer.""" 2095 offset = self._decoded_chars_used 2096 if n is None: 2097 chars = self._decoded_chars[offset:] 2098 else: 2099 chars = self._decoded_chars[offset:offset + n] 2100 self._decoded_chars_used += len(chars) 2101 return chars 2102 2103 def _rewind_decoded_chars(self, n): 2104 """Rewind the _decoded_chars buffer.""" 2105 if self._decoded_chars_used < n: 2106 raise AssertionError("rewind decoded_chars out of bounds") 2107 self._decoded_chars_used -= n 2108 2109 def _read_chunk(self): 2110 """ 2111 Read and decode the next chunk of data from the BufferedReader. 2112 """ 2113 2114 # The return value is True unless EOF was reached. The decoded 2115 # string is placed in self._decoded_chars (replacing its previous 2116 # value). The entire input chunk is sent to the decoder, though 2117 # some of it may remain buffered in the decoder, yet to be 2118 # converted. 2119 2120 if self._decoder is None: 2121 raise ValueError("no decoder") 2122 2123 if self._telling: 2124 # To prepare for tell(), we need to snapshot a point in the 2125 # file where the decoder's input buffer is empty. 2126 2127 dec_buffer, dec_flags = self._decoder.getstate() 2128 # Given this, we know there was a valid snapshot point 2129 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). 2130 2131 # Read a chunk, decode it, and put the result in self._decoded_chars. 2132 if self._has_read1: 2133 input_chunk = self.buffer.read1(self._CHUNK_SIZE) 2134 else: 2135 input_chunk = self.buffer.read(self._CHUNK_SIZE) 2136 eof = not input_chunk 2137 decoded_chars = self._decoder.decode(input_chunk, eof) 2138 self._set_decoded_chars(decoded_chars) 2139 if decoded_chars: 2140 self._b2cratio = len(input_chunk) / len(self._decoded_chars) 2141 else: 2142 self._b2cratio = 0.0 2143 2144 if self._telling: 2145 # At the snapshot point, len(dec_buffer) bytes before the read, 2146 # the next input to be decoded is dec_buffer + input_chunk. 2147 self._snapshot = (dec_flags, dec_buffer + input_chunk) 2148 2149 return not eof 2150 2151 def _pack_cookie(self, position, dec_flags=0, 2152 bytes_to_feed=0, need_eof=0, chars_to_skip=0): 2153 # The meaning of a tell() cookie is: seek to position, set the 2154 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them 2155 # into the decoder with need_eof as the EOF flag, then skip 2156 # chars_to_skip characters of the decoded result. For most simple 2157 # decoders, tell() will often just give a byte offset in the file. 2158 return (position | (dec_flags<<64) | (bytes_to_feed<<128) | 2159 (chars_to_skip<<192) | bool(need_eof)<<256) 2160 2161 def _unpack_cookie(self, bigint): 2162 rest, position = divmod(bigint, 1<<64) 2163 rest, dec_flags = divmod(rest, 1<<64) 2164 rest, bytes_to_feed = divmod(rest, 1<<64) 2165 need_eof, chars_to_skip = divmod(rest, 1<<64) 2166 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip 2167 2168 def tell(self): 2169 if not self._seekable: 2170 raise UnsupportedOperation("underlying stream is not seekable") 2171 if not self._telling: 2172 raise OSError("telling position disabled by next() call") 2173 self.flush() 2174 position = self.buffer.tell() 2175 decoder = self._decoder 2176 if decoder is None or self._snapshot is None: 2177 if self._decoded_chars: 2178 # This should never happen. 
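                # (Decoded text is only produced by _read_chunk() and seek(),
                # both of which record a snapshot while telling is enabled,
                # so pending text without one means the internal state is
                # inconsistent.)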
2179 raise AssertionError("pending decoded text") 2180 return position 2181 2182 # Skip backward to the snapshot point (see _read_chunk). 2183 dec_flags, next_input = self._snapshot 2184 position -= len(next_input) 2185 2186 # How many decoded characters have been used up since the snapshot? 2187 chars_to_skip = self._decoded_chars_used 2188 if chars_to_skip == 0: 2189 # We haven't moved from the snapshot point. 2190 return self._pack_cookie(position, dec_flags) 2191 2192 # Starting from the snapshot position, we will walk the decoder 2193 # forward until it gives us enough decoded characters. 2194 saved_state = decoder.getstate() 2195 try: 2196 # Fast search for an acceptable start point, close to our 2197 # current pos. 2198 # Rationale: calling decoder.decode() has a large overhead 2199 # regardless of chunk size; we want the number of such calls to 2200 # be O(1) in most situations (common decoders, non-crazy input). 2201 # Actually, it will be exactly 1 for fixed-size codecs (all 2202 # 8-bit codecs, also UTF-16 and UTF-32). 2203 skip_bytes = int(self._b2cratio * chars_to_skip) 2204 skip_back = 1 2205 assert skip_bytes <= len(next_input) 2206 while skip_bytes > 0: 2207 decoder.setstate((b'', dec_flags)) 2208 # Decode up to temptative start point 2209 n = len(decoder.decode(next_input[:skip_bytes])) 2210 if n <= chars_to_skip: 2211 b, d = decoder.getstate() 2212 if not b: 2213 # Before pos and no bytes buffered in decoder => OK 2214 dec_flags = d 2215 chars_to_skip -= n 2216 break 2217 # Skip back by buffered amount and reset heuristic 2218 skip_bytes -= len(b) 2219 skip_back = 1 2220 else: 2221 # We're too far ahead, skip back a bit 2222 skip_bytes -= skip_back 2223 skip_back = skip_back * 2 2224 else: 2225 skip_bytes = 0 2226 decoder.setstate((b'', dec_flags)) 2227 2228 # Note our initial start point. 2229 start_pos = position + skip_bytes 2230 start_flags = dec_flags 2231 if chars_to_skip == 0: 2232 # We haven't moved from the start point. 2233 return self._pack_cookie(start_pos, start_flags) 2234 2235 # Feed the decoder one byte at a time. As we go, note the 2236 # nearest "safe start point" before the current location 2237 # (a point where the decoder has nothing buffered, so seek() 2238 # can safely start from there and advance to this location). 2239 bytes_fed = 0 2240 need_eof = 0 2241 # Chars decoded since `start_pos` 2242 chars_decoded = 0 2243 for i in range(skip_bytes, len(next_input)): 2244 bytes_fed += 1 2245 chars_decoded += len(decoder.decode(next_input[i:i+1])) 2246 dec_buffer, dec_flags = decoder.getstate() 2247 if not dec_buffer and chars_decoded <= chars_to_skip: 2248 # Decoder buffer is empty, so this is a safe start point. 2249 start_pos += bytes_fed 2250 chars_to_skip -= chars_decoded 2251 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 2252 if chars_decoded >= chars_to_skip: 2253 break 2254 else: 2255 # We didn't get enough decoded data; signal EOF to get more. 2256 chars_decoded += len(decoder.decode(b'', final=True)) 2257 need_eof = 1 2258 if chars_decoded < chars_to_skip: 2259 raise OSError("can't reconstruct logical file position") 2260 2261 # The returned cookie corresponds to the last safe start point. 
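            # Each cookie field gets its own 64-bit slot (see _pack_cookie).
            # For example (illustrative numbers), start_pos=10 with
            # dec_flags=1 and all other fields zero packs to 10 | (1 << 64),
            # and _unpack_cookie() recovers (10, 1, 0, 0, 0).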
2262 return self._pack_cookie( 2263 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip) 2264 finally: 2265 decoder.setstate(saved_state) 2266 2267 def truncate(self, pos=None): 2268 self.flush() 2269 if pos is None: 2270 pos = self.tell() 2271 return self.buffer.truncate(pos) 2272 2273 def detach(self): 2274 if self.buffer is None: 2275 raise ValueError("buffer is already detached") 2276 self.flush() 2277 buffer = self._buffer 2278 self._buffer = None 2279 return buffer 2280 2281 def seek(self, cookie, whence=0): 2282 def _reset_encoder(position): 2283 """Reset the encoder (merely useful for proper BOM handling)""" 2284 try: 2285 encoder = self._encoder or self._get_encoder() 2286 except LookupError: 2287 # Sometimes the encoder doesn't exist 2288 pass 2289 else: 2290 if position != 0: 2291 encoder.setstate(0) 2292 else: 2293 encoder.reset() 2294 2295 if self.closed: 2296 raise ValueError("tell on closed file") 2297 if not self._seekable: 2298 raise UnsupportedOperation("underlying stream is not seekable") 2299 if whence == 1: # seek relative to current position 2300 if cookie != 0: 2301 raise UnsupportedOperation("can't do nonzero cur-relative seeks") 2302 # Seeking to the current position should attempt to 2303 # sync the underlying buffer with the current position. 2304 whence = 0 2305 cookie = self.tell() 2306 if whence == 2: # seek relative to end of file 2307 if cookie != 0: 2308 raise UnsupportedOperation("can't do nonzero end-relative seeks") 2309 self.flush() 2310 position = self.buffer.seek(0, 2) 2311 self._set_decoded_chars('') 2312 self._snapshot = None 2313 if self._decoder: 2314 self._decoder.reset() 2315 _reset_encoder(position) 2316 return position 2317 if whence != 0: 2318 raise ValueError("unsupported whence (%r)" % (whence,)) 2319 if cookie < 0: 2320 raise ValueError("negative seek position %r" % (cookie,)) 2321 self.flush() 2322 2323 # The strategy of seek() is to go back to the safe start point 2324 # and replay the effect of read(chars_to_skip) from there. 2325 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \ 2326 self._unpack_cookie(cookie) 2327 2328 # Seek back to the safe start point. 2329 self.buffer.seek(start_pos) 2330 self._set_decoded_chars('') 2331 self._snapshot = None 2332 2333 # Restore the decoder to its state from the safe start point. 2334 if cookie == 0 and self._decoder: 2335 self._decoder.reset() 2336 elif self._decoder or dec_flags or chars_to_skip: 2337 self._decoder = self._decoder or self._get_decoder() 2338 self._decoder.setstate((b'', dec_flags)) 2339 self._snapshot = (dec_flags, b'') 2340 2341 if chars_to_skip: 2342 # Just like _read_chunk, feed the decoder and save a snapshot. 2343 input_chunk = self.buffer.read(bytes_to_feed) 2344 self._set_decoded_chars( 2345 self._decoder.decode(input_chunk, need_eof)) 2346 self._snapshot = (dec_flags, input_chunk) 2347 2348 # Skip chars_to_skip of the decoded characters. 2349 if len(self._decoded_chars) < chars_to_skip: 2350 raise OSError("can't restore logical file position") 2351 self._decoded_chars_used = chars_to_skip 2352 2353 _reset_encoder(cookie) 2354 return cookie 2355 2356 def read(self, size=None): 2357 self._checkReadable() 2358 if size is None: 2359 size = -1 2360 decoder = self._decoder or self._get_decoder() 2361 try: 2362 size.__index__ 2363 except AttributeError as err: 2364 raise TypeError("an integer is required") from err 2365 if size < 0: 2366 # Read everything. 
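            # Drain whatever has already been decoded, then push the rest of
            # the underlying byte stream through the decoder in one final
            # call.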
2367 result = (self._get_decoded_chars() + 2368 decoder.decode(self.buffer.read(), final=True)) 2369 self._set_decoded_chars('') 2370 self._snapshot = None 2371 return result 2372 else: 2373 # Keep reading chunks until we have size characters to return. 2374 eof = False 2375 result = self._get_decoded_chars(size) 2376 while len(result) < size and not eof: 2377 eof = not self._read_chunk() 2378 result += self._get_decoded_chars(size - len(result)) 2379 return result 2380 2381 def __next__(self): 2382 self._telling = False 2383 line = self.readline() 2384 if not line: 2385 self._snapshot = None 2386 self._telling = self._seekable 2387 raise StopIteration 2388 return line 2389 2390 def readline(self, size=None): 2391 if self.closed: 2392 raise ValueError("read from closed file") 2393 if size is None: 2394 size = -1 2395 elif not isinstance(size, int): 2396 raise TypeError("size must be an integer") 2397 2398 # Grab all the decoded text (we will rewind any extra bits later). 2399 line = self._get_decoded_chars() 2400 2401 start = 0 2402 # Make the decoder if it doesn't already exist. 2403 if not self._decoder: 2404 self._get_decoder() 2405 2406 pos = endpos = None 2407 while True: 2408 if self._readtranslate: 2409 # Newlines are already translated, only search for \n 2410 pos = line.find('\n', start) 2411 if pos >= 0: 2412 endpos = pos + 1 2413 break 2414 else: 2415 start = len(line) 2416 2417 elif self._readuniversal: 2418 # Universal newline search. Find any of \r, \r\n, \n 2419 # The decoder ensures that \r\n are not split in two pieces 2420 2421 # In C we'd look for these in parallel of course. 2422 nlpos = line.find("\n", start) 2423 crpos = line.find("\r", start) 2424 if crpos == -1: 2425 if nlpos == -1: 2426 # Nothing found 2427 start = len(line) 2428 else: 2429 # Found \n 2430 endpos = nlpos + 1 2431 break 2432 elif nlpos == -1: 2433 # Found lone \r 2434 endpos = crpos + 1 2435 break 2436 elif nlpos < crpos: 2437 # Found \n 2438 endpos = nlpos + 1 2439 break 2440 elif nlpos == crpos + 1: 2441 # Found \r\n 2442 endpos = crpos + 2 2443 break 2444 else: 2445 # Found \r 2446 endpos = crpos + 1 2447 break 2448 else: 2449 # non-universal 2450 pos = line.find(self._readnl) 2451 if pos >= 0: 2452 endpos = pos + len(self._readnl) 2453 break 2454 2455 if size >= 0 and len(line) >= size: 2456 endpos = size # reached length size 2457 break 2458 2459 # No line ending seen yet - get more data' 2460 while self._read_chunk(): 2461 if self._decoded_chars: 2462 break 2463 if self._decoded_chars: 2464 line += self._get_decoded_chars() 2465 else: 2466 # end of file 2467 self._set_decoded_chars('') 2468 self._snapshot = None 2469 return line 2470 2471 if size >= 0 and endpos > size: 2472 endpos = size # don't exceed size 2473 2474 # Rewind _decoded_chars to just after the line ending we found. 2475 self._rewind_decoded_chars(len(line) - endpos) 2476 return line[:endpos] 2477 2478 @property 2479 def newlines(self): 2480 return self._decoder.newlines if self._decoder else None 2481 2482 2483class StringIO(TextIOWrapper): 2484 """Text I/O implementation using an in-memory buffer. 2485 2486 The initial_value argument sets the value of object. The newline 2487 argument is like the one of TextIOWrapper's constructor. 
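    For example (an illustrative sketch):

        >>> buf = StringIO()
        >>> buf.write("spam")
        4
        >>> buf.getvalue()
        'spam'
        >>> buf.seek(0)
        0
        >>> buf.read()
        'spam'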
2488 """ 2489 2490 def __init__(self, initial_value="", newline="\n"): 2491 super(StringIO, self).__init__(BytesIO(), 2492 encoding="utf-8", 2493 errors="surrogatepass", 2494 newline=newline) 2495 # Issue #5645: make universal newlines semantics the same as in the 2496 # C version, even under Windows. 2497 if newline is None: 2498 self._writetranslate = False 2499 if initial_value is not None: 2500 if not isinstance(initial_value, str): 2501 raise TypeError("initial_value must be str or None, not {0}" 2502 .format(type(initial_value).__name__)) 2503 self.write(initial_value) 2504 self.seek(0) 2505 2506 def getvalue(self): 2507 self.flush() 2508 decoder = self._decoder or self._get_decoder() 2509 old_state = decoder.getstate() 2510 decoder.reset() 2511 try: 2512 return decoder.decode(self.buffer.getvalue(), final=True) 2513 finally: 2514 decoder.setstate(old_state) 2515 2516 def __repr__(self): 2517 # TextIOWrapper tells the encoding in its repr. In StringIO, 2518 # that's an implementation detail. 2519 return object.__repr__(self) 2520 2521 @property 2522 def errors(self): 2523 return None 2524 2525 @property 2526 def encoding(self): 2527 return None 2528 2529 def detach(self): 2530 # This doesn't make sense on StringIO. 2531 self._unsupported("detach") 2532
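
# A minimal usage sketch of the tell()/seek() cookie machinery implemented
# above, using this module's own BytesIO and TextIOWrapper (illustrative
# only; the sample text is made up).  The cookie returned by tell() is an
# opaque integer that also encodes decoder state, so it should only ever be
# passed back to seek() on the same stream:
#
#     raw = BytesIO("héllo\nwörld\n".encode("utf-8"))
#     text = TextIOWrapper(raw, encoding="utf-8")
#     first = text.readline()     # 'héllo\n'
#     cookie = text.tell()        # not a character count
#     second = text.readline()    # 'wörld\n'
#     text.seek(cookie)           # rewind to just after the first line
#     assert text.readline() == second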