1""" 2Python implementation of the io module. 3""" 4 5from __future__ import (print_function, unicode_literals) 6 7import os 8import abc 9import codecs 10import warnings 11# Import thread instead of threading to reduce startup cost 12try: 13 from thread import allocate_lock as Lock 14except ImportError: 15 from dummy_thread import allocate_lock as Lock 16 17import io 18from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END) 19from errno import EINTR 20 21__metaclass__ = type 22 23# open() uses st_blksize whenever we can 24DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes 25 26# NOTE: Base classes defined here are registered with the "official" ABCs 27# defined in io.py. We don't use real inheritance though, because we don't 28# want to inherit the C implementations. 29 30 31class BlockingIOError(IOError): 32 33 """Exception raised when I/O would block on a non-blocking I/O stream.""" 34 35 def __init__(self, errno, strerror, characters_written=0): 36 super(IOError, self).__init__(errno, strerror) 37 if not isinstance(characters_written, (int, long)): 38 raise TypeError("characters_written must be a integer") 39 self.characters_written = characters_written 40 41 42def open(file, mode="r", buffering=-1, 43 encoding=None, errors=None, 44 newline=None, closefd=True): 45 46 r"""Open file and return a stream. Raise IOError upon failure. 47 48 file is either a text or byte string giving the name (and the path 49 if the file isn't in the current working directory) of the file to 50 be opened or an integer file descriptor of the file to be 51 wrapped. (If a file descriptor is given, it is closed when the 52 returned I/O object is closed, unless closefd is set to False.) 53 54 mode is an optional string that specifies the mode in which the file 55 is opened. It defaults to 'r' which means open for reading in text 56 mode. Other common values are 'w' for writing (truncating the file if 57 it already exists), and 'a' for appending (which on some Unix systems, 58 means that all writes append to the end of the file regardless of the 59 current seek position). In text mode, if encoding is not specified the 60 encoding used is platform dependent. (For reading and writing raw 61 bytes use binary mode and leave encoding unspecified.) The available 62 modes are: 63 64 ========= =============================================================== 65 Character Meaning 66 --------- --------------------------------------------------------------- 67 'r' open for reading (default) 68 'w' open for writing, truncating the file first 69 'a' open for writing, appending to the end of the file if it exists 70 'b' binary mode 71 't' text mode (default) 72 '+' open a disk file for updating (reading and writing) 73 'U' universal newline mode (for backwards compatibility; unneeded 74 for new code) 75 ========= =============================================================== 76 77 The default mode is 'rt' (open for reading text). For binary random 78 access, the mode 'w+b' opens and truncates the file to 0 bytes, while 79 'r+b' opens the file without truncation. 80 81 Python distinguishes between files opened in binary and text modes, 82 even when the underlying operating system doesn't. Files opened in 83 binary mode (appending 'b' to the mode argument) return contents as 84 bytes objects without any decoding. 
    In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.
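
    For example (an illustrative sketch; the file names are hypothetical,
    and the buffer sizes and default encoding are platform dependent):

        f = open('spam.bin', 'rb')      # BufferedReader
        f = open('spam.bin', 'wb')      # BufferedWriter
        f = open('spam.bin', 'r+b', 0)  # unbuffered FileIO
        f = open('spam.txt', 'w')       # TextIOWrapper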

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    if not isinstance(file, (basestring, int, long)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, (int, long)):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, basestring):
        raise TypeError("invalid errors: %r" % errors)
    modes = set(mode)
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw = FileIO(file,
                 (reading and "r" or "") +
                 (writing and "w" or "") +
                 (appending and "a" or "") +
                 (updating and "+" or ""),
                 closefd)
    line_buffering = False
    if buffering == 1 or buffering < 0 and raw.isatty():
        buffering = -1
        line_buffering = True
    if buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering < 0:
        raise ValueError("invalid buffering size")
    if buffering == 0:
        if binary:
            return raw
        raise ValueError("can't have unbuffered text I/O")
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    elif reading:
        buffer = BufferedReader(raw, buffering)
    else:
        raise ValueError("unknown mode: %r" % mode)
    if binary:
        return buffer
    text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
    text.mode = mode
    return text


class DocDescriptor:
    """Helper for builtins.open.__doc__
    """
    def __get__(self, obj, typ):
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True)\n\n" +
            open.__doc__)


class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
244 """ 245 __doc__ = DocDescriptor() 246 247 def __new__(cls, *args, **kwargs): 248 return open(*args, **kwargs) 249 250 251class UnsupportedOperation(ValueError, IOError): 252 pass 253 254 255class IOBase: 256 __metaclass__ = abc.ABCMeta 257 258 """The abstract base class for all I/O classes, acting on streams of 259 bytes. There is no public constructor. 260 261 This class provides dummy implementations for many methods that 262 derived classes can override selectively; the default implementations 263 represent a file that cannot be read, written or seeked. 264 265 Even though IOBase does not declare read, readinto, or write because 266 their signatures will vary, implementations and clients should 267 consider those methods part of the interface. Also, implementations 268 may raise a IOError when operations they do not support are called. 269 270 The basic type used for binary data read from or written to a file is 271 bytes. bytearrays are accepted too, and in some cases (such as 272 readinto) needed. Text I/O classes work with str data. 273 274 Note that calling any method (even inquiries) on a closed stream is 275 undefined. Implementations may raise IOError in this case. 276 277 IOBase (and its subclasses) support the iterator protocol, meaning 278 that an IOBase object can be iterated over yielding the lines in a 279 stream. 280 281 IOBase also supports the :keyword:`with` statement. In this example, 282 fp is closed after the suite of the with statement is complete: 283 284 with open('spam.txt', 'r') as fp: 285 fp.write('Spam and eggs!') 286 """ 287 288 ### Internal ### 289 290 def _unsupported(self, name): 291 """Internal: raise an exception for unsupported operations.""" 292 raise UnsupportedOperation("%s.%s() not supported" % 293 (self.__class__.__name__, name)) 294 295 ### Positioning ### 296 297 def seek(self, pos, whence=0): 298 """Change stream position. 299 300 Change the stream position to byte offset offset. offset is 301 interpreted relative to the position indicated by whence. Values 302 for whence are: 303 304 * 0 -- start of stream (the default); offset should be zero or positive 305 * 1 -- current stream position; offset may be negative 306 * 2 -- end of stream; offset is usually negative 307 308 Return the new absolute position. 309 """ 310 self._unsupported("seek") 311 312 def tell(self): 313 """Return current stream position.""" 314 return self.seek(0, 1) 315 316 def truncate(self, pos=None): 317 """Truncate file to size bytes. 318 319 Size defaults to the current IO position as reported by tell(). Return 320 the new size. 321 """ 322 self._unsupported("truncate") 323 324 ### Flush and close ### 325 326 def flush(self): 327 """Flush write buffers, if applicable. 328 329 This is not implemented for read-only and non-blocking streams. 330 """ 331 self._checkClosed() 332 # XXX Should this return the number of bytes written??? 333 334 __closed = False 335 336 def close(self): 337 """Flush and close the IO object. 338 339 This method has no effect if the file is already closed. 340 """ 341 if not self.__closed: 342 self.flush() 343 self.__closed = True 344 345 def __del__(self): 346 """Destructor. Calls close().""" 347 # The try/except block is in case this is called at program 348 # exit time, when it's possible that globals have already been 349 # deleted, and then the close() call might fail. Since 350 # there's nothing we can do about such failures and they annoy 351 # the end users, we suppress the traceback. 
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)


    def readable(self):
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self):
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def next(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is not None and not isinstance(hint, (int, long)):
            raise TypeError("integer or None expected")
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)


class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        else:
            # b'' or None
            return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)


class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call."""
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError as err:
            import array
            if not isinstance(b, array.array):
                raise err
            b[:n] = array.array(b'b', data)
        return n

    def write(self, b):
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)


class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise IOError("seek() returned an invalid position")
        return new_position

    def tell(self):
        pos = self.raw.tell()
        if pos < 0:
            raise IOError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        if self.raw is not None and not self.closed:
            self.flush()
            self.raw.close()

    def detach(self):
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self._raw
        self._raw = None
        return raw

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __repr__(self):
        clsname = self.__class__.__name__
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.{0}>".format(clsname)
        else:
            return "<_pyio.{0} name={1!r}>".format(clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()


class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        buf = bytearray()
        if initial_bytes is not None:
            buf.extend(initial_bytes)
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if not isinstance(n, (int, long)):
            raise TypeError("integer argument expected, got {0!r}".format(
                type(n)))
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError:
            raise TypeError("an integer is required")
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError:
                raise TypeError("an integer is required")
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True


class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size.  If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            current_size = 0
            while True:
                # Read until EOF or until read() would block.
                try:
                    chunk = self.raw.read()
                except IOError as e:
                    if e.errno != EINTR:
                        raise
                    continue
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                current_size += len(chunk)
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            try:
                chunk = self.raw.read(wanted)
            except IOError as e:
                if e.errno != EINTR:
                    raise
                continue
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            while True:
                try:
                    current = self.raw.read(to_read)
                except IOError as e:
                    if e.errno != EINTR:
                        raise
                    continue
                break
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos


class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream.  If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
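
    A small sketch of typical use (the file name is illustrative only):

        raw = FileIO('spam.bin', 'w')
        buf = BufferedWriter(raw, buffer_size=16)
        buf.write(b'x' * 10)   # data is held in the internal buffer
        buf.flush()            # the buffered bytes are written to raw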
1049 """ 1050 1051 _warning_stack_offset = 2 1052 1053 def __init__(self, raw, 1054 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): 1055 if not raw.writable(): 1056 raise IOError('"raw" argument must be writable.') 1057 1058 _BufferedIOMixin.__init__(self, raw) 1059 if buffer_size <= 0: 1060 raise ValueError("invalid buffer size") 1061 if max_buffer_size is not None: 1062 warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 1063 self._warning_stack_offset) 1064 self.buffer_size = buffer_size 1065 self._write_buf = bytearray() 1066 self._write_lock = Lock() 1067 1068 def write(self, b): 1069 if self.closed: 1070 raise ValueError("write to closed file") 1071 if isinstance(b, unicode): 1072 raise TypeError("can't write unicode to binary stream") 1073 with self._write_lock: 1074 # XXX we can implement some more tricks to try and avoid 1075 # partial writes 1076 if len(self._write_buf) > self.buffer_size: 1077 # We're full, so let's pre-flush the buffer 1078 try: 1079 self._flush_unlocked() 1080 except BlockingIOError as e: 1081 # We can't accept anything else. 1082 # XXX Why not just let the exception pass through? 1083 raise BlockingIOError(e.errno, e.strerror, 0) 1084 before = len(self._write_buf) 1085 self._write_buf.extend(b) 1086 written = len(self._write_buf) - before 1087 if len(self._write_buf) > self.buffer_size: 1088 try: 1089 self._flush_unlocked() 1090 except BlockingIOError as e: 1091 if len(self._write_buf) > self.buffer_size: 1092 # We've hit the buffer_size. We have to accept a partial 1093 # write and cut back our buffer. 1094 overage = len(self._write_buf) - self.buffer_size 1095 written -= overage 1096 self._write_buf = self._write_buf[:self.buffer_size] 1097 raise BlockingIOError(e.errno, e.strerror, written) 1098 return written 1099 1100 def truncate(self, pos=None): 1101 with self._write_lock: 1102 self._flush_unlocked() 1103 if pos is None: 1104 pos = self.raw.tell() 1105 return self.raw.truncate(pos) 1106 1107 def flush(self): 1108 with self._write_lock: 1109 self._flush_unlocked() 1110 1111 def _flush_unlocked(self): 1112 if self.closed: 1113 raise ValueError("flush of closed file") 1114 written = 0 1115 try: 1116 while self._write_buf: 1117 try: 1118 n = self.raw.write(self._write_buf) 1119 except IOError as e: 1120 if e.errno != EINTR: 1121 raise 1122 continue 1123 if n > len(self._write_buf) or n < 0: 1124 raise IOError("write() returned incorrect number of bytes") 1125 del self._write_buf[:n] 1126 written += n 1127 except BlockingIOError as e: 1128 n = e.characters_written 1129 del self._write_buf[:n] 1130 written += n 1131 raise BlockingIOError(e.errno, e.strerror, written) 1132 1133 def tell(self): 1134 return _BufferedIOMixin.tell(self) + len(self._write_buf) 1135 1136 def seek(self, pos, whence=0): 1137 if not (0 <= whence <= 2): 1138 raise ValueError("invalid whence") 1139 with self._write_lock: 1140 self._flush_unlocked() 1141 return _BufferedIOMixin.seek(self, pos, whence) 1142 1143 1144class BufferedRWPair(BufferedIOBase): 1145 1146 """A buffered reader and writer object together. 1147 1148 A buffered reader object and buffered writer object put together to 1149 form a sequential IO object that can read and write. This is typically 1150 used with a socket or two-way pipe. 1151 1152 reader and writer are RawIOBase objects that are readable and 1153 writeable respectively. If the buffer_size is omitted it defaults to 1154 DEFAULT_BUFFER_SIZE. 
1155 """ 1156 1157 # XXX The usefulness of this (compared to having two separate IO 1158 # objects) is questionable. 1159 1160 def __init__(self, reader, writer, 1161 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): 1162 """Constructor. 1163 1164 The arguments are two RawIO instances. 1165 """ 1166 if max_buffer_size is not None: 1167 warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2) 1168 1169 if not reader.readable(): 1170 raise IOError('"reader" argument must be readable.') 1171 1172 if not writer.writable(): 1173 raise IOError('"writer" argument must be writable.') 1174 1175 self.reader = BufferedReader(reader, buffer_size) 1176 self.writer = BufferedWriter(writer, buffer_size) 1177 1178 def read(self, n=None): 1179 if n is None: 1180 n = -1 1181 return self.reader.read(n) 1182 1183 def readinto(self, b): 1184 return self.reader.readinto(b) 1185 1186 def write(self, b): 1187 return self.writer.write(b) 1188 1189 def peek(self, n=0): 1190 return self.reader.peek(n) 1191 1192 def read1(self, n): 1193 return self.reader.read1(n) 1194 1195 def readable(self): 1196 return self.reader.readable() 1197 1198 def writable(self): 1199 return self.writer.writable() 1200 1201 def flush(self): 1202 return self.writer.flush() 1203 1204 def close(self): 1205 self.writer.close() 1206 self.reader.close() 1207 1208 def isatty(self): 1209 return self.reader.isatty() or self.writer.isatty() 1210 1211 @property 1212 def closed(self): 1213 return self.writer.closed 1214 1215 1216class BufferedRandom(BufferedWriter, BufferedReader): 1217 1218 """A buffered interface to random access streams. 1219 1220 The constructor creates a reader and writer for a seekable stream, 1221 raw, given in the first argument. If the buffer_size is omitted it 1222 defaults to DEFAULT_BUFFER_SIZE. 1223 """ 1224 1225 _warning_stack_offset = 3 1226 1227 def __init__(self, raw, 1228 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): 1229 raw._checkSeekable() 1230 BufferedReader.__init__(self, raw, buffer_size) 1231 BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size) 1232 1233 def seek(self, pos, whence=0): 1234 if not (0 <= whence <= 2): 1235 raise ValueError("invalid whence") 1236 self.flush() 1237 if self._read_buf: 1238 # Undo read ahead. 1239 with self._read_lock: 1240 self.raw.seek(self._read_pos - len(self._read_buf), 1) 1241 # First do the raw seek, then empty the read buffer, so that 1242 # if the raw seek fails, we don't lose buffered data forever. 1243 pos = self.raw.seek(pos, whence) 1244 with self._read_lock: 1245 self._reset_read_buf() 1246 if pos < 0: 1247 raise IOError("seek() returned invalid position") 1248 return pos 1249 1250 def tell(self): 1251 if self._write_buf: 1252 return BufferedWriter.tell(self) 1253 else: 1254 return BufferedReader.tell(self) 1255 1256 def truncate(self, pos=None): 1257 if pos is None: 1258 pos = self.tell() 1259 # Use seek to flush the read buffer. 
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)


class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.
    """

    def read(self, n=-1):
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None

io.TextIOBase.register(TextIOBase)


class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
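
    A rough sketch of the intended behaviour with translate=True and no
    wrapped decoder (decoder=None):

        d = IncrementalNewlineDecoder(None, translate=True)
        d.decode("a\r\nb\r")          # -> "a\nb"; the trailing "\r" is held back
        d.decode("\nc", final=True)   # -> "\nc"; d.newlines is now "\r\n"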
1361 """ 1362 def __init__(self, decoder, translate, errors='strict'): 1363 codecs.IncrementalDecoder.__init__(self, errors=errors) 1364 self.translate = translate 1365 self.decoder = decoder 1366 self.seennl = 0 1367 self.pendingcr = False 1368 1369 def decode(self, input, final=False): 1370 # decode input (with the eventual \r from a previous pass) 1371 if self.decoder is None: 1372 output = input 1373 else: 1374 output = self.decoder.decode(input, final=final) 1375 if self.pendingcr and (output or final): 1376 output = "\r" + output 1377 self.pendingcr = False 1378 1379 # retain last \r even when not translating data: 1380 # then readline() is sure to get \r\n in one pass 1381 if output.endswith("\r") and not final: 1382 output = output[:-1] 1383 self.pendingcr = True 1384 1385 # Record which newlines are read 1386 crlf = output.count('\r\n') 1387 cr = output.count('\r') - crlf 1388 lf = output.count('\n') - crlf 1389 self.seennl |= (lf and self._LF) | (cr and self._CR) \ 1390 | (crlf and self._CRLF) 1391 1392 if self.translate: 1393 if crlf: 1394 output = output.replace("\r\n", "\n") 1395 if cr: 1396 output = output.replace("\r", "\n") 1397 1398 return output 1399 1400 def getstate(self): 1401 if self.decoder is None: 1402 buf = b"" 1403 flag = 0 1404 else: 1405 buf, flag = self.decoder.getstate() 1406 flag <<= 1 1407 if self.pendingcr: 1408 flag |= 1 1409 return buf, flag 1410 1411 def setstate(self, state): 1412 buf, flag = state 1413 self.pendingcr = bool(flag & 1) 1414 if self.decoder is not None: 1415 self.decoder.setstate((buf, flag >> 1)) 1416 1417 def reset(self): 1418 self.seennl = 0 1419 self.pendingcr = False 1420 if self.decoder is not None: 1421 self.decoder.reset() 1422 1423 _LF = 1 1424 _CR = 2 1425 _CRLF = 4 1426 1427 @property 1428 def newlines(self): 1429 return (None, 1430 "\n", 1431 "\r", 1432 ("\r", "\n"), 1433 "\r\n", 1434 ("\n", "\r\n"), 1435 ("\r", "\r\n"), 1436 ("\r", "\n", "\r\n") 1437 )[self.seennl] 1438 1439 1440class TextIOWrapper(TextIOBase): 1441 1442 r"""Character and line based layer over a BufferedIOBase object, buffer. 1443 1444 encoding gives the name of the encoding that the stream will be 1445 decoded or encoded with. It defaults to locale.getpreferredencoding. 1446 1447 errors determines the strictness of encoding and decoding (see the 1448 codecs.register) and defaults to "strict". 1449 1450 newline can be None, '', '\n', '\r', or '\r\n'. It controls the 1451 handling of line endings. If it is None, universal newlines is 1452 enabled. With this enabled, on input, the lines endings '\n', '\r', 1453 or '\r\n' are translated to '\n' before being returned to the 1454 caller. Conversely, on output, '\n' is translated to the system 1455 default line seperator, os.linesep. If newline is any other of its 1456 legal values, that newline becomes the newline when the file is read 1457 and it is returned untranslated. On output, '\n' is converted to the 1458 newline. 1459 1460 If line_buffering is True, a call to flush is implied when a call to 1461 write contains a newline character. 
1462 """ 1463 1464 _CHUNK_SIZE = 2048 1465 1466 def __init__(self, buffer, encoding=None, errors=None, newline=None, 1467 line_buffering=False): 1468 if newline is not None and not isinstance(newline, basestring): 1469 raise TypeError("illegal newline type: %r" % (type(newline),)) 1470 if newline not in (None, "", "\n", "\r", "\r\n"): 1471 raise ValueError("illegal newline value: %r" % (newline,)) 1472 if encoding is None: 1473 try: 1474 import locale 1475 except ImportError: 1476 # Importing locale may fail if Python is being built 1477 encoding = "ascii" 1478 else: 1479 encoding = locale.getpreferredencoding() 1480 1481 if not isinstance(encoding, basestring): 1482 raise ValueError("invalid encoding: %r" % encoding) 1483 1484 if errors is None: 1485 errors = "strict" 1486 else: 1487 if not isinstance(errors, basestring): 1488 raise ValueError("invalid errors: %r" % errors) 1489 1490 self._buffer = buffer 1491 self._line_buffering = line_buffering 1492 self._encoding = encoding 1493 self._errors = errors 1494 self._readuniversal = not newline 1495 self._readtranslate = newline is None 1496 self._readnl = newline 1497 self._writetranslate = newline != '' 1498 self._writenl = newline or os.linesep 1499 self._encoder = None 1500 self._decoder = None 1501 self._decoded_chars = '' # buffer for text returned from decoder 1502 self._decoded_chars_used = 0 # offset into _decoded_chars for read() 1503 self._snapshot = None # info for reconstructing decoder state 1504 self._seekable = self._telling = self.buffer.seekable() 1505 1506 if self._seekable and self.writable(): 1507 position = self.buffer.tell() 1508 if position != 0: 1509 try: 1510 self._get_encoder().setstate(0) 1511 except LookupError: 1512 # Sometimes the encoder doesn't exist 1513 pass 1514 1515 # self._snapshot is either None, or a tuple (dec_flags, next_input) 1516 # where dec_flags is the second (integer) item of the decoder state 1517 # and next_input is the chunk of input bytes that comes next after the 1518 # snapshot point. We use this to reconstruct decoder states in tell(). 1519 1520 # Naming convention: 1521 # - "bytes_..." for integer variables that count input bytes 1522 # - "chars_..." 

    def __repr__(self):
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
        else:
            return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
                name, self.encoding)

    @property
    def encoding(self):
        return self._encoding

    @property
    def errors(self):
        return self._errors

    @property
    def line_buffering(self):
        return self._line_buffering

    @property
    def buffer(self):
        return self._buffer

    def seekable(self):
        return self._seekable

    def readable(self):
        return self.buffer.readable()

    def writable(self):
        return self.buffer.writable()

    def flush(self):
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        if self.buffer is not None and not self.closed:
            self.flush()
            self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def name(self):
        return self.buffer.name

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s):
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
1642 """ 1643 1644 # The return value is True unless EOF was reached. The decoded 1645 # string is placed in self._decoded_chars (replacing its previous 1646 # value). The entire input chunk is sent to the decoder, though 1647 # some of it may remain buffered in the decoder, yet to be 1648 # converted. 1649 1650 if self._decoder is None: 1651 raise ValueError("no decoder") 1652 1653 if self._telling: 1654 # To prepare for tell(), we need to snapshot a point in the 1655 # file where the decoder's input buffer is empty. 1656 1657 dec_buffer, dec_flags = self._decoder.getstate() 1658 # Given this, we know there was a valid snapshot point 1659 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). 1660 1661 # Read a chunk, decode it, and put the result in self._decoded_chars. 1662 input_chunk = self.buffer.read1(self._CHUNK_SIZE) 1663 eof = not input_chunk 1664 self._set_decoded_chars(self._decoder.decode(input_chunk, eof)) 1665 1666 if self._telling: 1667 # At the snapshot point, len(dec_buffer) bytes before the read, 1668 # the next input to be decoded is dec_buffer + input_chunk. 1669 self._snapshot = (dec_flags, dec_buffer + input_chunk) 1670 1671 return not eof 1672 1673 def _pack_cookie(self, position, dec_flags=0, 1674 bytes_to_feed=0, need_eof=0, chars_to_skip=0): 1675 # The meaning of a tell() cookie is: seek to position, set the 1676 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them 1677 # into the decoder with need_eof as the EOF flag, then skip 1678 # chars_to_skip characters of the decoded result. For most simple 1679 # decoders, tell() will often just give a byte offset in the file. 1680 return (position | (dec_flags<<64) | (bytes_to_feed<<128) | 1681 (chars_to_skip<<192) | bool(need_eof)<<256) 1682 1683 def _unpack_cookie(self, bigint): 1684 rest, position = divmod(bigint, 1<<64) 1685 rest, dec_flags = divmod(rest, 1<<64) 1686 rest, bytes_to_feed = divmod(rest, 1<<64) 1687 need_eof, chars_to_skip = divmod(rest, 1<<64) 1688 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip 1689 1690 def tell(self): 1691 if not self._seekable: 1692 raise IOError("underlying stream is not seekable") 1693 if not self._telling: 1694 raise IOError("telling position disabled by next() call") 1695 self.flush() 1696 position = self.buffer.tell() 1697 decoder = self._decoder 1698 if decoder is None or self._snapshot is None: 1699 if self._decoded_chars: 1700 # This should never happen. 1701 raise AssertionError("pending decoded text") 1702 return position 1703 1704 # Skip backward to the snapshot point (see _read_chunk). 1705 dec_flags, next_input = self._snapshot 1706 position -= len(next_input) 1707 1708 # How many decoded characters have been used up since the snapshot? 1709 chars_to_skip = self._decoded_chars_used 1710 if chars_to_skip == 0: 1711 # We haven't moved from the snapshot point. 1712 return self._pack_cookie(position, dec_flags) 1713 1714 # Starting from the snapshot position, we will walk the decoder 1715 # forward until it gives us enough decoded characters. 1716 saved_state = decoder.getstate() 1717 try: 1718 # Note our initial start point. 1719 decoder.setstate((b'', dec_flags)) 1720 start_pos = position 1721 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 1722 need_eof = 0 1723 1724 # Feed the decoder one byte at a time. As we go, note the 1725 # nearest "safe start point" before the current location 1726 # (a point where the decoder has nothing buffered, so seek() 1727 # can safely start from there and advance to this location). 
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def detach(self):
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        buffer = self._buffer
        self._buffer = None
        return buffer

    def seek(self, cookie, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1:  # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2:  # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        # Finally, reset the encoder (merely useful for proper BOM handling)
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if cookie != 0:
                encoder.setstate(0)
            else:
                encoder.reset()
        return cookie

    def read(self, n=None):
        self._checkReadable()
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        try:
            n.__index__
        except AttributeError:
            raise TypeError("an integer is required")
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def next(self):
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        return self._decoder.newlines if self._decoder else None


class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of the object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="strict",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value:
            if not isinstance(initial_value, unicode):
                initial_value = unicode(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        self.flush()
        return self.buffer.getvalue().decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        return None

    @property
    def encoding(self):
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")
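
# Illustrative usage (a sketch only, not executed on import): the in-memory
# classes defined above can stand in for real files, e.g.
#
#     sio = StringIO(u"one\ntwo\n")
#     sio.readline()             # -> u'one\n'
#     bio = BytesIO(b"spam")
#     bio.read()                 # -> b'spam'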