1"""
2Python implementation of the io module.
3"""
4
5from __future__ import (print_function, unicode_literals)
6
7import os
8import abc
9import codecs
10import sys
11import warnings
12import errno
13# Import thread instead of threading to reduce startup cost
14try:
15    from thread import allocate_lock as Lock
16except ImportError:
17    from dummy_thread import allocate_lock as Lock
18
19import io
20from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
21from errno import EINTR
22
23__metaclass__ = type
24
25# open() uses st_blksize whenever it can
26DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
27
28# NOTE: Base classes defined here are registered with the "official" ABCs
29# defined in io.py. We don't use real inheritance though, because we don't want
30# to inherit the C implementations.
31
32
33class BlockingIOError(IOError):
34
35    """Exception raised when I/O would block on a non-blocking I/O stream."""
36
37    def __init__(self, errno, strerror, characters_written=0):
38        super(BlockingIOError, self).__init__(errno, strerror)
39        if not isinstance(characters_written, (int, long)):
40            raise TypeError("characters_written must be an integer")
41        self.characters_written = characters_written
42
43
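# Illustrative sketch (kept as comments so the module itself is unchanged):
# when a write on a non-blocking buffered stream can only be partially
# satisfied, the exception's characters_written attribute reports how much
# was accepted.  "buffered_writer" and "big_payload" are hypothetical names.
#
#   try:
#       n = buffered_writer.write(big_payload)
#   except BlockingIOError as e:
#       n = e.characters_written    # bytes accepted before blocking
#
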
44def open(file, mode="r", buffering=-1,
45         encoding=None, errors=None,
46         newline=None, closefd=True):
47
48    r"""Open file and return a stream.  Raise IOError upon failure.
49
50    file is either a text or byte string giving the name (and the path
51    if the file isn't in the current working directory) of the file to
52    be opened or an integer file descriptor of the file to be
53    wrapped. (If a file descriptor is given, it is closed when the
54    returned I/O object is closed, unless closefd is set to False.)
55
56    mode is an optional string that specifies the mode in which the file
57    is opened. It defaults to 'r' which means open for reading in text
58    mode.  Other common values are 'w' for writing (truncating the file if
59    it already exists), and 'a' for appending (which on some Unix systems,
60    means that all writes append to the end of the file regardless of the
61    current seek position). In text mode, if encoding is not specified the
62    encoding used is platform dependent. (For reading and writing raw
63    bytes use binary mode and leave encoding unspecified.) The available
64    modes are:
65
66    ========= ===============================================================
67    Character Meaning
68    --------- ---------------------------------------------------------------
69    'r'       open for reading (default)
70    'w'       open for writing, truncating the file first
71    'a'       open for writing, appending to the end of the file if it exists
72    'b'       binary mode
73    't'       text mode (default)
74    '+'       open a disk file for updating (reading and writing)
75    'U'       universal newline mode (for backwards compatibility; unneeded
76              for new code)
77    ========= ===============================================================
78
79    The default mode is 'rt' (open for reading text). For binary random
80    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
81    'r+b' opens the file without truncation.
82
83    Python distinguishes between files opened in binary and text modes,
84    even when the underlying operating system doesn't. Files opened in
85    binary mode (appending 'b' to the mode argument) return contents as
86    bytes objects without any decoding. In text mode (the default, or when
87    't' is appended to the mode argument), the contents of the file are
88    returned as strings, the bytes having been first decoded using a
89    platform-dependent encoding or using the specified encoding if given.
90
91    buffering is an optional integer used to set the buffering policy.
92    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
93    line buffering (only usable in text mode), and an integer > 1 to indicate
94    the size of a fixed-size chunk buffer.  When no buffering argument is
95    given, the default buffering policy works as follows:
96
97    * Binary files are buffered in fixed-size chunks; the size of the buffer
98      is chosen using a heuristic trying to determine the underlying device's
99      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
100      On many systems, the buffer will typically be 4096 or 8192 bytes long.
101
102    * "Interactive" text files (files for which isatty() returns True)
103      use line buffering.  Other text files use the policy described above
104      for binary files.
105
106    encoding is the name of the encoding used to decode or encode the
107    file. This should only be used in text mode. The default encoding is
108    platform dependent, but any encoding supported by Python can be
109    passed.  See the codecs module for the list of supported encodings.
110
111    errors is an optional string that specifies how encoding errors are to
112    be handled---this argument should not be used in binary mode. Pass
113    'strict' to raise a ValueError exception if there is an encoding error
114    (the default of None has the same effect), or pass 'ignore' to ignore
115    errors. (Note that ignoring encoding errors can lead to data loss.)
116    See the documentation for codecs.register for a list of the permitted
117    encoding error strings.
118
119    newline controls how universal newlines works (it only applies to text
120    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
121    follows:
122
123    * On input, if newline is None, universal newlines mode is
124      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
125      these are translated into '\n' before being returned to the
126      caller. If it is '', universal newline mode is enabled, but line
127      endings are returned to the caller untranslated. If it has any of
128      the other legal values, input lines are only terminated by the given
129      string, and the line ending is returned to the caller untranslated.
130
131    * On output, if newline is None, any '\n' characters written are
132      translated to the system default line separator, os.linesep. If
133      newline is '', no translation takes place. If newline is any of the
134      other legal values, any '\n' characters written are translated to
135      the given string.
136
137    If closefd is False, the underlying file descriptor will be kept open
138    when the file is closed. This does not work when a file name is given
139    and must be True in that case.
140
141    open() returns a file object whose type depends on the mode, and
142    through which the standard file operations such as reading and writing
143    are performed. When open() is used to open a file in a text mode ('w',
144    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
145    a file in a binary mode, the returned class varies: in read binary
146    mode, it returns a BufferedReader; in write binary and append binary
147    modes, it returns a BufferedWriter, and in read/write mode, it returns
148    a BufferedRandom.
149
150    It is also possible to use a string or bytearray as a file for both
151    reading and writing. For strings StringIO can be used like a file
152    opened in a text mode, and for bytes a BytesIO can be used like a file
153    opened in a binary mode.
154    """
155    if not isinstance(file, (basestring, int, long)):
156        raise TypeError("invalid file: %r" % file)
157    if not isinstance(mode, basestring):
158        raise TypeError("invalid mode: %r" % mode)
159    if not isinstance(buffering, (int, long)):
160        raise TypeError("invalid buffering: %r" % buffering)
161    if encoding is not None and not isinstance(encoding, basestring):
162        raise TypeError("invalid encoding: %r" % encoding)
163    if errors is not None and not isinstance(errors, basestring):
164        raise TypeError("invalid errors: %r" % errors)
165    modes = set(mode)
166    if modes - set("arwb+tU") or len(mode) > len(modes):
167        raise ValueError("invalid mode: %r" % mode)
168    reading = "r" in modes
169    writing = "w" in modes
170    appending = "a" in modes
171    updating = "+" in modes
172    text = "t" in modes
173    binary = "b" in modes
174    if "U" in modes:
175        if writing or appending:
176            raise ValueError("can't use U and writing mode at once")
177        reading = True
178    if text and binary:
179        raise ValueError("can't have text and binary mode at once")
180    if reading + writing + appending > 1:
181        raise ValueError("can't have read/write/append mode at once")
182    if not (reading or writing or appending):
183        raise ValueError("must have exactly one of read/write/append mode")
184    if binary and encoding is not None:
185        raise ValueError("binary mode doesn't take an encoding argument")
186    if binary and errors is not None:
187        raise ValueError("binary mode doesn't take an errors argument")
188    if binary and newline is not None:
189        raise ValueError("binary mode doesn't take a newline argument")
190    raw = FileIO(file,
191                 (reading and "r" or "") +
192                 (writing and "w" or "") +
193                 (appending and "a" or "") +
194                 (updating and "+" or ""),
195                 closefd)
196    result = raw
197    try:
198        line_buffering = False
199        if buffering == 1 or buffering < 0 and raw.isatty():
200            buffering = -1
201            line_buffering = True
202        if buffering < 0:
203            buffering = DEFAULT_BUFFER_SIZE
204            try:
205                bs = os.fstat(raw.fileno()).st_blksize
206            except (os.error, AttributeError):
207                pass
208            else:
209                if bs > 1:
210                    buffering = bs
211        if buffering < 0:
212            raise ValueError("invalid buffering size")
213        if buffering == 0:
214            if binary:
215                return result
216            raise ValueError("can't have unbuffered text I/O")
217        if updating:
218            buffer = BufferedRandom(raw, buffering)
219        elif writing or appending:
220            buffer = BufferedWriter(raw, buffering)
221        elif reading:
222            buffer = BufferedReader(raw, buffering)
223        else:
224            raise ValueError("unknown mode: %r" % mode)
225        result = buffer
226        if binary:
227            return result
228        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
229        result = text
230        text.mode = mode
231        return result
232    except:
233        result.close()
234        raise
235
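# Usage sketch for open() (illustrative only; 'notes.txt' and 'image.dat'
# are hypothetical file names).  A text mode goes through TextIOWrapper and
# applies the newline policy described above; a binary mode returns one of
# the Buffered* objects (or the raw FileIO when buffering=0).
#
#   with open('notes.txt', 'w', encoding='utf-8', newline='') as f:
#       f.write(u'first line\n')        # '\n' written untranslated
#
#   with open('image.dat', 'rb', buffering=0) as f:    # unbuffered FileIO
#       header = f.read(16)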
236
237class DocDescriptor:
238    """Helper for builtins.open.__doc__
239    """
240    def __get__(self, obj, typ):
241        return (
242            "open(file, mode='r', buffering=-1, encoding=None, "
243                 "errors=None, newline=None, closefd=True)\n\n" +
244            open.__doc__)
245
246class OpenWrapper:
247    """Wrapper for builtins.open
248
249    Trick so that open won't become a bound method when stored
250    as a class variable (as dbm.dumb does).
251
252    See initstdio() in Python/pythonrun.c.
253    """
254    __doc__ = DocDescriptor()
255
256    def __new__(cls, *args, **kwargs):
257        return open(*args, **kwargs)
258
259
260class UnsupportedOperation(ValueError, IOError):
261    pass
262
263
264class IOBase:
265    __metaclass__ = abc.ABCMeta
266
267    """The abstract base class for all I/O classes, acting on streams of
268    bytes. There is no public constructor.
269
270    This class provides dummy implementations for many methods that
271    derived classes can override selectively; the default implementations
272    represent a file that cannot be read, written or seeked.
273
274    Even though IOBase does not declare read, readinto, or write because
275    their signatures will vary, implementations and clients should
276    consider those methods part of the interface. Also, implementations
277    may raise an IOError when operations they do not support are called.
278
279    The basic type used for binary data read from or written to a file is
280    the bytes type. Method arguments may also be bytearray or memoryview of
281    arrays of bytes. In some cases, such as readinto, a writable object such
282    as bytearray is required. Text I/O classes work with unicode data.
283
284    Note that calling any method (even inquiries) on a closed stream is
285    undefined. Implementations may raise IOError in this case.
286
287    IOBase (and its subclasses) support the iterator protocol, meaning
288    that an IOBase object can be iterated over yielding the lines in a
289    stream.
290
291    IOBase also supports the :keyword:`with` statement. In this example,
292    fp is closed after the suite of the with statement is complete:
293
294    with open('spam.txt', 'w') as fp:
295        fp.write('Spam and eggs!')
296    """
297
298    ### Internal ###
299
300    def _unsupported(self, name):
301        """Internal: raise an exception for unsupported operations."""
302        raise UnsupportedOperation("%s.%s() not supported" %
303                                   (self.__class__.__name__, name))
304
305    ### Positioning ###
306
307    def seek(self, pos, whence=0):
308        """Change stream position.
309
310        Change the stream position to byte offset pos. Argument pos is
311        interpreted relative to the position indicated by whence.  Values
312        for whence are:
313
314        * 0 -- start of stream (the default); offset should be zero or positive
315        * 1 -- current stream position; offset may be negative
316        * 2 -- end of stream; offset is usually negative
317
318        Return the new absolute position.
319        """
320        self._unsupported("seek")
321
322    def tell(self):
323        """Return current stream position."""
324        return self.seek(0, 1)
325
326    def truncate(self, pos=None):
327        """Truncate file to size bytes.
328
329        Size defaults to the current IO position as reported by tell().  Return
330        the new size.
331        """
332        self._unsupported("truncate")
333
334    ### Flush and close ###
335
336    def flush(self):
337        """Flush write buffers, if applicable.
338
339        This is not implemented for read-only and non-blocking streams.
340        """
341        self._checkClosed()
342        # XXX Should this return the number of bytes written???
343
344    __closed = False
345
346    def close(self):
347        """Flush and close the IO object.
348
349        This method has no effect if the file is already closed.
350        """
351        if not self.__closed:
352            try:
353                self.flush()
354            finally:
355                self.__closed = True
356
357    def __del__(self):
358        """Destructor.  Calls close()."""
359        # The try/except block is in case this is called at program
360        # exit time, when it's possible that globals have already been
361        # deleted, and then the close() call might fail.  Since
362        # there's nothing we can do about such failures and they annoy
363        # the end users, we suppress the traceback.
364        try:
365            self.close()
366        except:
367            pass
368
369    ### Inquiries ###
370
371    def seekable(self):
372        """Return whether object supports random access.
373
374        If False, seek(), tell() and truncate() will raise IOError.
375        This method may need to do a test seek().
376        """
377        return False
378
379    def _checkSeekable(self, msg=None):
380        """Internal: raise an IOError if file is not seekable
381        """
382        if not self.seekable():
383            raise IOError("File or stream is not seekable."
384                          if msg is None else msg)
385
386
387    def readable(self):
388        """Return whether object was opened for reading.
389
390        If False, read() will raise IOError.
391        """
392        return False
393
394    def _checkReadable(self, msg=None):
395        """Internal: raise an IOError if file is not readable
396        """
397        if not self.readable():
398            raise IOError("File or stream is not readable."
399                          if msg is None else msg)
400
401    def writable(self):
402        """Return whether object was opened for writing.
403
404        If False, write() and truncate() will raise IOError.
405        """
406        return False
407
408    def _checkWritable(self, msg=None):
409        """Internal: raise an IOError if file is not writable
410        """
411        if not self.writable():
412            raise IOError("File or stream is not writable."
413                          if msg is None else msg)
414
415    @property
416    def closed(self):
417        """closed: bool.  True iff the file has been closed.
418
419        For backwards compatibility, this is a property, not a predicate.
420        """
421        return self.__closed
422
423    def _checkClosed(self, msg=None):
424        """Internal: raise a ValueError if file is closed
425        """
426        if self.closed:
427            raise ValueError("I/O operation on closed file."
428                             if msg is None else msg)
429
430    ### Context manager ###
431
432    def __enter__(self):
433        """Context management protocol.  Returns self."""
434        self._checkClosed()
435        return self
436
437    def __exit__(self, *args):
438        """Context management protocol.  Calls close()"""
439        self.close()
440
441    ### Lower-level APIs ###
442
443    # XXX Should these be present even if unimplemented?
444
445    def fileno(self):
446        """Returns underlying file descriptor if one exists.
447
448        An IOError is raised if the IO object does not use a file descriptor.
449        """
450        self._unsupported("fileno")
451
452    def isatty(self):
453        """Return whether this is an 'interactive' stream.
454
455        Return False if it can't be determined.
456        """
457        self._checkClosed()
458        return False
459
460    ### Readline[s] and writelines ###
461
462    def readline(self, limit=-1):
463        r"""Read and return a line from the stream.
464
465        If limit is specified, at most limit bytes will be read.
466
467        The line terminator is always b'\n' for binary files; for text
468        files, the newlines argument to open can be used to select the line
469        terminator(s) recognized.
470        """
471        # For backwards compatibility, a (slowish) readline().
472        if hasattr(self, "peek"):
473            def nreadahead():
474                readahead = self.peek(1)
475                if not readahead:
476                    return 1
477                n = (readahead.find(b"\n") + 1) or len(readahead)
478                if limit >= 0:
479                    n = min(n, limit)
480                return n
481        else:
482            def nreadahead():
483                return 1
484        if limit is None:
485            limit = -1
486        elif not isinstance(limit, (int, long)):
487            raise TypeError("limit must be an integer")
488        res = bytearray()
489        while limit < 0 or len(res) < limit:
490            b = self.read(nreadahead())
491            if not b:
492                break
493            res += b
494            if res.endswith(b"\n"):
495                break
496        return bytes(res)
497
498    def __iter__(self):
499        self._checkClosed()
500        return self
501
502    def next(self):
503        line = self.readline()
504        if not line:
505            raise StopIteration
506        return line
507
508    def readlines(self, hint=None):
509        """Return a list of lines from the stream.
510
511        hint can be specified to control the number of lines read: no more
512        lines will be read if the total size (in bytes/characters) of all
513        lines so far exceeds hint.
514        """
515        if hint is not None and not isinstance(hint, (int, long)):
516            raise TypeError("integer or None expected")
517        if hint is None or hint <= 0:
518            return list(self)
519        n = 0
520        lines = []
521        for line in self:
522            lines.append(line)
523            n += len(line)
524            if n >= hint:
525                break
526        return lines
527
528    def writelines(self, lines):
529        self._checkClosed()
530        for line in lines:
531            self.write(line)
532
533io.IOBase.register(IOBase)
534
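# Illustrative sketch: because IOBase supplies __iter__/next and
# __enter__/__exit__, any concrete stream can be read line by line and is
# closed automatically ('log.txt' and process() are hypothetical).
#
#   with open('log.txt', 'r') as fp:
#       for line in fp:          # IOBase.__iter__ / next
#           process(line)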
535
536class RawIOBase(IOBase):
537
538    """Base class for raw binary I/O."""
539
540    # The read() method is implemented by calling readinto(); derived
541    # classes that want to support read() only need to implement
542    # readinto() as a primitive operation.  In general, readinto() can be
543    # more efficient than read().
544
545    # (It would be tempting to also provide an implementation of
546    # readinto() in terms of read(), in case the latter is a more suitable
547    # primitive operation, but that would lead to nasty recursion in case
548    # a subclass doesn't implement either.)
549
550    def read(self, n=-1):
551        """Read and return up to n bytes.
552
553        Returns an empty bytes object on EOF, or None if the object is
554        set not to block and has no data to read.
555        """
556        if n is None:
557            n = -1
558        if n < 0:
559            return self.readall()
560        b = bytearray(n.__index__())
561        n = self.readinto(b)
562        if n is None:
563            return None
564        del b[n:]
565        return bytes(b)
566
567    def readall(self):
568        """Read until EOF, using multiple read() call."""
569        res = bytearray()
570        while True:
571            data = self.read(DEFAULT_BUFFER_SIZE)
572            if not data:
573                break
574            res += data
575        if res:
576            return bytes(res)
577        else:
578            # b'' or None
579            return data
580
581    def readinto(self, b):
582        """Read up to len(b) bytes into b.
583
584        Returns number of bytes read (0 for EOF), or None if the object
585        is set not to block and has no data to read.
586        """
587        self._unsupported("readinto")
588
589    def write(self, b):
590        """Write the given buffer to the IO stream.
591
592        Returns the number of bytes written, which may be less than len(b).
593        """
594        self._unsupported("write")
595
596io.RawIOBase.register(RawIOBase)
597from _io import FileIO
598RawIOBase.register(FileIO)
599
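# Illustrative sketch: a raw stream only has to supply readinto() (plus the
# inquiry methods it wants to support); read() and readall() above are then
# inherited from RawIOBase.  ZeroStream is a made-up example class.
#
#   class ZeroStream(RawIOBase):
#       """An endless source of NUL bytes."""
#       def readable(self):
#           return True
#       def readinto(self, b):
#           b[:] = b'\x00' * len(b)
#           return len(b)
#
#   ZeroStream().read(4)    # -> b'\x00\x00\x00\x00'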
600
601class BufferedIOBase(IOBase):
602
603    """Base class for buffered IO objects.
604
605    The main difference with RawIOBase is that the read() method
606    supports omitting the size argument, and does not have a default
607    implementation that defers to readinto().
608
609    In addition, read(), readinto() and write() may raise
610    BlockingIOError if the underlying raw stream is in non-blocking
611    mode and not ready; unlike their raw counterparts, they will never
612    return None.
613
614    A typical implementation should not inherit from a RawIOBase
615    implementation, but wrap one.
616    """
617
618    def read(self, n=None):
619        """Read and return up to n bytes.
620
621        If the argument is omitted, None, or negative, reads and
622        returns all data until EOF.
623
624        If the argument is positive, and the underlying raw stream is
625        not 'interactive', multiple raw reads may be issued to satisfy
626        the byte count (unless EOF is reached first).  But for
627        interactive raw streams (XXX and for pipes?), at most one raw
628        read will be issued, and a short result does not imply that
629        EOF is imminent.
630
631        Returns an empty bytes array on EOF.
632
633        Raises BlockingIOError if the underlying raw stream has no
634        data at the moment.
635        """
636        self._unsupported("read")
637
638    def read1(self, n=None):
639        """Read up to n bytes with at most one read() system call."""
640        self._unsupported("read1")
641
642    def readinto(self, b):
643        """Read up to len(b) bytes into b.
644
645        Like read(), this may issue multiple reads to the underlying raw
646        stream, unless the latter is 'interactive'.
647
648        Returns the number of bytes read (0 for EOF).
649
650        Raises BlockingIOError if the underlying raw stream has no
651        data at the moment.
652        """
653        data = self.read(len(b))
654        n = len(data)
655        try:
656            b[:n] = data
657        except TypeError as err:
658            import array
659            if not isinstance(b, array.array):
660                raise err
661            b[:n] = array.array(b'b', data)
662        return n
663
664    def write(self, b):
665        """Write the given buffer to the IO stream.
666
667        Return the number of bytes written, which is always len(b).
668
669        Raises BlockingIOError if the buffer is full and the
670        underlying raw stream cannot accept more data at the moment.
671        """
672        self._unsupported("write")
673
674    def detach(self):
675        """
676        Separate the underlying raw stream from the buffer and return it.
677
678        After the raw stream has been detached, the buffer is in an unusable
679        state.
680        """
681        self._unsupported("detach")
682
683io.BufferedIOBase.register(BufferedIOBase)
684
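# Illustrative sketch of the fallback above: a buffered class that only
# defines read() still gets a working readinto(), which copies the result
# into the caller's buffer.  Sixes is a made-up example class.
#
#   class Sixes(BufferedIOBase):
#       def readable(self):
#           return True
#       def read(self, n=None):
#           return b'\x06' * (16 if n is None or n < 0 else n)
#
#   buf = bytearray(4)
#   Sixes().readinto(buf)    # -> 4, buf is now bytearray(b'\x06\x06\x06\x06')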
685
686class _BufferedIOMixin(BufferedIOBase):
687
688    """A mixin implementation of BufferedIOBase with an underlying raw stream.
689
690    This passes most requests on to the underlying raw stream.  It
691    does *not* provide implementations of read(), readinto() or
692    write().
693    """
694
695    def __init__(self, raw):
696        self._raw = raw
697
698    ### Positioning ###
699
700    def seek(self, pos, whence=0):
701        new_position = self.raw.seek(pos, whence)
702        if new_position < 0:
703            raise IOError("seek() returned an invalid position")
704        return new_position
705
706    def tell(self):
707        pos = self.raw.tell()
708        if pos < 0:
709            raise IOError("tell() returned an invalid position")
710        return pos
711
712    def truncate(self, pos=None):
713        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
714        # and a flush may be necessary to synch both views of the current
715        # file state.
716        self.flush()
717
718        if pos is None:
719            pos = self.tell()
720        # XXX: Should seek() be used, instead of passing the position
721        # XXX  directly to truncate?
722        return self.raw.truncate(pos)
723
724    ### Flush and close ###
725
726    def flush(self):
727        if self.closed:
728            raise ValueError("flush of closed file")
729        self.raw.flush()
730
731    def close(self):
732        if self.raw is not None and not self.closed:
733            try:
734                # may raise BlockingIOError or BrokenPipeError etc
735                self.flush()
736            finally:
737                self.raw.close()
738
739    def detach(self):
740        if self.raw is None:
741            raise ValueError("raw stream already detached")
742        self.flush()
743        raw = self._raw
744        self._raw = None
745        return raw
746
747    ### Inquiries ###
748
749    def seekable(self):
750        return self.raw.seekable()
751
752    def readable(self):
753        return self.raw.readable()
754
755    def writable(self):
756        return self.raw.writable()
757
758    @property
759    def raw(self):
760        return self._raw
761
762    @property
763    def closed(self):
764        return self.raw.closed
765
766    @property
767    def name(self):
768        return self.raw.name
769
770    @property
771    def mode(self):
772        return self.raw.mode
773
774    def __repr__(self):
775        clsname = self.__class__.__name__
776        try:
777            name = self.name
778        except Exception:
779            return "<_pyio.{0}>".format(clsname)
780        else:
781            return "<_pyio.{0} name={1!r}>".format(clsname, name)
782
783    ### Lower-level APIs ###
784
785    def fileno(self):
786        return self.raw.fileno()
787
788    def isatty(self):
789        return self.raw.isatty()
790
791
792class BytesIO(BufferedIOBase):
793
794    """Buffered I/O implementation using an in-memory bytes buffer."""
795
796    def __init__(self, initial_bytes=None):
797        buf = bytearray()
798        if initial_bytes is not None:
799            buf.extend(initial_bytes)
800        self._buffer = buf
801        self._pos = 0
802
803    def __getstate__(self):
804        if self.closed:
805            raise ValueError("__getstate__ on closed file")
806        return self.__dict__.copy()
807
808    def getvalue(self):
809        """Return the bytes value (contents) of the buffer
810        """
811        if self.closed:
812            raise ValueError("getvalue on closed file")
813        return bytes(self._buffer)
814
815    def read(self, n=None):
816        if self.closed:
817            raise ValueError("read from closed file")
818        if n is None:
819            n = -1
820        if not isinstance(n, (int, long)):
821            raise TypeError("integer argument expected, got {0!r}".format(
822                type(n)))
823        if n < 0:
824            n = len(self._buffer)
825        if len(self._buffer) <= self._pos:
826            return b""
827        newpos = min(len(self._buffer), self._pos + n)
828        b = self._buffer[self._pos : newpos]
829        self._pos = newpos
830        return bytes(b)
831
832    def read1(self, n):
833        """This is the same as read.
834        """
835        return self.read(n)
836
837    def write(self, b):
838        if self.closed:
839            raise ValueError("write to closed file")
840        if isinstance(b, unicode):
841            raise TypeError("can't write unicode to binary stream")
842        n = len(b)
843        if n == 0:
844            return 0
845        pos = self._pos
846        if pos > len(self._buffer):
847            # Inserts null bytes between the current end of the file
848            # and the new write position.
849            padding = b'\x00' * (pos - len(self._buffer))
850            self._buffer += padding
851        self._buffer[pos:pos + n] = b
852        self._pos += n
853        return n
854
855    def seek(self, pos, whence=0):
856        if self.closed:
857            raise ValueError("seek on closed file")
858        try:
859            pos.__index__
860        except AttributeError:
861            raise TypeError("an integer is required")
862        if whence == 0:
863            if pos < 0:
864                raise ValueError("negative seek position %r" % (pos,))
865            self._pos = pos
866        elif whence == 1:
867            self._pos = max(0, self._pos + pos)
868        elif whence == 2:
869            self._pos = max(0, len(self._buffer) + pos)
870        else:
871            raise ValueError("invalid whence value")
872        return self._pos
873
874    def tell(self):
875        if self.closed:
876            raise ValueError("tell on closed file")
877        return self._pos
878
879    def truncate(self, pos=None):
880        if self.closed:
881            raise ValueError("truncate on closed file")
882        if pos is None:
883            pos = self._pos
884        else:
885            try:
886                pos.__index__
887            except AttributeError:
888                raise TypeError("an integer is required")
889            if pos < 0:
890                raise ValueError("negative truncate position %r" % (pos,))
891        del self._buffer[pos:]
892        return pos
893
894    def readable(self):
895        if self.closed:
896            raise ValueError("I/O operation on closed file.")
897        return True
898
899    def writable(self):
900        if self.closed:
901            raise ValueError("I/O operation on closed file.")
902        return True
903
904    def seekable(self):
905        if self.closed:
906            raise ValueError("I/O operation on closed file.")
907        return True
908
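# Usage sketch: BytesIO provides the buffered API over an in-memory buffer.
#
#   b = BytesIO(b'hello world')
#   b.read(5)           # -> b'hello'
#   b.seek(0)
#   b.write(b'HELLO')   # overwrites in place, returns 5
#   b.getvalue()        # -> b'HELLO world'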
909
910class BufferedReader(_BufferedIOMixin):
911
912    """BufferedReader(raw[, buffer_size])
913
914    A buffer for a readable, sequential RawIOBase object.
915
916    The constructor creates a BufferedReader for the given readable raw
917    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
918    is used.
919    """
920
921    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
922        """Create a new buffered reader using the given readable raw IO object.
923        """
924        if not raw.readable():
925            raise IOError('"raw" argument must be readable.')
926
927        _BufferedIOMixin.__init__(self, raw)
928        if buffer_size <= 0:
929            raise ValueError("invalid buffer size")
930        self.buffer_size = buffer_size
931        self._reset_read_buf()
932        self._read_lock = Lock()
933
934    def _reset_read_buf(self):
935        self._read_buf = b""
936        self._read_pos = 0
937
938    def read(self, n=None):
939        """Read n bytes.
940
941        Returns exactly n bytes of data unless the underlying raw IO
942        stream reaches EOF or if the call would block in non-blocking
943        mode. If n is negative, read until EOF or until read() would
944        block.
945        """
946        if n is not None and n < -1:
947            raise ValueError("invalid number of bytes to read")
948        with self._read_lock:
949            return self._read_unlocked(n)
950
951    def _read_unlocked(self, n=None):
952        nodata_val = b""
953        empty_values = (b"", None)
954        buf = self._read_buf
955        pos = self._read_pos
956
957        # Special case for when the number of bytes to read is unspecified.
958        if n is None or n == -1:
959            self._reset_read_buf()
960            chunks = [buf[pos:]]  # Strip the consumed bytes.
961            current_size = 0
962            while True:
963                # Read until EOF or until read() would block.
964                try:
965                    chunk = self.raw.read()
966                except IOError as e:
967                    if e.errno != EINTR:
968                        raise
969                    continue
970                if chunk in empty_values:
971                    nodata_val = chunk
972                    break
973                current_size += len(chunk)
974                chunks.append(chunk)
975            return b"".join(chunks) or nodata_val
976
977        # The number of bytes to read is specified, return at most n bytes.
978        avail = len(buf) - pos  # Length of the available buffered data.
979        if n <= avail:
980            # Fast path: the data to read is fully buffered.
981            self._read_pos += n
982            return buf[pos:pos+n]
983        # Slow path: read from the stream until enough bytes are read,
984        # or until an EOF occurs or until read() would block.
985        chunks = [buf[pos:]]
986        wanted = max(self.buffer_size, n)
987        while avail < n:
988            try:
989                chunk = self.raw.read(wanted)
990            except IOError as e:
991                if e.errno != EINTR:
992                    raise
993                continue
994            if chunk in empty_values:
995                nodata_val = chunk
996                break
997            avail += len(chunk)
998            chunks.append(chunk)
999        # n is more than avail only when an EOF occurred or when
1000        # read() would have blocked.
1001        n = min(n, avail)
1002        out = b"".join(chunks)
1003        self._read_buf = out[n:]  # Save the extra data in the buffer.
1004        self._read_pos = 0
1005        return out[:n] if out else nodata_val
1006
1007    def peek(self, n=0):
1008        """Returns buffered bytes without advancing the position.
1009
1010        The argument indicates a desired minimal number of bytes; we
1011        do at most one raw read to satisfy it.  We never return more
1012        than self.buffer_size.
1013        """
1014        with self._read_lock:
1015            return self._peek_unlocked(n)
1016
1017    def _peek_unlocked(self, n=0):
1018        want = min(n, self.buffer_size)
1019        have = len(self._read_buf) - self._read_pos
1020        if have < want or have <= 0:
1021            to_read = self.buffer_size - have
1022            while True:
1023                try:
1024                    current = self.raw.read(to_read)
1025                except IOError as e:
1026                    if e.errno != EINTR:
1027                        raise
1028                    continue
1029                break
1030            if current:
1031                self._read_buf = self._read_buf[self._read_pos:] + current
1032                self._read_pos = 0
1033        return self._read_buf[self._read_pos:]
1034
1035    def read1(self, n):
1036        """Reads up to n bytes, with at most one read() system call."""
1037        # Returns up to n bytes.  If at least one byte is buffered, we
1038        # only return buffered bytes.  Otherwise, we do one raw read.
1039        if n < 0:
1040            raise ValueError("number of bytes to read must be positive")
1041        if n == 0:
1042            return b""
1043        with self._read_lock:
1044            self._peek_unlocked(1)
1045            return self._read_unlocked(
1046                min(n, len(self._read_buf) - self._read_pos))
1047
1048    def tell(self):
1049        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
1050
1051    def seek(self, pos, whence=0):
1052        if not (0 <= whence <= 2):
1053            raise ValueError("invalid whence value")
1054        with self._read_lock:
1055            if whence == 1:
1056                pos -= len(self._read_buf) - self._read_pos
1057            pos = _BufferedIOMixin.seek(self, pos, whence)
1058            self._reset_read_buf()
1059            return pos
1060
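# Usage sketch ('data.bin' is a hypothetical file): peek() returns already
# buffered bytes without consuming them, read1() does at most one raw read,
# and read() keeps reading until the requested size or EOF.
#
#   reader = BufferedReader(FileIO('data.bin', 'r'))
#   head = reader.peek(4)       # position is not advanced
#   chunk = reader.read1(512)   # at most one raw read
#   rest = reader.read()        # everything up to EOF
#
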
1061class BufferedWriter(_BufferedIOMixin):
1062
1063    """A buffer for a writeable sequential RawIO object.
1064
1065    The constructor creates a BufferedWriter for the given writeable raw
1066    stream. If the buffer_size is not given, it defaults to
1067    DEFAULT_BUFFER_SIZE.
1068    """
1069
1070    _warning_stack_offset = 2
1071
1072    def __init__(self, raw,
1073                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1074        if not raw.writable():
1075            raise IOError('"raw" argument must be writable.')
1076
1077        _BufferedIOMixin.__init__(self, raw)
1078        if buffer_size <= 0:
1079            raise ValueError("invalid buffer size")
1080        if max_buffer_size is not None:
1081            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
1082                          self._warning_stack_offset)
1083        self.buffer_size = buffer_size
1084        self._write_buf = bytearray()
1085        self._write_lock = Lock()
1086
1087    def write(self, b):
1088        if self.closed:
1089            raise ValueError("write to closed file")
1090        if isinstance(b, unicode):
1091            raise TypeError("can't write unicode to binary stream")
1092        with self._write_lock:
1093            # XXX we can implement some more tricks to try and avoid
1094            # partial writes
1095            if len(self._write_buf) > self.buffer_size:
1096                # We're full, so let's pre-flush the buffer.  (This may
1097                # raise BlockingIOError with characters_written == 0.)
1098                self._flush_unlocked()
1099            before = len(self._write_buf)
1100            self._write_buf.extend(b)
1101            written = len(self._write_buf) - before
1102            if len(self._write_buf) > self.buffer_size:
1103                try:
1104                    self._flush_unlocked()
1105                except BlockingIOError as e:
1106                    if len(self._write_buf) > self.buffer_size:
1107                        # We've hit the buffer_size. We have to accept a partial
1108                        # write and cut back our buffer.
1109                        overage = len(self._write_buf) - self.buffer_size
1110                        written -= overage
1111                        self._write_buf = self._write_buf[:self.buffer_size]
1112                        raise BlockingIOError(e.errno, e.strerror, written)
1113            return written
1114
1115    def truncate(self, pos=None):
1116        with self._write_lock:
1117            self._flush_unlocked()
1118            if pos is None:
1119                pos = self.raw.tell()
1120            return self.raw.truncate(pos)
1121
1122    def flush(self):
1123        with self._write_lock:
1124            self._flush_unlocked()
1125
1126    def _flush_unlocked(self):
1127        if self.closed:
1128            raise ValueError("flush of closed file")
1129        while self._write_buf:
1130            try:
1131                n = self.raw.write(self._write_buf)
1132            except BlockingIOError:
1133                raise RuntimeError("self.raw should implement RawIOBase: it "
1134                                   "should not raise BlockingIOError")
1135            except IOError as e:
1136                if e.errno != EINTR:
1137                    raise
1138                continue
1139            if n is None:
1140                raise BlockingIOError(
1141                    errno.EAGAIN,
1142                    "write could not complete without blocking", 0)
1143            if n > len(self._write_buf) or n < 0:
1144                raise IOError("write() returned incorrect number of bytes")
1145            del self._write_buf[:n]
1146
1147    def tell(self):
1148        return _BufferedIOMixin.tell(self) + len(self._write_buf)
1149
1150    def seek(self, pos, whence=0):
1151        if not (0 <= whence <= 2):
1152            raise ValueError("invalid whence")
1153        with self._write_lock:
1154            self._flush_unlocked()
1155            return _BufferedIOMixin.seek(self, pos, whence)
1156
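# Usage sketch ('out.bin' is a hypothetical file): writes accumulate in the
# internal bytearray and reach the raw stream once the buffer exceeds
# buffer_size, or on flush()/close().
#
#   writer = BufferedWriter(FileIO('out.bin', 'w'), buffer_size=4096)
#   writer.write(b'header')     # returns 6; data is probably still buffered
#   writer.flush()              # forces the raw write
#   writer.close()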
1157
1158class BufferedRWPair(BufferedIOBase):
1159
1160    """A buffered reader and writer object together.
1161
1162    A buffered reader object and buffered writer object put together to
1163    form a sequential IO object that can read and write. This is typically
1164    used with a socket or two-way pipe.
1165
1166    reader and writer are RawIOBase objects that are readable and
1167    writeable respectively. If the buffer_size is omitted it defaults to
1168    DEFAULT_BUFFER_SIZE.
1169    """
1170
1171    # XXX The usefulness of this (compared to having two separate IO
1172    # objects) is questionable.
1173
1174    def __init__(self, reader, writer,
1175                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1176        """Constructor.
1177
1178        The arguments are two RawIO instances.
1179        """
1180        if max_buffer_size is not None:
1181            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
1182
1183        if not reader.readable():
1184            raise IOError('"reader" argument must be readable.')
1185
1186        if not writer.writable():
1187            raise IOError('"writer" argument must be writable.')
1188
1189        self.reader = BufferedReader(reader, buffer_size)
1190        self.writer = BufferedWriter(writer, buffer_size)
1191
1192    def read(self, n=None):
1193        if n is None:
1194            n = -1
1195        return self.reader.read(n)
1196
1197    def readinto(self, b):
1198        return self.reader.readinto(b)
1199
1200    def write(self, b):
1201        return self.writer.write(b)
1202
1203    def peek(self, n=0):
1204        return self.reader.peek(n)
1205
1206    def read1(self, n):
1207        return self.reader.read1(n)
1208
1209    def readable(self):
1210        return self.reader.readable()
1211
1212    def writable(self):
1213        return self.writer.writable()
1214
1215    def flush(self):
1216        return self.writer.flush()
1217
1218    def close(self):
1219        try:
1220            self.writer.close()
1221        finally:
1222            self.reader.close()
1223
1224    def isatty(self):
1225        return self.reader.isatty() or self.writer.isatty()
1226
1227    @property
1228    def closed(self):
1229        return self.writer.closed
1230
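# Usage sketch: a pipe yields one readable and one writable descriptor,
# which is the kind of two-ended transport BufferedRWPair is meant to wrap.
#
#   r_fd, w_fd = os.pipe()
#   pair = BufferedRWPair(FileIO(r_fd, 'r'), FileIO(w_fd, 'w'))
#   pair.write(b'ping')
#   pair.flush()
#   pair.read(4)    # -> b'ping'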
1231
1232class BufferedRandom(BufferedWriter, BufferedReader):
1233
1234    """A buffered interface to random access streams.
1235
1236    The constructor creates a reader and writer for a seekable stream,
1237    raw, given in the first argument. If the buffer_size is omitted it
1238    defaults to DEFAULT_BUFFER_SIZE.
1239    """
1240
1241    _warning_stack_offset = 3
1242
1243    def __init__(self, raw,
1244                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1245        raw._checkSeekable()
1246        BufferedReader.__init__(self, raw, buffer_size)
1247        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1248
1249    def seek(self, pos, whence=0):
1250        if not (0 <= whence <= 2):
1251            raise ValueError("invalid whence")
1252        self.flush()
1253        if self._read_buf:
1254            # Undo read ahead.
1255            with self._read_lock:
1256                self.raw.seek(self._read_pos - len(self._read_buf), 1)
1257        # First do the raw seek, then empty the read buffer, so that
1258        # if the raw seek fails, we don't lose buffered data forever.
1259        pos = self.raw.seek(pos, whence)
1260        with self._read_lock:
1261            self._reset_read_buf()
1262        if pos < 0:
1263            raise IOError("seek() returned invalid position")
1264        return pos
1265
1266    def tell(self):
1267        if self._write_buf:
1268            return BufferedWriter.tell(self)
1269        else:
1270            return BufferedReader.tell(self)
1271
1272    def truncate(self, pos=None):
1273        if pos is None:
1274            pos = self.tell()
1275        # Use seek to flush the read buffer.
1276        return BufferedWriter.truncate(self, pos)
1277
1278    def read(self, n=None):
1279        if n is None:
1280            n = -1
1281        self.flush()
1282        return BufferedReader.read(self, n)
1283
1284    def readinto(self, b):
1285        self.flush()
1286        return BufferedReader.readinto(self, b)
1287
1288    def peek(self, n=0):
1289        self.flush()
1290        return BufferedReader.peek(self, n)
1291
1292    def read1(self, n):
1293        self.flush()
1294        return BufferedReader.read1(self, n)
1295
1296    def write(self, b):
1297        if self._read_buf:
1298            # Undo readahead
1299            with self._read_lock:
1300                self.raw.seek(self._read_pos - len(self._read_buf), 1)
1301                self._reset_read_buf()
1302        return BufferedWriter.write(self, b)
1303
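# Usage sketch ('scratch.bin' is a hypothetical file): open() above returns
# a BufferedRandom for 'w+b', which coordinates the read and write buffers
# around seek().
#
#   with open('scratch.bin', 'w+b') as f:
#       f.write(b'abcdef')
#       f.seek(2)
#       f.read(2)       # -> b'cd'
#       f.write(b'XY')  # overwrites the bytes at offsets 4 and 5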
1304
1305class TextIOBase(IOBase):
1306
1307    """Base class for text I/O.
1308
1309    This class provides a character and line based interface to stream
1310    I/O. There is no readinto method because Python's character strings
1311    are immutable. There is no public constructor.
1312    """
1313
1314    def read(self, n=-1):
1315        """Read at most n characters from stream.
1316
1317        Read from underlying buffer until we have n characters or we hit EOF.
1318        If n is negative or omitted, read until EOF.
1319        """
1320        self._unsupported("read")
1321
1322    def write(self, s):
1323        """Write string s to stream."""
1324        self._unsupported("write")
1325
1326    def truncate(self, pos=None):
1327        """Truncate size to pos."""
1328        self._unsupported("truncate")
1329
1330    def readline(self):
1331        """Read until newline or EOF.
1332
1333        Returns an empty string if EOF is hit immediately.
1334        """
1335        self._unsupported("readline")
1336
1337    def detach(self):
1338        """
1339        Separate the underlying buffer from the TextIOBase and return it.
1340
1341        After the underlying buffer has been detached, the TextIO is in an
1342        unusable state.
1343        """
1344        self._unsupported("detach")
1345
1346    @property
1347    def encoding(self):
1348        """Subclasses should override."""
1349        return None
1350
1351    @property
1352    def newlines(self):
1353        """Line endings translated so far.
1354
1355        Only line endings translated during reading are considered.
1356
1357        Subclasses should override.
1358        """
1359        return None
1360
1361    @property
1362    def errors(self):
1363        """Error setting of the decoder or encoder.
1364
1365        Subclasses should override."""
1366        return None
1367
1368io.TextIOBase.register(TextIOBase)
1369
1370
1371class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1372    r"""Codec used when reading a file in universal newlines mode.  It wraps
1373    another incremental decoder, translating \r\n and \r into \n.  It also
1374    records the types of newlines encountered.  When used with
1375    translate=False, it ensures that the newline sequence is returned in
1376    one piece.
1377    """
1378    def __init__(self, decoder, translate, errors='strict'):
1379        codecs.IncrementalDecoder.__init__(self, errors=errors)
1380        self.translate = translate
1381        self.decoder = decoder
1382        self.seennl = 0
1383        self.pendingcr = False
1384
1385    def decode(self, input, final=False):
1386        # decode input (together with any \r retained from a previous pass)
1387        if self.decoder is None:
1388            output = input
1389        else:
1390            output = self.decoder.decode(input, final=final)
1391        if self.pendingcr and (output or final):
1392            output = "\r" + output
1393            self.pendingcr = False
1394
1395        # retain last \r even when not translating data:
1396        # then readline() is sure to get \r\n in one pass
1397        if output.endswith("\r") and not final:
1398            output = output[:-1]
1399            self.pendingcr = True
1400
1401        # Record which newlines are read
1402        crlf = output.count('\r\n')
1403        cr = output.count('\r') - crlf
1404        lf = output.count('\n') - crlf
1405        self.seennl |= (lf and self._LF) | (cr and self._CR) \
1406                    | (crlf and self._CRLF)
1407
1408        if self.translate:
1409            if crlf:
1410                output = output.replace("\r\n", "\n")
1411            if cr:
1412                output = output.replace("\r", "\n")
1413
1414        return output
1415
1416    def getstate(self):
1417        if self.decoder is None:
1418            buf = b""
1419            flag = 0
1420        else:
1421            buf, flag = self.decoder.getstate()
1422        flag <<= 1
1423        if self.pendingcr:
1424            flag |= 1
1425        return buf, flag
1426
1427    def setstate(self, state):
1428        buf, flag = state
1429        self.pendingcr = bool(flag & 1)
1430        if self.decoder is not None:
1431            self.decoder.setstate((buf, flag >> 1))
1432
1433    def reset(self):
1434        self.seennl = 0
1435        self.pendingcr = False
1436        if self.decoder is not None:
1437            self.decoder.reset()
1438
1439    _LF = 1
1440    _CR = 2
1441    _CRLF = 4
1442
1443    @property
1444    def newlines(self):
1445        return (None,
1446                "\n",
1447                "\r",
1448                ("\r", "\n"),
1449                "\r\n",
1450                ("\n", "\r\n"),
1451                ("\r", "\r\n"),
1452                ("\r", "\n", "\r\n")
1453               )[self.seennl]
1454
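# Illustrative sketch: with no inner decoder the class only normalizes
# newlines, holding back a trailing '\r' until it knows whether a '\n'
# follows in the next chunk.
#
#   d = IncrementalNewlineDecoder(None, translate=True)
#   d.decode(u'a\r')               # -> u'a'   ('\r' kept pending)
#   d.decode(u'\nb', final=True)   # -> u'\nb' (the pair collapsed to '\n')
#   d.newlines                     # -> '\r\n'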
1455
1456class TextIOWrapper(TextIOBase):
1457
1458    r"""Character and line based layer over a BufferedIOBase object, buffer.
1459
1460    encoding gives the name of the encoding that the stream will be
1461    decoded or encoded with. It defaults to locale.getpreferredencoding.
1462
1463    errors determines the strictness of encoding and decoding (see the
1464    codecs.register) and defaults to "strict".
1465
1466    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
1467    handling of line endings. If it is None, universal newlines is
1468    enabled.  With this enabled, on input, the lines endings '\n', '\r',
1469    or '\r\n' are translated to '\n' before being returned to the
1470    caller. Conversely, on output, '\n' is translated to the system
1471    default line separator, os.linesep. If newline is any of the other
1472    legal values, input lines are terminated only by that string, and it
1473    is returned untranslated. On output, '\n' is converted to that
1474    string.
1475
1476    If line_buffering is True, a call to flush is implied when a call to
1477    write contains a newline character.
1478    """
1479
1480    _CHUNK_SIZE = 2048
1481
1482    def __init__(self, buffer, encoding=None, errors=None, newline=None,
1483                 line_buffering=False):
1484        if newline is not None and not isinstance(newline, basestring):
1485            raise TypeError("illegal newline type: %r" % (type(newline),))
1486        if newline not in (None, "", "\n", "\r", "\r\n"):
1487            raise ValueError("illegal newline value: %r" % (newline,))
1488        if encoding is None:
1489            try:
1490                import locale
1491            except ImportError:
1492                # Importing locale may fail if Python is being built
1493                encoding = "ascii"
1494            else:
1495                encoding = locale.getpreferredencoding()
1496
1497        if not isinstance(encoding, basestring):
1498            raise ValueError("invalid encoding: %r" % encoding)
1499
1500        if sys.py3kwarning and not codecs.lookup(encoding)._is_text_encoding:
1501            msg = ("%r is not a text encoding; "
1502                   "use codecs.open() to handle arbitrary codecs")
1503            warnings.warnpy3k(msg % encoding, stacklevel=2)
1504
1505        if errors is None:
1506            errors = "strict"
1507        else:
1508            if not isinstance(errors, basestring):
1509                raise ValueError("invalid errors: %r" % errors)
1510
1511        self._buffer = buffer
1512        self._line_buffering = line_buffering
1513        self._encoding = encoding
1514        self._errors = errors
1515        self._readuniversal = not newline
1516        self._readtranslate = newline is None
1517        self._readnl = newline
1518        self._writetranslate = newline != ''
1519        self._writenl = newline or os.linesep
1520        self._encoder = None
1521        self._decoder = None
1522        self._decoded_chars = ''  # buffer for text returned from decoder
1523        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
1524        self._snapshot = None  # info for reconstructing decoder state
1525        self._seekable = self._telling = self.buffer.seekable()
1526
1527        if self._seekable and self.writable():
1528            position = self.buffer.tell()
1529            if position != 0:
1530                try:
1531                    self._get_encoder().setstate(0)
1532                except LookupError:
1533                    # Sometimes the encoder doesn't exist
1534                    pass
1535
1536    # self._snapshot is either None, or a tuple (dec_flags, next_input)
1537    # where dec_flags is the second (integer) item of the decoder state
1538    # and next_input is the chunk of input bytes that comes next after the
1539    # snapshot point.  We use this to reconstruct decoder states in tell().
1540
1541    # Naming convention:
1542    #   - "bytes_..." for integer variables that count input bytes
1543    #   - "chars_..." for integer variables that count decoded characters
1544
1545    def __repr__(self):
1546        try:
1547            name = self.name
1548        except Exception:
1549            return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
1550        else:
1551            return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
1552                name, self.encoding)
1553
1554    @property
1555    def encoding(self):
1556        return self._encoding
1557
1558    @property
1559    def errors(self):
1560        return self._errors
1561
1562    @property
1563    def line_buffering(self):
1564        return self._line_buffering
1565
1566    @property
1567    def buffer(self):
1568        return self._buffer
1569
1570    def seekable(self):
1571        if self.closed:
1572            raise ValueError("I/O operation on closed file.")
1573        return self._seekable
1574
1575    def readable(self):
1576        return self.buffer.readable()
1577
1578    def writable(self):
1579        return self.buffer.writable()
1580
1581    def flush(self):
1582        self.buffer.flush()
1583        self._telling = self._seekable
1584
1585    def close(self):
1586        if self.buffer is not None and not self.closed:
1587            try:
1588                self.flush()
1589            finally:
1590                self.buffer.close()
1591
1592    @property
1593    def closed(self):
1594        return self.buffer.closed
1595
1596    @property
1597    def name(self):
1598        return self.buffer.name
1599
1600    def fileno(self):
1601        return self.buffer.fileno()
1602
1603    def isatty(self):
1604        return self.buffer.isatty()
1605
1606    def write(self, s):
1607        if self.closed:
1608            raise ValueError("write to closed file")
1609        if not isinstance(s, unicode):
1610            raise TypeError("can't write %s to text stream" %
1611                            s.__class__.__name__)
1612        length = len(s)
1613        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1614        if haslf and self._writetranslate and self._writenl != "\n":
1615            s = s.replace("\n", self._writenl)
1616        encoder = self._encoder or self._get_encoder()
1617        # XXX What if we were just reading?
1618        b = encoder.encode(s)
1619        self.buffer.write(b)
1620        if self._line_buffering and (haslf or "\r" in s):
1621            self.flush()
1622        self._set_decoded_chars('')
1623        self._snapshot = None
1624        if self._decoder:
1625            self._decoder.reset()
1626        return length
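
    # (Note: writing invalidates the read-side state above -- _decoded_chars,
    # the snapshot and the decoder -- because the underlying position has
    # moved past any text that was decoded but not yet consumed.)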
1627
1628    def _get_encoder(self):
1629        make_encoder = codecs.getincrementalencoder(self._encoding)
1630        self._encoder = make_encoder(self._errors)
1631        return self._encoder
1632
1633    def _get_decoder(self):
1634        make_decoder = codecs.getincrementaldecoder(self._encoding)
1635        decoder = make_decoder(self._errors)
1636        if self._readuniversal:
1637            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1638        self._decoder = decoder
1639        return decoder
1640
1641    # The following three methods implement an ADT for _decoded_chars.
1642    # Text returned from the decoder is buffered here until the client
1643    # requests it by calling our read() or readline() method.
1644    def _set_decoded_chars(self, chars):
1645        """Set the _decoded_chars buffer."""
1646        self._decoded_chars = chars
1647        self._decoded_chars_used = 0
1648
1649    def _get_decoded_chars(self, n=None):
1650        """Advance into the _decoded_chars buffer."""
1651        offset = self._decoded_chars_used
1652        if n is None:
1653            chars = self._decoded_chars[offset:]
1654        else:
1655            chars = self._decoded_chars[offset:offset + n]
1656        self._decoded_chars_used += len(chars)
1657        return chars
1658
1659    def _rewind_decoded_chars(self, n):
1660        """Rewind the _decoded_chars buffer."""
1661        if self._decoded_chars_used < n:
1662            raise AssertionError("rewind decoded_chars out of bounds")
1663        self._decoded_chars_used -= n
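
    # Illustrative use of the three helpers above (they are private, so this
    # is for exposition only):
    #
    #   >>> t = TextIOWrapper(BytesIO(), encoding="ascii")
    #   >>> t._set_decoded_chars(u"abcdef")
    #   >>> t._get_decoded_chars(2) == u"ab"
    #   True
    #   >>> t._rewind_decoded_chars(1)
    #   >>> t._get_decoded_chars() == u"bcdef"
    #   True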
1664
1665    def _read_chunk(self):
1666        """
1667        Read and decode the next chunk of data from the BufferedReader.
1668        """
1669
1670        # The return value is True unless EOF was reached.  The decoded
1671        # string is placed in self._decoded_chars (replacing its previous
1672        # value).  The entire input chunk is sent to the decoder, though
1673        # some of it may remain buffered in the decoder, yet to be
1674        # converted.
1675
1676        if self._decoder is None:
1677            raise ValueError("no decoder")
1678
1679        if self._telling:
1680            # To prepare for tell(), we need to snapshot a point in the
1681            # file where the decoder's input buffer is empty.
1682
1683            dec_buffer, dec_flags = self._decoder.getstate()
1684            # Given this, we know there was a valid snapshot point
1685            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1686
1687        # Read a chunk, decode it, and put the result in self._decoded_chars.
1688        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1689        eof = not input_chunk
1690        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1691
1692        if self._telling:
1693            # At the snapshot point, len(dec_buffer) bytes before the read,
1694            # the next input to be decoded is dec_buffer + input_chunk.
1695            self._snapshot = (dec_flags, dec_buffer + input_chunk)
1696
1697        return not eof
1698
1699    def _pack_cookie(self, position, dec_flags=0,
1700                           bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1701        # The meaning of a tell() cookie is: seek to position, set the
1702        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1703        # into the decoder with need_eof as the EOF flag, then skip
1704        # chars_to_skip characters of the decoded result.  For most simple
1705        # decoders, tell() will often just give a byte offset in the file.
1706        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1707               (chars_to_skip<<192) | bool(need_eof)<<256)
1708
1709    def _unpack_cookie(self, bigint):
1710        rest, position = divmod(bigint, 1<<64)
1711        rest, dec_flags = divmod(rest, 1<<64)
1712        rest, bytes_to_feed = divmod(rest, 1<<64)
1713        need_eof, chars_to_skip = divmod(rest, 1<<64)
1714        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
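
    # Worked example of the cookie layout (the field values are arbitrary;
    # each one occupies its own 64-bit slot):
    #
    #   >>> t = TextIOWrapper(BytesIO(), encoding="ascii")
    #   >>> c = t._pack_cookie(10, dec_flags=1, bytes_to_feed=3, chars_to_skip=2)
    #   >>> t._unpack_cookie(c) == (10, 1, 3, 0, 2)
    #   True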
1715
1716    def tell(self):
1717        if not self._seekable:
1718            raise IOError("underlying stream is not seekable")
1719        if not self._telling:
1720            raise IOError("telling position disabled by next() call")
1721        self.flush()
1722        position = self.buffer.tell()
1723        decoder = self._decoder
1724        if decoder is None or self._snapshot is None:
1725            if self._decoded_chars:
1726                # This should never happen.
1727                raise AssertionError("pending decoded text")
1728            return position
1729
1730        # Skip backward to the snapshot point (see _read_chunk).
1731        dec_flags, next_input = self._snapshot
1732        position -= len(next_input)
1733
1734        # How many decoded characters have been used up since the snapshot?
1735        chars_to_skip = self._decoded_chars_used
1736        if chars_to_skip == 0:
1737            # We haven't moved from the snapshot point.
1738            return self._pack_cookie(position, dec_flags)
1739
1740        # Starting from the snapshot position, we will walk the decoder
1741        # forward until it gives us enough decoded characters.
1742        saved_state = decoder.getstate()
1743        try:
1744            # Note our initial start point.
1745            decoder.setstate((b'', dec_flags))
1746            start_pos = position
1747            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1748            need_eof = 0
1749
1750            # Feed the decoder one byte at a time.  As we go, note the
1751            # nearest "safe start point" before the current location
1752            # (a point where the decoder has nothing buffered, so seek()
1753            # can safely start from there and advance to this location).
1754            for next_byte in next_input:
1755                bytes_fed += 1
1756                chars_decoded += len(decoder.decode(next_byte))
1757                dec_buffer, dec_flags = decoder.getstate()
1758                if not dec_buffer and chars_decoded <= chars_to_skip:
1759                    # Decoder buffer is empty, so this is a safe start point.
1760                    start_pos += bytes_fed
1761                    chars_to_skip -= chars_decoded
1762                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1763                if chars_decoded >= chars_to_skip:
1764                    break
1765            else:
1766                # We didn't get enough decoded data; signal EOF to get more.
1767                chars_decoded += len(decoder.decode(b'', final=True))
1768                need_eof = 1
1769                if chars_decoded < chars_to_skip:
1770                    raise IOError("can't reconstruct logical file position")
1771
1772            # The returned cookie corresponds to the last safe start point.
1773            return self._pack_cookie(
1774                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1775        finally:
1776            decoder.setstate(saved_state)
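
    # End-to-end sketch of tell() and seek() cooperating: after reading one
    # character that was encoded as two UTF-8 bytes, the returned cookie can
    # be handed back to seek() to restore the position:
    #
    #   >>> data = u"\xe9xyz\n".encode("utf-8")
    #   >>> t = TextIOWrapper(BytesIO(data), encoding="utf-8")
    #   >>> t.read(1) == u"\xe9"
    #   True
    #   >>> pos = t.tell()
    #   >>> t.read() == u"xyz\n"
    #   True
    #   >>> t.seek(pos) == pos
    #   True
    #   >>> t.read(3) == u"xyz"
    #   True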
1777
1778    def truncate(self, pos=None):
1779        self.flush()
1780        if pos is None:
1781            pos = self.tell()
1782        return self.buffer.truncate(pos)
1783
1784    def detach(self):
1785        if self.buffer is None:
1786            raise ValueError("buffer is already detached")
1787        self.flush()
1788        buffer = self._buffer
1789        self._buffer = None
1790        return buffer
1791
1792    def seek(self, cookie, whence=0):
1793        if self.closed:
1794            raise ValueError("seek on closed file")
1795        if not self._seekable:
1796            raise IOError("underlying stream is not seekable")
1797        if whence == 1: # seek relative to current position
1798            if cookie != 0:
1799                raise IOError("can't do nonzero cur-relative seeks")
1800            # Seeking to the current position should attempt to
1801            # sync the underlying buffer with the current position.
1802            whence = 0
1803            cookie = self.tell()
1804        if whence == 2: # seek relative to end of file
1805            if cookie != 0:
1806                raise IOError("can't do nonzero end-relative seeks")
1807            self.flush()
1808            position = self.buffer.seek(0, 2)
1809            self._set_decoded_chars('')
1810            self._snapshot = None
1811            if self._decoder:
1812                self._decoder.reset()
1813            return position
1814        if whence != 0:
1815            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1816                             (whence,))
1817        if cookie < 0:
1818            raise ValueError("negative seek position %r" % (cookie,))
1819        self.flush()
1820
1821        # The strategy of seek() is to go back to the safe start point
1822        # and replay the effect of read(chars_to_skip) from there.
1823        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1824            self._unpack_cookie(cookie)
1825
1826        # Seek back to the safe start point.
1827        self.buffer.seek(start_pos)
1828        self._set_decoded_chars('')
1829        self._snapshot = None
1830
1831        # Restore the decoder to its state from the safe start point.
1832        if cookie == 0 and self._decoder:
1833            self._decoder.reset()
1834        elif self._decoder or dec_flags or chars_to_skip:
1835            self._decoder = self._decoder or self._get_decoder()
1836            self._decoder.setstate((b'', dec_flags))
1837            self._snapshot = (dec_flags, b'')
1838
1839        if chars_to_skip:
1840            # Just like _read_chunk, feed the decoder and save a snapshot.
1841            input_chunk = self.buffer.read(bytes_to_feed)
1842            self._set_decoded_chars(
1843                self._decoder.decode(input_chunk, need_eof))
1844            self._snapshot = (dec_flags, input_chunk)
1845
1846            # Skip chars_to_skip of the decoded characters.
1847            if len(self._decoded_chars) < chars_to_skip:
1848                raise IOError("can't restore logical file position")
1849            self._decoded_chars_used = chars_to_skip
1850
1851        # Finally, reset the encoder (merely useful for proper BOM handling)
1852        try:
1853            encoder = self._encoder or self._get_encoder()
1854        except LookupError:
1855            # The codec may be unknown or lack an incremental encoder
1856            pass
1857        else:
1858            if cookie != 0:
1859                encoder.setstate(0)
1860            else:
1861                encoder.reset()
1862        return cookie
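
    # Usage sketch: only zero-valued relative seeks are supported on a text
    # stream, but absolute cookies from tell() and end-relative seeks to EOF
    # work as described above:
    #
    #   >>> t = TextIOWrapper(BytesIO(b"abc\n"), encoding="ascii")
    #   >>> t.seek(0, 2) == 4   # jump to EOF; the byte position is returned
    #   True
    #   >>> t.read() == u""
    #   True
    #   >>> t.seek(0) == 0      # rewind with an absolute cookie
    #   True
    #   >>> t.read() == u"abc\n"
    #   True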
1863
1864    def read(self, n=None):
1865        self._checkReadable()
1866        if n is None:
1867            n = -1
1868        decoder = self._decoder or self._get_decoder()
1869        try:
1870            n.__index__
1871        except AttributeError:
1872            raise TypeError("an integer is required")
1873        if n < 0:
1874            # Read everything.
1875            result = (self._get_decoded_chars() +
1876                      decoder.decode(self.buffer.read(), final=True))
1877            self._set_decoded_chars('')
1878            self._snapshot = None
1879            return result
1880        else:
1881            # Keep reading chunks until we have n characters to return.
1882            eof = False
1883            result = self._get_decoded_chars(n)
1884            while len(result) < n and not eof:
1885                eof = not self._read_chunk()
1886                result += self._get_decoded_chars(n - len(result))
1887            return result
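
    # Illustration of the bounded-read loop above: a request larger than
    # _CHUNK_SIZE (2048 by default) is assembled from several decoded chunks:
    #
    #   >>> t = TextIOWrapper(BytesIO(b"x" * 5000), encoding="ascii")
    #   >>> len(t.read(3000))
    #   3000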
1888
1889    def next(self):
1890        self._telling = False
1891        line = self.readline()
1892        if not line:
1893            self._snapshot = None
1894            self._telling = self._seekable
1895            raise StopIteration
1896        return line
1897
1898    def readline(self, limit=None):
1899        if self.closed:
1900            raise ValueError("read from closed file")
1901        if limit is None:
1902            limit = -1
1903        elif not isinstance(limit, (int, long)):
1904            raise TypeError("limit must be an integer")
1905
1906        # Grab all the decoded text (we will rewind any extra bits later).
1907        line = self._get_decoded_chars()
1908
1909        start = 0
1910        # Make the decoder if it doesn't already exist.
1911        if not self._decoder:
1912            self._get_decoder()
1913
1914        pos = endpos = None
1915        while True:
1916            if self._readtranslate:
1917                # Newlines are already translated, only search for \n
1918                pos = line.find('\n', start)
1919                if pos >= 0:
1920                    endpos = pos + 1
1921                    break
1922                else:
1923                    start = len(line)
1924
1925            elif self._readuniversal:
1926                # Universal newline search. Find any of \r, \r\n, \n
1927                # The decoder ensures that \r\n are not split in two pieces
1928
1929                # In C we'd look for these in parallel of course.
1930                nlpos = line.find("\n", start)
1931                crpos = line.find("\r", start)
1932                if crpos == -1:
1933                    if nlpos == -1:
1934                        # Nothing found
1935                        start = len(line)
1936                    else:
1937                        # Found \n
1938                        endpos = nlpos + 1
1939                        break
1940                elif nlpos == -1:
1941                    # Found lone \r
1942                    endpos = crpos + 1
1943                    break
1944                elif nlpos < crpos:
1945                    # Found \n
1946                    endpos = nlpos + 1
1947                    break
1948                elif nlpos == crpos + 1:
1949                    # Found \r\n
1950                    endpos = crpos + 2
1951                    break
1952                else:
1953                    # Found \r
1954                    endpos = crpos + 1
1955                    break
1956            else:
1957                # non-universal
1958                pos = line.find(self._readnl)
1959                if pos >= 0:
1960                    endpos = pos + len(self._readnl)
1961                    break
1962
1963            if limit >= 0 and len(line) >= limit:
1964                endpos = limit  # reached length limit
1965                break
1966
1967            # No line ending seen yet - get more data
1968            while self._read_chunk():
1969                if self._decoded_chars:
1970                    break
1971            if self._decoded_chars:
1972                line += self._get_decoded_chars()
1973            else:
1974                # end of file
1975                self._set_decoded_chars('')
1976                self._snapshot = None
1977                return line
1978
1979        if limit >= 0 and endpos > limit:
1980            endpos = limit  # don't exceed limit
1981
1982        # Rewind _decoded_chars to just after the line ending we found.
1983        self._rewind_decoded_chars(len(line) - endpos)
1984        return line[:endpos]
1985
1986    @property
1987    def newlines(self):
1988        return self._decoder.newlines if self._decoder else None
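
    # Sketch of the universal-newline behaviour implemented by readline()
    # above: with the default newline=None, '\r', '\n' and '\r\n' all
    # terminate lines and are translated to '\n'; the styles seen so far are
    # reported by the newlines property:
    #
    #   >>> t = TextIOWrapper(BytesIO(b"a\nb\r\nc\rd"), encoding="ascii")
    #   >>> t.readline() == u"a\n"
    #   True
    #   >>> t.readline() == u"b\n"
    #   True
    #   >>> t.read() == u"c\nd"
    #   True
    #   >>> sorted(t.newlines) == ["\n", "\r", "\r\n"]
    #   True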
1989
1990
1991class StringIO(TextIOWrapper):
1992    """Text I/O implementation using an in-memory buffer.
1993
1994    The initial_value argument sets the initial value of the object.  The
1995    newline argument has the same meaning as in TextIOWrapper's constructor.
1996    """
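
    # Usage sketch:
    #
    #   >>> s = StringIO(u"hello\nworld\n")
    #   >>> s.readline() == u"hello\n"
    #   True
    #   >>> s.getvalue() == u"hello\nworld\n"
    #   True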
1997
1998    def __init__(self, initial_value="", newline="\n"):
1999        super(StringIO, self).__init__(BytesIO(),
2000                                       encoding="utf-8",
2001                                       errors="strict",
2002                                       newline=newline)
2003        # Issue #5645: make universal newlines semantics the same as in the
2004        # C version, even under Windows.
2005        if newline is None:
2006            self._writetranslate = False
2007        if initial_value:
2008            if not isinstance(initial_value, unicode):
2009                initial_value = unicode(initial_value)
2010            self.write(initial_value)
2011            self.seek(0)
2012
2013    def getvalue(self):
2014        self.flush()
2015        decoder = self._decoder or self._get_decoder()
2016        old_state = decoder.getstate()
2017        decoder.reset()
2018        try:
2019            return decoder.decode(self.buffer.getvalue(), final=True)
2020        finally:
2021            decoder.setstate(old_state)
2022
2023    def __repr__(self):
2024        # TextIOWrapper tells the encoding in its repr. In StringIO,
2025        # that's an implementation detail.
2026        return object.__repr__(self)
2027
2028    @property
2029    def errors(self):
2030        return None
2031
2032    @property
2033    def encoding(self):
2034        return None
2035
2036    def detach(self):
2037        # This doesn't make sense on StringIO.
2038        self._unsupported("detach")
2039