• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Create portable serialized representations of Python objects.
2
3See module copyreg for a mechanism for registering custom picklers.
4See module pickletools source for extensive comments.
5
6Classes:
7
8    Pickler
9    Unpickler
10
11Functions:
12
13    dump(object, file)
14    dumps(object) -> string
15    load(file) -> object
16    loads(string) -> object
17
18Misc variables:
19
20    __version__
21    format_version
22    compatible_formats
23
24"""
25
26from types import FunctionType
27from copyreg import dispatch_table
28from copyreg import _extension_registry, _inverted_registry, _extension_cache
29from itertools import islice
30from functools import partial
31import sys
32from sys import maxsize
33from struct import pack, unpack
34import re
35import io
36import codecs
37import _compat_pickle
38
# Names re-exported by ``import pickle``; the opcode constants defined
# below are appended to this list at the end of the opcode section.
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# PickleBuffer (used by protocol 5) lives in the C accelerator module and
# may be absent on implementations without _pickle; record its availability
# so the protocol-5 code paths below can be defined conditionally.
try:
    from _pickle import PickleBuffer
    __all__.append("PickleBuffer")
    _HAVE_PICKLE_BUFFER = True
except ImportError:
    _HAVE_PICKLE_BUFFER = False
48
49
# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# These are purely informational; no code uses these.
format_version = "4.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      "3.0",            # Protocol 3
                      "4.0",            # Protocol 4
                      "5.0",            # Protocol 5
                      ]                 # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 5

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
# Only bump this if the oldest still supported version of Python already
# includes it.
DEFAULT_PROTOCOL = 4
72
class PickleError(Exception):
    """Common base class for every pickling/unpickling exception."""
76
class PicklingError(PickleError):
    """Raised when an unpicklable object is passed to the dump() method."""
83
class UnpicklingError(PickleError):
    """Raised when there is a problem unpickling an object, such as a
    security violation.

    Other exceptions may also surface during unpickling, including (but
    not limited to) AttributeError, EOFError, ImportError, and IndexError.
    """
94
95# An instance of _Stop is raised by Unpickler.load_stop() in response to
96# the STOP opcode, passing the object that is the result of unpickling.
97class _Stop(Exception):
98    def __init__(self, value):
99        self.value = value
100
# Jython has PyStringMap; it's a dict subclass with string keys.  On other
# implementations the import fails and PyStringMap is set to None, so later
# code can test for it unconditionally.
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None
106
# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

# Lookup table: tuple length (0-3) -> dedicated protocol-2 tuple opcode.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   #  "     "   ;    "      "       "      " < 256 bytes

# Protocol 4

SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
BINUNICODE8      = b'\x8d'  # push very long string
BINBYTES8        = b'\x8e'  # push very long bytes string
EMPTY_SET        = b'\x8f'  # push empty set on the stack
ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
FROZENSET        = b'\x91'  # build frozenset from topmost stack items
NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
MEMOIZE          = b'\x94'  # store top of the stack in memo
FRAME            = b'\x95'  # indicate the beginning of a new frame

# Protocol 5

BYTEARRAY8       = b'\x96'  # push bytearray
NEXT_BUFFER      = b'\x97'  # push next out-of-band buffer
READONLY_BUFFER  = b'\x98'  # make top of stack readonly

# Export every ALL-CAPS opcode constant defined above.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
198
199
200class _Framer:
201
202    _FRAME_SIZE_MIN = 4
203    _FRAME_SIZE_TARGET = 64 * 1024
204
205    def __init__(self, file_write):
206        self.file_write = file_write
207        self.current_frame = None
208
209    def start_framing(self):
210        self.current_frame = io.BytesIO()
211
212    def end_framing(self):
213        if self.current_frame and self.current_frame.tell() > 0:
214            self.commit_frame(force=True)
215            self.current_frame = None
216
217    def commit_frame(self, force=False):
218        if self.current_frame:
219            f = self.current_frame
220            if f.tell() >= self._FRAME_SIZE_TARGET or force:
221                data = f.getbuffer()
222                write = self.file_write
223                if len(data) >= self._FRAME_SIZE_MIN:
224                    # Issue a single call to the write method of the underlying
225                    # file object for the frame opcode with the size of the
226                    # frame. The concatenation is expected to be less expensive
227                    # than issuing an additional call to write.
228                    write(FRAME + pack("<Q", len(data)))
229
230                # Issue a separate call to write to append the frame
231                # contents without concatenation to the above to avoid a
232                # memory copy.
233                write(data)
234
235                # Start the new frame with a new io.BytesIO instance so that
236                # the file object can have delayed access to the previous frame
237                # contents via an unreleased memoryview of the previous
238                # io.BytesIO instance.
239                self.current_frame = io.BytesIO()
240
241    def write(self, data):
242        if self.current_frame:
243            return self.current_frame.write(data)
244        else:
245            return self.file_write(data)
246
247    def write_large_bytes(self, header, payload):
248        write = self.file_write
249        if self.current_frame:
250            # Terminate the current frame and flush it to the file.
251            self.commit_frame(force=True)
252
253        # Perform direct write of the header and payload of the large binary
254        # object. Be careful not to concatenate the header and the payload
255        # prior to calling 'write' as we do not want to allocate a large
256        # temporary bytes object.
257        # We intentionally do not insert a protocol 4 frame opcode to make
258        # it possible to optimize file.read calls in the loader.
259        write(header)
260        write(payload)
261
262
263class _Unframer:
264
265    def __init__(self, file_read, file_readline, file_tell=None):
266        self.file_read = file_read
267        self.file_readline = file_readline
268        self.current_frame = None
269
270    def readinto(self, buf):
271        if self.current_frame:
272            n = self.current_frame.readinto(buf)
273            if n == 0 and len(buf) != 0:
274                self.current_frame = None
275                n = len(buf)
276                buf[:] = self.file_read(n)
277                return n
278            if n < len(buf):
279                raise UnpicklingError(
280                    "pickle exhausted before end of frame")
281            return n
282        else:
283            n = len(buf)
284            buf[:] = self.file_read(n)
285            return n
286
287    def read(self, n):
288        if self.current_frame:
289            data = self.current_frame.read(n)
290            if not data and n != 0:
291                self.current_frame = None
292                return self.file_read(n)
293            if len(data) < n:
294                raise UnpicklingError(
295                    "pickle exhausted before end of frame")
296            return data
297        else:
298            return self.file_read(n)
299
300    def readline(self):
301        if self.current_frame:
302            data = self.current_frame.readline()
303            if not data:
304                self.current_frame = None
305                return self.file_readline()
306            if data[-1] != b'\n'[0]:
307                raise UnpicklingError(
308                    "pickle exhausted before end of frame")
309            return data
310        else:
311            return self.file_readline()
312
313    def load_frame(self, frame_size):
314        if self.current_frame and self.current_frame.read() != b'':
315            raise UnpicklingError(
316                "beginning of a new frame before end of current frame")
317        self.current_frame = io.BytesIO(self.file_read(frame_size))
318
319
320# Tools used for pickling.
321
322def _getattribute(obj, name):
323    for subpath in name.split('.'):
324        if subpath == '<locals>':
325            raise AttributeError("Can't get local attribute {!r} on {!r}"
326                                 .format(name, obj))
327        try:
328            parent = obj
329            obj = getattr(obj, subpath)
330        except AttributeError:
331            raise AttributeError("Can't get attribute {!r} on {!r}"
332                                 .format(name, obj)) from None
333    return obj, parent
334
def whichmodule(obj, name):
    """Return the name of the module *obj* belongs to.

    Trusts ``obj.__module__`` when set; otherwise scans sys.modules for a
    module whose dotted *name* resolves to *obj*, falling back to
    '__main__'.
    """
    module_name = getattr(obj, '__module__', None)
    if module_name is not None:
        return module_name
    # Iterate over a snapshot: getattr on exotic modules may trigger
    # imports that mutate sys.modules mid-scan.
    for module_name, module in sys.modules.copy().items():
        if module is None or module_name == '__main__':
            continue
        try:
            if _getattribute(module, name)[0] is obj:
                return module_name
        except AttributeError:
            pass
    return '__main__'
351
def encode_long(x):
    r"""Encode an int as a two's-complement little-endian byte string.

    Zero is special-cased to the empty string, saving one byte in the
    LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    >>>
    """
    if not x:
        return b''
    nbytes = (x.bit_length() >> 3) + 1
    encoded = x.to_bytes(nbytes, byteorder='little', signed=True)
    # For negatives, to_bytes may emit one redundant 0xff sign byte; drop
    # it when the preceding byte already carries the sign bit.
    if x < 0 and nbytes > 1 and encoded[-1] == 0xff and encoded[-2] & 0x80:
        encoded = encoded[:-1]
    return encoded
381
def decode_long(data):
    r"""Decode a two's-complement little-endian byte string into an int.

    The empty string decodes to 0 (inverse of encode_long).

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    return int.from_bytes(data, byteorder='little', signed=True)
401
402
403# Pickling machinery
404
405class _Pickler:
406
407    def __init__(self, file, protocol=None, *, fix_imports=True,
408                 buffer_callback=None):
409        """This takes a binary file for writing a pickle data stream.
410
411        The optional *protocol* argument tells the pickler to use the
412        given protocol; supported protocols are 0, 1, 2, 3, 4 and 5.
413        The default protocol is 4. It was introduced in Python 3.4, and
414        is incompatible with previous versions.
415
416        Specifying a negative protocol version selects the highest
417        protocol version supported.  The higher the protocol used, the
418        more recent the version of Python needed to read the pickle
419        produced.
420
421        The *file* argument must have a write() method that accepts a
422        single bytes argument. It can thus be a file object opened for
423        binary writing, an io.BytesIO instance, or any other custom
424        object that meets this interface.
425
426        If *fix_imports* is True and *protocol* is less than 3, pickle
427        will try to map the new Python 3 names to the old module names
428        used in Python 2, so that the pickle data stream is readable
429        with Python 2.
430
431        If *buffer_callback* is None (the default), buffer views are
432        serialized into *file* as part of the pickle stream.
433
434        If *buffer_callback* is not None, then it can be called any number
435        of times with a buffer view.  If the callback returns a false value
436        (such as None), the given buffer is out-of-band; otherwise the
437        buffer is serialized in-band, i.e. inside the pickle stream.
438
439        It is an error if *buffer_callback* is not None and *protocol*
440        is None or smaller than 5.
441        """
442        if protocol is None:
443            protocol = DEFAULT_PROTOCOL
444        if protocol < 0:
445            protocol = HIGHEST_PROTOCOL
446        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
447            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
448        if buffer_callback is not None and protocol < 5:
449            raise ValueError("buffer_callback needs protocol >= 5")
450        self._buffer_callback = buffer_callback
451        try:
452            self._file_write = file.write
453        except AttributeError:
454            raise TypeError("file must have a 'write' attribute")
455        self.framer = _Framer(self._file_write)
456        self.write = self.framer.write
457        self._write_large_bytes = self.framer.write_large_bytes
458        self.memo = {}
459        self.proto = int(protocol)
460        self.bin = protocol >= 1
461        self.fast = 0
462        self.fix_imports = fix_imports and protocol < 3
463
464    def clear_memo(self):
465        """Clears the pickler's "memo".
466
467        The memo is the data structure that remembers which objects the
468        pickler has already seen, so that shared or recursive objects
469        are pickled by reference and not by value.  This method is
470        useful when re-using picklers.
471        """
472        self.memo.clear()
473
    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        # Check whether Pickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Pickler.dump().
        if not hasattr(self, "_file_write"):
            raise PicklingError("Pickler.__init__() was not called by "
                                "%s.__init__()" % (self.__class__.__name__,))
        if self.proto >= 2:
            # Protocol 2+ streams start with an explicit PROTO marker
            # carrying the protocol number.
            self.write(PROTO + pack("<B", self.proto))
        if self.proto >= 4:
            # Protocol 4+ wraps the stream body in length-prefixed frames.
            self.framer.start_framing()
        self.save(obj)
        self.write(STOP)
        self.framer.end_framing()
488
489    def memoize(self, obj):
490        """Store an object in the memo."""
491
492        # The Pickler memo is a dictionary mapping object ids to 2-tuples
493        # that contain the Unpickler memo key and the object being memoized.
494        # The memo key is written to the pickle and will become
495        # the key in the Unpickler's memo.  The object is stored in the
496        # Pickler memo so that transient objects are kept alive during
497        # pickling.
498
499        # The use of the Unpickler memo length as the memo key is just a
500        # convention.  The only requirement is that the memo values be unique.
501        # But there appears no advantage to any other scheme, and this
502        # scheme allows the Unpickler memo to be implemented as a plain (but
503        # growable) array, indexed by memo key.
504        if self.fast:
505            return
506        assert id(obj) not in self.memo
507        idx = len(self.memo)
508        self.write(self.put(idx))
509        self.memo[id(obj)] = idx, obj
510
511    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
512    def put(self, idx):
513        if self.proto >= 4:
514            return MEMOIZE
515        elif self.bin:
516            if idx < 256:
517                return BINPUT + pack("<B", idx)
518            else:
519                return LONG_BINPUT + pack("<I", idx)
520        else:
521            return PUT + repr(idx).encode("ascii") + b'\n'
522
523    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
524    def get(self, i):
525        if self.bin:
526            if i < 256:
527                return BINGET + pack("<B", i)
528            else:
529                return LONG_BINGET + pack("<I", i)
530
531        return GET + repr(i).encode("ascii") + b'\n'
532
    def save(self, obj, save_persistent_id=True):
        """Pickle obj, choosing the first applicable strategy.

        Precedence: persistent id, memo reference, a subclass's
        reducer_override, the per-type dispatch table, the private or
        copyreg dispatch_table, classes saved as globals, then the
        object's own __reduce_ex__/__reduce__ protocol.
        """
        self.framer.commit_frame()

        # Check for persistent id (defined by a subclass)
        pid = self.persistent_id(obj)
        if pid is not None and save_persistent_id:
            self.save_pers(pid)
            return

        # Check the memo: an already-seen object becomes a cheap GET.
        x = self.memo.get(id(obj))
        if x is not None:
            self.write(self.get(x[0]))
            return

        # A subclass-provided reducer_override gets first crack; it may
        # return NotImplemented to fall through to the normal machinery.
        rv = NotImplemented
        reduce = getattr(self, "reducer_override", None)
        if reduce is not None:
            rv = reduce(obj)

        if rv is NotImplemented:
            # Check the type dispatch table
            t = type(obj)
            f = self.dispatch.get(t)
            if f is not None:
                f(self, obj)  # Call unbound method with explicit self
                return

            # Check private dispatch table if any, or else
            # copyreg.dispatch_table
            reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
            if reduce is not None:
                rv = reduce(obj)
            else:
                # Check for a class with a custom metaclass; treat as regular
                # class
                if issubclass(t, type):
                    self.save_global(obj)
                    return

                # Check for a __reduce_ex__ method, fall back to __reduce__
                reduce = getattr(obj, "__reduce_ex__", None)
                if reduce is not None:
                    rv = reduce(self.proto)
                else:
                    reduce = getattr(obj, "__reduce__", None)
                    if reduce is not None:
                        rv = reduce()
                    else:
                        raise PicklingError("Can't pickle %r object: %r" %
                                            (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if isinstance(rv, str):
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if not isinstance(rv, tuple):
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 6):
            raise PicklingError("Tuple returned by %s must have "
                                "two to six elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self.save_reduce(obj=obj, *rv)
602
603    def persistent_id(self, obj):
604        # This exists so a subclass can override it
605        return None
606
607    def save_pers(self, pid):
608        # Save a persistent id reference
609        if self.bin:
610            self.save(pid, save_persistent_id=False)
611            self.write(BINPERSID)
612        else:
613            try:
614                self.write(PERSID + str(pid).encode("ascii") + b'\n')
615            except UnicodeEncodeError:
616                raise PicklingError(
617                    "persistent IDs in protocol 0 must be ASCII strings")
618
    def save_reduce(self, func, args, state=None, listitems=None,
                    dictitems=None, state_setter=None, obj=None):
        """Emit the opcodes that rebuild an object from its reduce tuple.

        func, args -- callable and argument tuple that recreate the object;
            the names __newobj__ and __newobj_ex__ are special-cased to the
            NEWOBJ/NEWOBJ_EX opcodes on protocol 2+.
        state -- if not None, applied after construction via BUILD
            (__setstate__ or __dict__.update) or via *state_setter*.
        listitems, dictitems -- optional iterators of list items / dict
            (key, value) pairs appended to the rebuilt object in batches.
        state_setter -- if not None, called as state_setter(obj, state)
            instead of the BUILD mechanism.
        obj -- the object being pickled; used for memoization and for
            consistency checks against args[0].
        """
        # This API is called by some subclasses

        if not isinstance(args, tuple):
            raise PicklingError("args from save_reduce() must be a tuple")
        if not callable(func):
            raise PicklingError("func from save_reduce() must be callable")

        save = self.save
        write = self.write

        func_name = getattr(func, "__name__", "")
        if self.proto >= 2 and func_name == "__newobj_ex__":
            cls, args, kwargs = args
            if not hasattr(cls, "__new__"):
                raise PicklingError("args[0] from {} args has no __new__"
                                    .format(func_name))
            if obj is not None and cls is not obj.__class__:
                raise PicklingError("args[0] from {} args has the wrong class"
                                    .format(func_name))
            if self.proto >= 4:
                save(cls)
                save(args)
                save(kwargs)
                write(NEWOBJ_EX)
            else:
                # Protocols 2 and 3 lack NEWOBJ_EX: fold the keyword
                # arguments into a partial so plain REDUCE can rebuild.
                func = partial(cls.__new__, cls, *args, **kwargs)
                save(func)
                save(())
                write(REDUCE)
        elif self.proto >= 2 and func_name == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 or newer to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally.  For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # class.  The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below).  If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own.  This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))
            else:
                self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            if state_setter is None:
                save(state)
                write(BUILD)
            else:
                # If a state_setter is specified, call it instead of load_build
                # to update obj's with its previous state.
                # First, push state_setter and its tuple of expected arguments
                # (obj, state) onto the stack.
                save(state_setter)
                save(obj)  # simple BINGET opcode as obj is already memoized.
                save(state)
                write(TUPLE2)
                # Trigger a state_setter(obj, state) function call.
                write(REDUCE)
                # The purpose of state_setter is to carry-out an
                # inplace modification of obj. We do not care about what the
                # method might return, so its output is eventually removed from
                # the stack.
                write(POP)
733
    # Methods below this point are dispatched through the dispatch table

    # Maps an exact type to the save_* method handling it; each entry is
    # registered right after the corresponding method definition below.
    dispatch = {}
737
738    def save_none(self, obj):
739        self.write(NONE)
740    dispatch[type(None)] = save_none
741
742    def save_bool(self, obj):
743        if self.proto >= 2:
744            self.write(NEWTRUE if obj else NEWFALSE)
745        else:
746            self.write(TRUE if obj else FALSE)
747    dispatch[bool] = save_bool
748
    def save_long(self, obj):
        """Pickle an int, picking the smallest encoding that fits."""
        if self.bin:
            # If the int is small enough to fit in a signed 4-byte 2's-comp
            # format, we can store it more efficiently than the general
            # case.
            # First one- and two-byte unsigned ints:
            if obj >= 0:
                if obj <= 0xff:
                    self.write(BININT1 + pack("<B", obj))
                    return
                if obj <= 0xffff:
                    self.write(BININT2 + pack("<H", obj))
                    return
            # Next check for 4-byte signed ints:
            if -0x80000000 <= obj <= 0x7fffffff:
                self.write(BININT + pack("<i", obj))
                return
        if self.proto >= 2:
            # Arbitrary precision: little-endian two's-complement payload,
            # length-prefixed with 1 byte (LONG1) or 4 bytes (LONG4).
            encoded = encode_long(obj)
            n = len(encoded)
            if n < 256:
                self.write(LONG1 + pack("<B", n) + encoded)
            else:
                self.write(LONG4 + pack("<i", n) + encoded)
            return
        if -0x80000000 <= obj <= 0x7fffffff:
            self.write(INT + repr(obj).encode("ascii") + b'\n')
        else:
            # Protocol 0's text form for big ints carries a trailing 'L'.
            self.write(LONG + repr(obj).encode("ascii") + b'L\n')
    dispatch[int] = save_long
779
780    def save_float(self, obj):
781        if self.bin:
782            self.write(BINFLOAT + pack('>d', obj))
783        else:
784            self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
785    dispatch[float] = save_float
786
    def save_bytes(self, obj):
        """Pickle a bytes object.

        Protocols 0-2 have no bytes opcodes, so fall back to a reduce
        that rebuilds the object by latin-1 round-tripping through str.
        """
        if self.proto < 3:
            if not obj: # bytes object is empty
                self.save_reduce(bytes, (), obj=obj)
            else:
                self.save_reduce(codecs.encode,
                                 (str(obj, 'latin1'), 'latin1'), obj=obj)
            return
        n = len(obj)
        if n <= 0xff:
            self.write(SHORT_BINBYTES + pack("<B", n) + obj)
        elif n > 0xffffffff and self.proto >= 4:
            # Length does not fit in 4 bytes; use the 8-byte-length opcode.
            self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj)
        elif n >= self.framer._FRAME_SIZE_TARGET:
            # Payloads at or above the frame target go through the
            # large-bytes write path instead of the normal frame buffer.
            self._write_large_bytes(BINBYTES + pack("<I", n), obj)
        else:
            self.write(BINBYTES + pack("<I", n) + obj)
        self.memoize(obj)
    dispatch[bytes] = save_bytes
806
807    def save_bytearray(self, obj):
808        if self.proto < 5:
809            if not obj:  # bytearray is empty
810                self.save_reduce(bytearray, (), obj=obj)
811            else:
812                self.save_reduce(bytearray, (bytes(obj),), obj=obj)
813            return
814        n = len(obj)
815        if n >= self.framer._FRAME_SIZE_TARGET:
816            self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
817        else:
818            self.write(BYTEARRAY8 + pack("<Q", n) + obj)
819    dispatch[bytearray] = save_bytearray
820
821    if _HAVE_PICKLE_BUFFER:
822        def save_picklebuffer(self, obj):
823            if self.proto < 5:
824                raise PicklingError("PickleBuffer can only pickled with "
825                                    "protocol >= 5")
826            with obj.raw() as m:
827                if not m.contiguous:
828                    raise PicklingError("PickleBuffer can not be pickled when "
829                                        "pointing to a non-contiguous buffer")
830                in_band = True
831                if self._buffer_callback is not None:
832                    in_band = bool(self._buffer_callback(obj))
833                if in_band:
834                    # Write data in-band
835                    # XXX The C implementation avoids a copy here
836                    if m.readonly:
837                        self.save_bytes(m.tobytes())
838                    else:
839                        self.save_bytearray(m.tobytes())
840                else:
841                    # Write data out-of-band
842                    self.write(NEXT_BUFFER)
843                    if m.readonly:
844                        self.write(READONLY_BUFFER)
845
846        dispatch[PickleBuffer] = save_picklebuffer
847
848    def save_str(self, obj):
849        if self.bin:
850            encoded = obj.encode('utf-8', 'surrogatepass')
851            n = len(encoded)
852            if n <= 0xff and self.proto >= 4:
853                self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
854            elif n > 0xffffffff and self.proto >= 4:
855                self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
856            elif n >= self.framer._FRAME_SIZE_TARGET:
857                self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
858            else:
859                self.write(BINUNICODE + pack("<I", n) + encoded)
860        else:
861            obj = obj.replace("\\", "\\u005c")
862            obj = obj.replace("\0", "\\u0000")
863            obj = obj.replace("\n", "\\u000a")
864            obj = obj.replace("\r", "\\u000d")
865            obj = obj.replace("\x1a", "\\u001a")  # EOF on DOS
866            self.write(UNICODE + obj.encode('raw-unicode-escape') +
867                       b'\n')
868        self.memoize(obj)
869    dispatch[str] = save_str
870
    def save_tuple(self, obj):
        """Pickle a tuple, handling self-referential tuples specially.

        A tuple can only reach itself through an element saved while the
        tuple is still being written; when that happens the memo already
        holds the tuple afterwards, so the elements pushed on the stack
        are popped and a GET is emitted instead.
        """
        if not obj: # tuple is empty
            if self.bin:
                self.write(EMPTY_TUPLE)
            else:
                self.write(MARK + TUPLE)
            return

        n = len(obj)
        save = self.save
        memo = self.memo
        if n <= 3 and self.proto >= 2:
            # TUPLE1/2/3 need no MARK, so recursion is undone with POPs.
            for element in obj:
                save(element)
            # Subtle.  Same as in the big comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                self.write(POP * n + get)
            else:
                self.write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
        # has more than 3 elements.
        write = self.write
        write(MARK)
        for element in obj:
            save(element)

        if id(obj) in memo:
            # Subtle.  d was not in memo when we entered save_tuple(), so
            # the process of saving the tuple's elements must have saved
            # the tuple itself:  the tuple is recursive.  The proper action
            # now is to throw away everything we put on the stack, and
            # simply GET the tuple (it's already constructed).  This check
            # could have been done in the "for element" loop instead, but
            # recursive tuples are a rare thing.
            get = self.get(memo[id(obj)][0])
            if self.bin:
                write(POP_MARK + get)
            else:   # proto 0 -- POP_MARK not available
                write(POP * (n+1) + get)
            return

        # No recursion.
        write(TUPLE)
        self.memoize(obj)

    dispatch[tuple] = save_tuple
921
922    def save_list(self, obj):
923        if self.bin:
924            self.write(EMPTY_LIST)
925        else:   # proto 0 -- can't use EMPTY_LIST
926            self.write(MARK + LIST)
927
928        self.memoize(obj)
929        self._batch_appends(obj)
930
931    dispatch[list] = save_list
932
933    _BATCHSIZE = 1000
934
935    def _batch_appends(self, items):
936        # Helper to batch up APPENDS sequences
937        save = self.save
938        write = self.write
939
940        if not self.bin:
941            for x in items:
942                save(x)
943                write(APPEND)
944            return
945
946        it = iter(items)
947        while True:
948            tmp = list(islice(it, self._BATCHSIZE))
949            n = len(tmp)
950            if n > 1:
951                write(MARK)
952                for x in tmp:
953                    save(x)
954                write(APPENDS)
955            elif n:
956                save(tmp[0])
957                write(APPEND)
958            # else tmp is empty, and we're done
959            if n < self._BATCHSIZE:
960                return
961
962    def save_dict(self, obj):
963        if self.bin:
964            self.write(EMPTY_DICT)
965        else:   # proto 0 -- can't use EMPTY_DICT
966            self.write(MARK + DICT)
967
968        self.memoize(obj)
969        self._batch_setitems(obj.items())
970
971    dispatch[dict] = save_dict
972    if PyStringMap is not None:
973        dispatch[PyStringMap] = save_dict
974
975    def _batch_setitems(self, items):
976        # Helper to batch up SETITEMS sequences; proto >= 1 only
977        save = self.save
978        write = self.write
979
980        if not self.bin:
981            for k, v in items:
982                save(k)
983                save(v)
984                write(SETITEM)
985            return
986
987        it = iter(items)
988        while True:
989            tmp = list(islice(it, self._BATCHSIZE))
990            n = len(tmp)
991            if n > 1:
992                write(MARK)
993                for k, v in tmp:
994                    save(k)
995                    save(v)
996                write(SETITEMS)
997            elif n:
998                k, v = tmp[0]
999                save(k)
1000                save(v)
1001                write(SETITEM)
1002            # else tmp is empty, and we're done
1003            if n < self._BATCHSIZE:
1004                return
1005
1006    def save_set(self, obj):
1007        save = self.save
1008        write = self.write
1009
1010        if self.proto < 4:
1011            self.save_reduce(set, (list(obj),), obj=obj)
1012            return
1013
1014        write(EMPTY_SET)
1015        self.memoize(obj)
1016
1017        it = iter(obj)
1018        while True:
1019            batch = list(islice(it, self._BATCHSIZE))
1020            n = len(batch)
1021            if n > 0:
1022                write(MARK)
1023                for item in batch:
1024                    save(item)
1025                write(ADDITEMS)
1026            if n < self._BATCHSIZE:
1027                return
1028    dispatch[set] = save_set
1029
    def save_frozenset(self, obj):
        """Pickle a frozenset (FROZENSET opcode on protocol 4+).

        Protocols before 4 fall back to reduce(frozenset, [list]).
        """
        save = self.save
        write = self.write

        if self.proto < 4:
            self.save_reduce(frozenset, (list(obj),), obj=obj)
            return

        write(MARK)
        for item in obj:
            save(item)

        if id(obj) in self.memo:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            write(POP_MARK + self.get(self.memo[id(obj)][0]))
            return

        write(FROZENSET)
        self.memoize(obj)
    dispatch[frozenset] = save_frozenset
1052
1053    def save_global(self, obj, name=None):
1054        write = self.write
1055        memo = self.memo
1056
1057        if name is None:
1058            name = getattr(obj, '__qualname__', None)
1059        if name is None:
1060            name = obj.__name__
1061
1062        module_name = whichmodule(obj, name)
1063        try:
1064            __import__(module_name, level=0)
1065            module = sys.modules[module_name]
1066            obj2, parent = _getattribute(module, name)
1067        except (ImportError, KeyError, AttributeError):
1068            raise PicklingError(
1069                "Can't pickle %r: it's not found as %s.%s" %
1070                (obj, module_name, name)) from None
1071        else:
1072            if obj2 is not obj:
1073                raise PicklingError(
1074                    "Can't pickle %r: it's not the same object as %s.%s" %
1075                    (obj, module_name, name))
1076
1077        if self.proto >= 2:
1078            code = _extension_registry.get((module_name, name))
1079            if code:
1080                assert code > 0
1081                if code <= 0xff:
1082                    write(EXT1 + pack("<B", code))
1083                elif code <= 0xffff:
1084                    write(EXT2 + pack("<H", code))
1085                else:
1086                    write(EXT4 + pack("<i", code))
1087                return
1088        lastname = name.rpartition('.')[2]
1089        if parent is module:
1090            name = lastname
1091        # Non-ASCII identifiers are supported only with protocols >= 3.
1092        if self.proto >= 4:
1093            self.save(module_name)
1094            self.save(name)
1095            write(STACK_GLOBAL)
1096        elif parent is not module:
1097            self.save_reduce(getattr, (parent, lastname))
1098        elif self.proto >= 3:
1099            write(GLOBAL + bytes(module_name, "utf-8") + b'\n' +
1100                  bytes(name, "utf-8") + b'\n')
1101        else:
1102            if self.fix_imports:
1103                r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
1104                r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
1105                if (module_name, name) in r_name_mapping:
1106                    module_name, name = r_name_mapping[(module_name, name)]
1107                elif module_name in r_import_mapping:
1108                    module_name = r_import_mapping[module_name]
1109            try:
1110                write(GLOBAL + bytes(module_name, "ascii") + b'\n' +
1111                      bytes(name, "ascii") + b'\n')
1112            except UnicodeEncodeError:
1113                raise PicklingError(
1114                    "can't pickle global identifier '%s.%s' using "
1115                    "pickle protocol %i" % (module, name, self.proto)) from None
1116
1117        self.memoize(obj)
1118
1119    def save_type(self, obj):
1120        if obj is type(None):
1121            return self.save_reduce(type, (None,), obj=obj)
1122        elif obj is type(NotImplemented):
1123            return self.save_reduce(type, (NotImplemented,), obj=obj)
1124        elif obj is type(...):
1125            return self.save_reduce(type, (...,), obj=obj)
1126        return self.save_global(obj)
1127
1128    dispatch[FunctionType] = save_global
1129    dispatch[type] = save_type
1130
1131
1132# Unpickling machinery
1133
1134class _Unpickler:
1135
    def __init__(self, file, *, fix_imports=True,
                 encoding="ASCII", errors="strict", buffers=None):
        """This takes a binary file for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so
        no proto argument is needed.

        The argument *file* must have two methods, a read() method that
        takes an integer argument, and a readline() method that requires
        no arguments.  Both methods should return bytes.  Thus *file*
        can be a binary file object opened for reading, an io.BytesIO
        object, or any other custom object that meets this interface.

        If *buffers* is not None, it should be an iterable of buffer-enabled
        objects that is consumed each time the pickle stream references
        an out-of-band buffer view.  Such buffers have been given in order
        to the *buffer_callback* of a Pickler object.

        If *buffers* is None (the default), then the buffers are taken
        from the pickle stream, assuming they are serialized there.
        It is an error for *buffers* to be None if the pickle stream
        was produced with a non-None *buffer_callback*.

        Other optional arguments are *fix_imports*, *encoding* and
        *errors*, which are used to control compatibility support for
        pickle stream generated by Python 2.  If *fix_imports* is True,
        pickle will try to map the old Python 2 names to the new names
        used in Python 3.  The *encoding* and *errors* tell pickle how
        to decode 8-bit string instances pickled by Python 2; these
        default to 'ASCII' and 'strict', respectively. *encoding* can be
        'bytes' to read these 8-bit string instances as bytes objects.
        """
        self._buffers = iter(buffers) if buffers is not None else None
        self._file_readline = file.readline
        self._file_read = file.read
        self.memo = {}                  # memo index -> already-built object
        self.encoding = encoding
        self.errors = errors
        self.proto = 0                  # updated by the PROTO opcode in load()
        self.fix_imports = fix_imports
1183
    def load(self):
        """Read a pickled object representation from the open file.

        Return the reconstituted object hierarchy specified in the file.
        """
        # Check whether Unpickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Unpickler.dump().
        if not hasattr(self, "_file_read"):
            raise UnpicklingError("Unpickler.__init__() was not called by "
                                  "%s.__init__()" % (self.__class__.__name__,))
        self._unframer = _Unframer(self._file_read, self._file_readline)
        self.read = self._unframer.read
        self.readinto = self._unframer.readinto
        self.readline = self._unframer.readline
        self.metastack = []             # stack of saved stacks (MARK frames)
        self.stack = []                 # current working stack
        self.append = self.stack.append
        self.proto = 0
        read = self.read
        dispatch = self.dispatch
        try:
            # Main loop: one dispatch-table call per opcode byte until a
            # handler raises _Stop carrying the final value.
            while True:
                key = read(1)
                if not key:
                    raise EOFError
                assert isinstance(key, bytes_types)
                dispatch[key[0]](self)
        except _Stop as stopinst:
            return stopinst.value
1213
1214    # Return a list of items pushed in the stack after last MARK instruction.
1215    def pop_mark(self):
1216        items = self.stack
1217        self.stack = self.metastack.pop()
1218        self.append = self.stack.append
1219        return items
1220
    def persistent_load(self, pid):
        """Resolve a persistent ID; subclasses override this hook."""
        raise UnpicklingError("unsupported persistent id encountered")
1223
1224    dispatch = {}
1225
1226    def load_proto(self):
1227        proto = self.read(1)[0]
1228        if not 0 <= proto <= HIGHEST_PROTOCOL:
1229            raise ValueError("unsupported pickle protocol: %d" % proto)
1230        self.proto = proto
1231    dispatch[PROTO[0]] = load_proto
1232
1233    def load_frame(self):
1234        frame_size, = unpack('<Q', self.read(8))
1235        if frame_size > sys.maxsize:
1236            raise ValueError("frame size > sys.maxsize: %d" % frame_size)
1237        self._unframer.load_frame(frame_size)
1238    dispatch[FRAME[0]] = load_frame
1239
1240    def load_persid(self):
1241        try:
1242            pid = self.readline()[:-1].decode("ascii")
1243        except UnicodeDecodeError:
1244            raise UnpicklingError(
1245                "persistent IDs in protocol 0 must be ASCII strings")
1246        self.append(self.persistent_load(pid))
1247    dispatch[PERSID[0]] = load_persid
1248
    def load_binpersid(self):
        """BINPERSID: persistent ID taken from the stack instead of text."""
        pid = self.stack.pop()
        self.append(self.persistent_load(pid))
    dispatch[BINPERSID[0]] = load_binpersid
1253
    def load_none(self):
        """NONE: push None."""
        self.append(None)
    dispatch[NONE[0]] = load_none
1257
    def load_false(self):
        """NEWFALSE: push False."""
        self.append(False)
    dispatch[NEWFALSE[0]] = load_false
1261
    def load_true(self):
        """NEWTRUE: push True."""
        self.append(True)
    dispatch[NEWTRUE[0]] = load_true
1265
1266    def load_int(self):
1267        data = self.readline()
1268        if data == FALSE[1:]:
1269            val = False
1270        elif data == TRUE[1:]:
1271            val = True
1272        else:
1273            val = int(data, 0)
1274        self.append(val)
1275    dispatch[INT[0]] = load_int
1276
    def load_binint(self):
        """BININT: 4-byte little-endian signed int."""
        self.append(unpack('<i', self.read(4))[0])
    dispatch[BININT[0]] = load_binint
1280
    def load_binint1(self):
        """BININT1: 1-byte unsigned int."""
        self.append(self.read(1)[0])
    dispatch[BININT1[0]] = load_binint1
1284
    def load_binint2(self):
        """BININT2: 2-byte little-endian unsigned int."""
        self.append(unpack('<H', self.read(2))[0])
    dispatch[BININT2[0]] = load_binint2
1288
1289    def load_long(self):
1290        val = self.readline()[:-1]
1291        if val and val[-1] == b'L'[0]:
1292            val = val[:-1]
1293        self.append(int(val, 0))
1294    dispatch[LONG[0]] = load_long
1295
    def load_long1(self):
        """LONG1: 1-byte length, then encoded digits (see decode_long)."""
        n = self.read(1)[0]
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG1[0]] = load_long1
1301
1302    def load_long4(self):
1303        n, = unpack('<i', self.read(4))
1304        if n < 0:
1305            # Corrupt or hostile pickle -- we never write one like this
1306            raise UnpicklingError("LONG pickle has negative byte count")
1307        data = self.read(n)
1308        self.append(decode_long(data))
1309    dispatch[LONG4[0]] = load_long4
1310
    def load_float(self):
        """FLOAT: repr text line (protocol 0)."""
        self.append(float(self.readline()[:-1]))
    dispatch[FLOAT[0]] = load_float
1314
    def load_binfloat(self):
        """BINFLOAT: 8-byte big-endian IEEE-754 double."""
        self.append(unpack('>d', self.read(8))[0])
    dispatch[BINFLOAT[0]] = load_binfloat
1318
1319    def _decode_string(self, value):
1320        # Used to allow strings from Python 2 to be decoded either as
1321        # bytes or Unicode strings.  This should be used only with the
1322        # STRING, BINSTRING and SHORT_BINSTRING opcodes.
1323        if self.encoding == "bytes":
1324            return value
1325        else:
1326            return value.decode(self.encoding, self.errors)
1327
1328    def load_string(self):
1329        data = self.readline()[:-1]
1330        # Strip outermost quotes
1331        if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'':
1332            data = data[1:-1]
1333        else:
1334            raise UnpicklingError("the STRING opcode argument must be quoted")
1335        self.append(self._decode_string(codecs.escape_decode(data)[0]))
1336    dispatch[STRING[0]] = load_string
1337
1338    def load_binstring(self):
1339        # Deprecated BINSTRING uses signed 32-bit length
1340        len, = unpack('<i', self.read(4))
1341        if len < 0:
1342            raise UnpicklingError("BINSTRING pickle has negative byte count")
1343        data = self.read(len)
1344        self.append(self._decode_string(data))
1345    dispatch[BINSTRING[0]] = load_binstring
1346
1347    def load_binbytes(self):
1348        len, = unpack('<I', self.read(4))
1349        if len > maxsize:
1350            raise UnpicklingError("BINBYTES exceeds system's maximum size "
1351                                  "of %d bytes" % maxsize)
1352        self.append(self.read(len))
1353    dispatch[BINBYTES[0]] = load_binbytes
1354
    def load_unicode(self):
        """UNICODE: raw-unicode-escape text line (protocol 0)."""
        self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
    dispatch[UNICODE[0]] = load_unicode
1358
1359    def load_binunicode(self):
1360        len, = unpack('<I', self.read(4))
1361        if len > maxsize:
1362            raise UnpicklingError("BINUNICODE exceeds system's maximum size "
1363                                  "of %d bytes" % maxsize)
1364        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1365    dispatch[BINUNICODE[0]] = load_binunicode
1366
1367    def load_binunicode8(self):
1368        len, = unpack('<Q', self.read(8))
1369        if len > maxsize:
1370            raise UnpicklingError("BINUNICODE8 exceeds system's maximum size "
1371                                  "of %d bytes" % maxsize)
1372        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1373    dispatch[BINUNICODE8[0]] = load_binunicode8
1374
1375    def load_binbytes8(self):
1376        len, = unpack('<Q', self.read(8))
1377        if len > maxsize:
1378            raise UnpicklingError("BINBYTES8 exceeds system's maximum size "
1379                                  "of %d bytes" % maxsize)
1380        self.append(self.read(len))
1381    dispatch[BINBYTES8[0]] = load_binbytes8
1382
1383    def load_bytearray8(self):
1384        len, = unpack('<Q', self.read(8))
1385        if len > maxsize:
1386            raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
1387                                  "of %d bytes" % maxsize)
1388        b = bytearray(len)
1389        self.readinto(b)
1390        self.append(b)
1391    dispatch[BYTEARRAY8[0]] = load_bytearray8
1392
1393    def load_next_buffer(self):
1394        if self._buffers is None:
1395            raise UnpicklingError("pickle stream refers to out-of-band data "
1396                                  "but no *buffers* argument was given")
1397        try:
1398            buf = next(self._buffers)
1399        except StopIteration:
1400            raise UnpicklingError("not enough out-of-band buffers")
1401        self.append(buf)
1402    dispatch[NEXT_BUFFER[0]] = load_next_buffer
1403
    def load_readonly_buffer(self):
        """READONLY_BUFFER: make the top-of-stack buffer read-only.

        toreadonly() shares the underlying data; only writes are blocked.
        """
        buf = self.stack[-1]
        with memoryview(buf) as m:
            if not m.readonly:
                self.stack[-1] = m.toreadonly()
    dispatch[READONLY_BUFFER[0]] = load_readonly_buffer
1410
1411    def load_short_binstring(self):
1412        len = self.read(1)[0]
1413        data = self.read(len)
1414        self.append(self._decode_string(data))
1415    dispatch[SHORT_BINSTRING[0]] = load_short_binstring
1416
1417    def load_short_binbytes(self):
1418        len = self.read(1)[0]
1419        self.append(self.read(len))
1420    dispatch[SHORT_BINBYTES[0]] = load_short_binbytes
1421
1422    def load_short_binunicode(self):
1423        len = self.read(1)[0]
1424        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1425    dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
1426
    def load_tuple(self):
        """TUPLE: all items pushed since the last MARK, as a tuple."""
        items = self.pop_mark()
        self.append(tuple(items))
    dispatch[TUPLE[0]] = load_tuple
1431
    def load_empty_tuple(self):
        """EMPTY_TUPLE: push ()."""
        self.append(())
    dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
1435
1436    def load_tuple1(self):
1437        self.stack[-1] = (self.stack[-1],)
1438    dispatch[TUPLE1[0]] = load_tuple1
1439
1440    def load_tuple2(self):
1441        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
1442    dispatch[TUPLE2[0]] = load_tuple2
1443
1444    def load_tuple3(self):
1445        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
1446    dispatch[TUPLE3[0]] = load_tuple3
1447
    def load_empty_list(self):
        """EMPTY_LIST: push a new empty list."""
        self.append([])
    dispatch[EMPTY_LIST[0]] = load_empty_list
1451
    def load_empty_dictionary(self):
        """EMPTY_DICT: push a new empty dict."""
        self.append({})
    dispatch[EMPTY_DICT[0]] = load_empty_dictionary
1455
    def load_empty_set(self):
        """EMPTY_SET: push a new empty set."""
        self.append(set())
    dispatch[EMPTY_SET[0]] = load_empty_set
1459
    def load_frozenset(self):
        """FROZENSET: all items pushed since the last MARK, as a frozenset."""
        items = self.pop_mark()
        self.append(frozenset(items))
    dispatch[FROZENSET[0]] = load_frozenset
1464
    def load_list(self):
        """LIST: all items pushed since the last MARK, as a list.

        The list returned by pop_mark() is reused directly.
        """
        items = self.pop_mark()
        self.append(items)
    dispatch[LIST[0]] = load_list
1469
1470    def load_dict(self):
1471        items = self.pop_mark()
1472        d = {items[i]: items[i+1]
1473             for i in range(0, len(items), 2)}
1474        self.append(d)
1475    dispatch[DICT[0]] = load_dict
1476
1477    # INST and OBJ differ only in how they get a class object.  It's not
1478    # only sensible to do the rest in a common routine, the two routines
1479    # previously diverged and grew different bugs.
1480    # klass is the class to instantiate, and k points to the topmost mark
1481    # object, following which are the arguments for klass.__init__.
1482    def _instantiate(self, klass, args):
1483        if (args or not isinstance(klass, type) or
1484            hasattr(klass, "__getinitargs__")):
1485            try:
1486                value = klass(*args)
1487            except TypeError as err:
1488                raise TypeError("in constructor for %s: %s" %
1489                                (klass.__name__, str(err)), sys.exc_info()[2])
1490        else:
1491            value = klass.__new__(klass)
1492        self.append(value)
1493
    def load_inst(self):
        """INST: module and class name as ASCII text lines, constructor
        arguments pushed since the last MARK (protocol 0)."""
        module = self.readline()[:-1].decode("ascii")
        name = self.readline()[:-1].decode("ascii")
        klass = self.find_class(module, name)
        self._instantiate(klass, self.pop_mark())
    dispatch[INST[0]] = load_inst
1500
    def load_obj(self):
        """OBJ: like INST, but the class object is on the stack."""
        # Stack is ... markobject classobject arg1 arg2 ...
        args = self.pop_mark()
        cls = args.pop(0)
        self._instantiate(cls, args)
    dispatch[OBJ[0]] = load_obj
1507
1508    def load_newobj(self):
1509        args = self.stack.pop()
1510        cls = self.stack.pop()
1511        obj = cls.__new__(cls, *args)
1512        self.append(obj)
1513    dispatch[NEWOBJ[0]] = load_newobj
1514
1515    def load_newobj_ex(self):
1516        kwargs = self.stack.pop()
1517        args = self.stack.pop()
1518        cls = self.stack.pop()
1519        obj = cls.__new__(cls, *args, **kwargs)
1520        self.append(obj)
1521    dispatch[NEWOBJ_EX[0]] = load_newobj_ex
1522
1523    def load_global(self):
1524        module = self.readline()[:-1].decode("utf-8")
1525        name = self.readline()[:-1].decode("utf-8")
1526        klass = self.find_class(module, name)
1527        self.append(klass)
1528    dispatch[GLOBAL[0]] = load_global
1529
1530    def load_stack_global(self):
1531        name = self.stack.pop()
1532        module = self.stack.pop()
1533        if type(name) is not str or type(module) is not str:
1534            raise UnpicklingError("STACK_GLOBAL requires str")
1535        self.append(self.find_class(module, name))
1536    dispatch[STACK_GLOBAL[0]] = load_stack_global
1537
    def load_ext1(self):
        """EXT1: 1-byte extension registry code."""
        code = self.read(1)[0]
        self.get_extension(code)
    dispatch[EXT1[0]] = load_ext1
1542
    def load_ext2(self):
        """EXT2: 2-byte little-endian extension registry code."""
        code, = unpack('<H', self.read(2))
        self.get_extension(code)
    dispatch[EXT2[0]] = load_ext2
1547
    def load_ext4(self):
        """EXT4: 4-byte little-endian signed extension registry code."""
        code, = unpack('<i', self.read(4))
        self.get_extension(code)
    dispatch[EXT4[0]] = load_ext4
1552
1553    def get_extension(self, code):
1554        nil = []
1555        obj = _extension_cache.get(code, nil)
1556        if obj is not nil:
1557            self.append(obj)
1558            return
1559        key = _inverted_registry.get(code)
1560        if not key:
1561            if code <= 0: # note that 0 is forbidden
1562                # Corrupt or hostile pickle.
1563                raise UnpicklingError("EXT specifies code <= 0")
1564            raise ValueError("unregistered extension code %d" % code)
1565        obj = self.find_class(*key)
1566        _extension_cache[code] = obj
1567        self.append(obj)
1568
    def find_class(self, module, name):
        """Return the object addressed by *module*.*name*.

        Applies the Python 2 name/import mappings when reading an old
        (proto < 3) stream with fix_imports, imports the module, and for
        proto >= 4 resolves dotted names via _getattribute.  Subclasses
        may override this.
        """
        # Subclasses may override this.
        sys.audit('pickle.find_class', module, name)
        if self.proto < 3 and self.fix_imports:
            if (module, name) in _compat_pickle.NAME_MAPPING:
                module, name = _compat_pickle.NAME_MAPPING[(module, name)]
            elif module in _compat_pickle.IMPORT_MAPPING:
                module = _compat_pickle.IMPORT_MAPPING[module]
        __import__(module, level=0)
        if self.proto >= 4:
            return _getattribute(sys.modules[module], name)[0]
        else:
            return getattr(sys.modules[module], name)
1582
    def load_reduce(self):
        """REDUCE: replace callable below top-of-stack with callable(*args),
        where args is the popped top-of-stack tuple."""
        stack = self.stack
        args = stack.pop()
        func = stack[-1]
        stack[-1] = func(*args)
    dispatch[REDUCE[0]] = load_reduce
1589
1590    def load_pop(self):
1591        if self.stack:
1592            del self.stack[-1]
1593        else:
1594            self.pop_mark()
1595    dispatch[POP[0]] = load_pop
1596
    def load_pop_mark(self):
        """POP_MARK: discard everything pushed since the last MARK."""
        self.pop_mark()
    dispatch[POP_MARK[0]] = load_pop_mark
1600
    def load_dup(self):
        """DUP: push a second reference to the top stack item."""
        self.append(self.stack[-1])
    dispatch[DUP[0]] = load_dup
1604
1605    def load_get(self):
1606        i = int(self.readline()[:-1])
1607        self.append(self.memo[i])
1608    dispatch[GET[0]] = load_get
1609
    def load_binget(self):
        """BINGET: push the memo entry at a 1-byte index."""
        i = self.read(1)[0]
        self.append(self.memo[i])
    dispatch[BINGET[0]] = load_binget
1614
1615    def load_long_binget(self):
1616        i, = unpack('<I', self.read(4))
1617        self.append(self.memo[i])
1618    dispatch[LONG_BINGET[0]] = load_long_binget
1619
1620    def load_put(self):
1621        i = int(self.readline()[:-1])
1622        if i < 0:
1623            raise ValueError("negative PUT argument")
1624        self.memo[i] = self.stack[-1]
1625    dispatch[PUT[0]] = load_put
1626
1627    def load_binput(self):
1628        i = self.read(1)[0]
1629        if i < 0:
1630            raise ValueError("negative BINPUT argument")
1631        self.memo[i] = self.stack[-1]
1632    dispatch[BINPUT[0]] = load_binput
1633
1634    def load_long_binput(self):
1635        i, = unpack('<I', self.read(4))
1636        if i > maxsize:
1637            raise ValueError("negative LONG_BINPUT argument")
1638        self.memo[i] = self.stack[-1]
1639    dispatch[LONG_BINPUT[0]] = load_long_binput
1640
1641    def load_memoize(self):
1642        memo = self.memo
1643        memo[len(memo)] = self.stack[-1]
1644    dispatch[MEMOIZE[0]] = load_memoize
1645
1646    def load_append(self):
1647        stack = self.stack
1648        value = stack.pop()
1649        list = stack[-1]
1650        list.append(value)
1651    dispatch[APPEND[0]] = load_append
1652
1653    def load_appends(self):
1654        items = self.pop_mark()
1655        list_obj = self.stack[-1]
1656        try:
1657            extend = list_obj.extend
1658        except AttributeError:
1659            pass
1660        else:
1661            extend(items)
1662            return
1663        # Even if the PEP 307 requires extend() and append() methods,
1664        # fall back on append() if the object has no extend() method
1665        # for backward compatibility.
1666        append = list_obj.append
1667        for item in items:
1668            append(item)
1669    dispatch[APPENDS[0]] = load_appends
1670
1671    def load_setitem(self):
1672        stack = self.stack
1673        value = stack.pop()
1674        key = stack.pop()
1675        dict = stack[-1]
1676        dict[key] = value
1677    dispatch[SETITEM[0]] = load_setitem
1678
1679    def load_setitems(self):
1680        items = self.pop_mark()
1681        dict = self.stack[-1]
1682        for i in range(0, len(items), 2):
1683            dict[items[i]] = items[i + 1]
1684    dispatch[SETITEMS[0]] = load_setitems
1685
1686    def load_additems(self):
1687        items = self.pop_mark()
1688        set_obj = self.stack[-1]
1689        if isinstance(set_obj, set):
1690            set_obj.update(items)
1691        else:
1692            add = set_obj.add
1693            for item in items:
1694                add(item)
1695    dispatch[ADDITEMS[0]] = load_additems
1696
1697    def load_build(self):
1698        stack = self.stack
1699        state = stack.pop()
1700        inst = stack[-1]
1701        setstate = getattr(inst, "__setstate__", None)
1702        if setstate is not None:
1703            setstate(state)
1704            return
1705        slotstate = None
1706        if isinstance(state, tuple) and len(state) == 2:
1707            state, slotstate = state
1708        if state:
1709            inst_dict = inst.__dict__
1710            intern = sys.intern
1711            for k, v in state.items():
1712                if type(k) is str:
1713                    inst_dict[intern(k)] = v
1714                else:
1715                    inst_dict[k] = v
1716        if slotstate:
1717            for k, v in slotstate.items():
1718                setattr(inst, k, v)
1719    dispatch[BUILD[0]] = load_build
1720
1721    def load_mark(self):
1722        self.metastack.append(self.stack)
1723        self.stack = []
1724        self.append = self.stack.append
1725    dispatch[MARK[0]] = load_mark
1726
1727    def load_stop(self):
1728        value = self.stack.pop()
1729        raise _Stop(value)
1730    dispatch[STOP[0]] = load_stop
1731
1732
1733# Shorthands
1734
def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Pickle *obj* to the open file object *file* (pure-Python path)."""
    pickler = _Pickler(file, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
1738
def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Return *obj* pickled as a bytes object (pure-Python path)."""
    buf = io.BytesIO()
    pickler = _Pickler(buf, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
    data = buf.getvalue()
    assert isinstance(data, bytes_types)
    return data
1746
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
          buffers=None):
    """Read and return one pickled object from the open file *file*."""
    unpickler = _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
                           encoding=encoding, errors=errors)
    return unpickler.load()
1751
def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict",
           buffers=None):
    """Unpickle and return one object from the bytes-like object *s*."""
    if isinstance(s, str):
        raise TypeError("Can't load pickle from unicode string")
    unpickler = _Unpickler(io.BytesIO(s), fix_imports=fix_imports,
                           buffers=buffers, encoding=encoding,
                           errors=errors)
    return unpickler.load()
1759
# Use the faster _pickle if possible
try:
    # The C accelerator module exports drop-in replacements for the
    # public API; when it imports cleanly these names shadow the
    # pure-Python implementations defined above.
    from _pickle import (
        PickleError,
        PicklingError,
        UnpicklingError,
        Pickler,
        Unpickler,
        dump,
        dumps,
        load,
        loads
    )
except ImportError:
    # No C extension available (e.g. alternative interpreters or a
    # stripped build): fall back on the pure-Python versions.
    Pickler, Unpickler = _Pickler, _Unpickler
    dump, dumps, load, loads = _dump, _dumps, _load, _loads
1776
1777# Doctest
1778def _test():
1779    import doctest
1780    return doctest.testmod()
1781
if __name__ == "__main__":
    # Command-line mode: pretty-print pickle files, or run the
    # doctest-based self-test with -t.
    import argparse
    parser = argparse.ArgumentParser(
        description='display contents of the pickle files')
    parser.add_argument(
        'pickle_file', type=argparse.FileType('br'),
        nargs='*', help='the pickle file')
    parser.add_argument(
        '-t', '--test', action='store_true',
        help='run self-test suite')
    parser.add_argument(
        '-v', action='store_true',
        help='run verbosely; only affects self-test run')
    args = parser.parse_args()
    if args.test:
        _test()
    elif args.pickle_file:
        import pprint
        for f in args.pickle_file:
            pprint.pprint(load(f))
    else:
        # Nothing to do: no files given and no self-test requested.
        parser.print_help()
1806