1"""Create portable serialized representations of Python objects.
2
3See module copyreg for a mechanism for registering custom picklers.
4See module pickletools source for extensive comments.
5
6Classes:
7
8    Pickler
9    Unpickler
10
11Functions:
12
13    dump(object, file)
14    dumps(object) -> string
15    load(file) -> object
16    loads(bytes) -> object
17
18Misc variables:
19
20    __version__
21    format_version
22    compatible_formats
23
24"""
25
26from types import FunctionType
27from copyreg import dispatch_table
28from copyreg import _extension_registry, _inverted_registry, _extension_cache
29from itertools import islice
30from functools import partial
31import sys
32from sys import maxsize
33from struct import pack, unpack
34import re
35import io
36import codecs
37import _compat_pickle
38
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# PickleBuffer (out-of-band data, protocol 5) lives in the C accelerator
# module; it may be unavailable on alternative Python implementations.
try:
    from _pickle import PickleBuffer
    __all__.append("PickleBuffer")
    _HAVE_PICKLE_BUFFER = True
except ImportError:
    _HAVE_PICKLE_BUFFER = False


# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# These are purely informational; no code uses these.
format_version = "4.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      "3.0",            # Protocol 3
                      "4.0",            # Protocol 4
                      "5.0",            # Protocol 5
                      ]                 # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 5

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
# Only bump this if the oldest still supported version of Python already
# includes it.
DEFAULT_PROTOCOL = 4
72
class PickleError(Exception):
    """Base class shared by all pickling and unpickling exceptions."""
76
class PicklingError(PickleError):
    """Raised when an object that cannot be pickled is handed to the
    dump() method."""
83
class UnpicklingError(PickleError):
    """Raised when something goes wrong while unpickling an object,
    such as a security violation.

    Unpickling can also surface other exception types — among them
    AttributeError, EOFError, ImportError, and IndexError — so callers
    should not assume this is the only one raised.
    """
94
# An instance of _Stop is raised by Unpickler.load_stop() in response to
# the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    """Internal control-flow exception carrying the final unpickled value."""
    def __init__(self, value):
        # The object produced by the pickle stream.
        self.value = value
100
# Jython has PyStringMap; it's a dict subclass with string keys
try:
    from org.python.core import PyStringMap
except ImportError:
    # Not running on Jython: use None as the "type does not exist" marker.
    PyStringMap = None
106
# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.
# (Each value is the raw byte written to / read from the pickle stream.)

MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

# Tuple-building opcode indexed by tuple length (0-3).
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   #  "     "   ;    "      "       "      " < 256 bytes

# Protocol 4

SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
BINUNICODE8      = b'\x8d'  # push very long string
BINBYTES8        = b'\x8e'  # push very long bytes string
EMPTY_SET        = b'\x8f'  # push empty set on the stack
ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
FROZENSET        = b'\x91'  # build frozenset from topmost stack items
NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
MEMOIZE          = b'\x94'  # store top of the stack in memo
FRAME            = b'\x95'  # indicate the beginning of a new frame

# Protocol 5

BYTEARRAY8       = b'\x96'  # push bytearray
NEXT_BUFFER      = b'\x97'  # push next out-of-band buffer
READONLY_BUFFER  = b'\x98'  # make top of stack readonly

# Re-export every all-caps opcode name through the module's public API.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
198
199
200class _Framer:
201
202    _FRAME_SIZE_MIN = 4
203    _FRAME_SIZE_TARGET = 64 * 1024
204
205    def __init__(self, file_write):
206        self.file_write = file_write
207        self.current_frame = None
208
209    def start_framing(self):
210        self.current_frame = io.BytesIO()
211
212    def end_framing(self):
213        if self.current_frame and self.current_frame.tell() > 0:
214            self.commit_frame(force=True)
215            self.current_frame = None
216
217    def commit_frame(self, force=False):
218        if self.current_frame:
219            f = self.current_frame
220            if f.tell() >= self._FRAME_SIZE_TARGET or force:
221                data = f.getbuffer()
222                write = self.file_write
223                if len(data) >= self._FRAME_SIZE_MIN:
224                    # Issue a single call to the write method of the underlying
225                    # file object for the frame opcode with the size of the
226                    # frame. The concatenation is expected to be less expensive
227                    # than issuing an additional call to write.
228                    write(FRAME + pack("<Q", len(data)))
229
230                # Issue a separate call to write to append the frame
231                # contents without concatenation to the above to avoid a
232                # memory copy.
233                write(data)
234
235                # Start the new frame with a new io.BytesIO instance so that
236                # the file object can have delayed access to the previous frame
237                # contents via an unreleased memoryview of the previous
238                # io.BytesIO instance.
239                self.current_frame = io.BytesIO()
240
241    def write(self, data):
242        if self.current_frame:
243            return self.current_frame.write(data)
244        else:
245            return self.file_write(data)
246
247    def write_large_bytes(self, header, payload):
248        write = self.file_write
249        if self.current_frame:
250            # Terminate the current frame and flush it to the file.
251            self.commit_frame(force=True)
252
253        # Perform direct write of the header and payload of the large binary
254        # object. Be careful not to concatenate the header and the payload
255        # prior to calling 'write' as we do not want to allocate a large
256        # temporary bytes object.
257        # We intentionally do not insert a protocol 4 frame opcode to make
258        # it possible to optimize file.read calls in the loader.
259        write(header)
260        write(payload)
261
262
263class _Unframer:
264
265    def __init__(self, file_read, file_readline, file_tell=None):
266        self.file_read = file_read
267        self.file_readline = file_readline
268        self.current_frame = None
269
270    def readinto(self, buf):
271        if self.current_frame:
272            n = self.current_frame.readinto(buf)
273            if n == 0 and len(buf) != 0:
274                self.current_frame = None
275                n = len(buf)
276                buf[:] = self.file_read(n)
277                return n
278            if n < len(buf):
279                raise UnpicklingError(
280                    "pickle exhausted before end of frame")
281            return n
282        else:
283            n = len(buf)
284            buf[:] = self.file_read(n)
285            return n
286
287    def read(self, n):
288        if self.current_frame:
289            data = self.current_frame.read(n)
290            if not data and n != 0:
291                self.current_frame = None
292                return self.file_read(n)
293            if len(data) < n:
294                raise UnpicklingError(
295                    "pickle exhausted before end of frame")
296            return data
297        else:
298            return self.file_read(n)
299
300    def readline(self):
301        if self.current_frame:
302            data = self.current_frame.readline()
303            if not data:
304                self.current_frame = None
305                return self.file_readline()
306            if data[-1] != b'\n'[0]:
307                raise UnpicklingError(
308                    "pickle exhausted before end of frame")
309            return data
310        else:
311            return self.file_readline()
312
313    def load_frame(self, frame_size):
314        if self.current_frame and self.current_frame.read() != b'':
315            raise UnpicklingError(
316                "beginning of a new frame before end of current frame")
317        self.current_frame = io.BytesIO(self.file_read(frame_size))
318
319
320# Tools used for pickling.
321
322def _getattribute(obj, name):
323    for subpath in name.split('.'):
324        if subpath == '<locals>':
325            raise AttributeError("Can't get local attribute {!r} on {!r}"
326                                 .format(name, obj))
327        try:
328            parent = obj
329            obj = getattr(obj, subpath)
330        except AttributeError:
331            raise AttributeError("Can't get attribute {!r} on {!r}"
332                                 .format(name, obj)) from None
333    return obj, parent
334
def whichmodule(obj, name):
    """Find the module an object belongs to."""
    declared = getattr(obj, '__module__', None)
    if declared is not None:
        return declared
    # Search loaded modules for one exposing obj under *name*.  Iterate a
    # snapshot of sys.modules: dynamic modules may trigger imports of
    # other modules upon calls to getattr.
    for mod_name, module in sys.modules.copy().items():
        if mod_name in ('__main__', '__mp_main__') or module is None:
            # '__mp_main__' is skipped as well: bpo-42406.
            continue
        try:
            if _getattribute(module, name)[0] is obj:
                return mod_name
        except AttributeError:
            continue
    return '__main__'
353
def encode_long(x):
    r"""Encode an int as a two's-complement little-endian byte string.

    Zero is special-cased to the empty string, saving a byte in the
    LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    >>>
    """
    if x == 0:
        return b''
    # One extra byte guarantees room for the sign bit.
    size = (x.bit_length() >> 3) + 1
    encoded = x.to_bytes(size, byteorder='little', signed=True)
    if x < 0 and size > 1 and encoded[-1] == 0xff and encoded[-2] & 0x80:
        # The trailing 0xff is redundant: the byte before it already
        # carries the sign bit.
        encoded = encoded[:-1]
    return encoded
383
def decode_long(data):
    r"""Decode a two's-complement little-endian byte string to an int.

    The empty string decodes to 0 (mirroring encode_long).

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    return int.from_bytes(data, 'little', signed=True)
403
404
405# Pickling machinery
406
407class _Pickler:
408
409    def __init__(self, file, protocol=None, *, fix_imports=True,
410                 buffer_callback=None):
411        """This takes a binary file for writing a pickle data stream.
412
413        The optional *protocol* argument tells the pickler to use the
414        given protocol; supported protocols are 0, 1, 2, 3, 4 and 5.
415        The default protocol is 4. It was introduced in Python 3.4, and
416        is incompatible with previous versions.
417
418        Specifying a negative protocol version selects the highest
419        protocol version supported.  The higher the protocol used, the
420        more recent the version of Python needed to read the pickle
421        produced.
422
423        The *file* argument must have a write() method that accepts a
424        single bytes argument. It can thus be a file object opened for
425        binary writing, an io.BytesIO instance, or any other custom
426        object that meets this interface.
427
428        If *fix_imports* is True and *protocol* is less than 3, pickle
429        will try to map the new Python 3 names to the old module names
430        used in Python 2, so that the pickle data stream is readable
431        with Python 2.
432
433        If *buffer_callback* is None (the default), buffer views are
434        serialized into *file* as part of the pickle stream.
435
436        If *buffer_callback* is not None, then it can be called any number
437        of times with a buffer view.  If the callback returns a false value
438        (such as None), the given buffer is out-of-band; otherwise the
439        buffer is serialized in-band, i.e. inside the pickle stream.
440
441        It is an error if *buffer_callback* is not None and *protocol*
442        is None or smaller than 5.
443        """
444        if protocol is None:
445            protocol = DEFAULT_PROTOCOL
446        if protocol < 0:
447            protocol = HIGHEST_PROTOCOL
448        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
449            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
450        if buffer_callback is not None and protocol < 5:
451            raise ValueError("buffer_callback needs protocol >= 5")
452        self._buffer_callback = buffer_callback
453        try:
454            self._file_write = file.write
455        except AttributeError:
456            raise TypeError("file must have a 'write' attribute")
457        self.framer = _Framer(self._file_write)
458        self.write = self.framer.write
459        self._write_large_bytes = self.framer.write_large_bytes
460        self.memo = {}
461        self.proto = int(protocol)
462        self.bin = protocol >= 1
463        self.fast = 0
464        self.fix_imports = fix_imports and protocol < 3
465
    def clear_memo(self):
        """Clears the pickler's "memo".

        The memo is the data structure that remembers which objects the
        pickler has already seen, so that shared or recursive objects
        are pickled by reference and not by value.  This method is
        useful when re-using picklers.
        """
        # Note: only the memo is reset; the framer and output stream are
        # left untouched.
        self.memo.clear()
475
476    def dump(self, obj):
477        """Write a pickled representation of obj to the open file."""
478        # Check whether Pickler was initialized correctly. This is
479        # only needed to mimic the behavior of _pickle.Pickler.dump().
480        if not hasattr(self, "_file_write"):
481            raise PicklingError("Pickler.__init__() was not called by "
482                                "%s.__init__()" % (self.__class__.__name__,))
483        if self.proto >= 2:
484            self.write(PROTO + pack("<B", self.proto))
485        if self.proto >= 4:
486            self.framer.start_framing()
487        self.save(obj)
488        self.write(STOP)
489        self.framer.end_framing()
490
491    def memoize(self, obj):
492        """Store an object in the memo."""
493
494        # The Pickler memo is a dictionary mapping object ids to 2-tuples
495        # that contain the Unpickler memo key and the object being memoized.
496        # The memo key is written to the pickle and will become
497        # the key in the Unpickler's memo.  The object is stored in the
498        # Pickler memo so that transient objects are kept alive during
499        # pickling.
500
501        # The use of the Unpickler memo length as the memo key is just a
502        # convention.  The only requirement is that the memo values be unique.
503        # But there appears no advantage to any other scheme, and this
504        # scheme allows the Unpickler memo to be implemented as a plain (but
505        # growable) array, indexed by memo key.
506        if self.fast:
507            return
508        assert id(obj) not in self.memo
509        idx = len(self.memo)
510        self.write(self.put(idx))
511        self.memo[id(obj)] = idx, obj
512
513    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
514    def put(self, idx):
515        if self.proto >= 4:
516            return MEMOIZE
517        elif self.bin:
518            if idx < 256:
519                return BINPUT + pack("<B", idx)
520            else:
521                return LONG_BINPUT + pack("<I", idx)
522        else:
523            return PUT + repr(idx).encode("ascii") + b'\n'
524
525    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
526    def get(self, i):
527        if self.bin:
528            if i < 256:
529                return BINGET + pack("<B", i)
530            else:
531                return LONG_BINGET + pack("<I", i)
532
533        return GET + repr(i).encode("ascii") + b'\n'
534
    def save(self, obj, save_persistent_id=True):
        """Pickle *obj*, choosing the appropriate reduction strategy.

        Resolution order: persistent id, memo (shared/recursive
        objects), reducer_override (optional subclass hook), the
        per-type dispatch table, a private dispatch_table (or
        copyreg's), and finally the object's own __reduce_ex__ /
        __reduce__.
        """
        # Give the framer a chance to flush the current frame and begin
        # a new one (protocol 4+).
        self.framer.commit_frame()

        # Check for persistent id (defined by a subclass)
        pid = self.persistent_id(obj)
        if pid is not None and save_persistent_id:
            self.save_pers(pid)
            return

        # Check the memo
        x = self.memo.get(id(obj))
        if x is not None:
            self.write(self.get(x[0]))
            return

        rv = NotImplemented
        reduce = getattr(self, "reducer_override", None)
        if reduce is not None:
            rv = reduce(obj)

        if rv is NotImplemented:
            # Check the type dispatch table
            t = type(obj)
            f = self.dispatch.get(t)
            if f is not None:
                f(self, obj)  # Call unbound method with explicit self
                return

            # Check private dispatch table if any, or else
            # copyreg.dispatch_table
            reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
            if reduce is not None:
                rv = reduce(obj)
            else:
                # Check for a class with a custom metaclass; treat as regular
                # class
                if issubclass(t, type):
                    self.save_global(obj)
                    return

                # Check for a __reduce_ex__ method, fall back to __reduce__
                reduce = getattr(obj, "__reduce_ex__", None)
                if reduce is not None:
                    rv = reduce(self.proto)
                else:
                    reduce = getattr(obj, "__reduce__", None)
                    if reduce is not None:
                        rv = reduce()
                    else:
                        raise PicklingError("Can't pickle %r object: %r" %
                                            (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if isinstance(rv, str):
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if not isinstance(rv, tuple):
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 6):
            raise PicklingError("Tuple returned by %s must have "
                                "two to six elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self.save_reduce(obj=obj, *rv)
604
    def persistent_id(self, obj):
        # This exists so a subclass can override it.  A non-None return
        # value makes save() emit a persistent-id reference for obj
        # instead of pickling the object itself.
        return None
608
609    def save_pers(self, pid):
610        # Save a persistent id reference
611        if self.bin:
612            self.save(pid, save_persistent_id=False)
613            self.write(BINPERSID)
614        else:
615            try:
616                self.write(PERSID + str(pid).encode("ascii") + b'\n')
617            except UnicodeEncodeError:
618                raise PicklingError(
619                    "persistent IDs in protocol 0 must be ASCII strings")
620
    def save_reduce(self, func, args, state=None, listitems=None,
                    dictitems=None, state_setter=None, obj=None):
        """Emit the opcodes for a reduce-style tuple.

        *func* and *args* describe the reconstruction call; *state*,
        *listitems*, *dictitems* and *state_setter* correspond to the
        optional third-to-sixth items of a __reduce__ tuple.  *obj*,
        when supplied, is the original object and is used for
        memoization and for sanity-checking __newobj__/__newobj_ex__
        reductions.
        """
        # This API is called by some subclasses

        if not isinstance(args, tuple):
            raise PicklingError("args from save_reduce() must be a tuple")
        if not callable(func):
            raise PicklingError("func from save_reduce() must be callable")

        save = self.save
        write = self.write

        func_name = getattr(func, "__name__", "")
        if self.proto >= 2 and func_name == "__newobj_ex__":
            cls, args, kwargs = args
            if not hasattr(cls, "__new__"):
                raise PicklingError("args[0] from {} args has no __new__"
                                    .format(func_name))
            if obj is not None and cls is not obj.__class__:
                raise PicklingError("args[0] from {} args has the wrong class"
                                    .format(func_name))
            if self.proto >= 4:
                save(cls)
                save(args)
                save(kwargs)
                write(NEWOBJ_EX)
            else:
                # Protocols 2-3 lack NEWOBJ_EX: bake the keyword
                # arguments into a partial() and use plain REDUCE.
                func = partial(cls.__new__, cls, *args, **kwargs)
                save(func)
                save(())
                write(REDUCE)
        elif self.proto >= 2 and func_name == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 or newer to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally.  For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # class.  The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below).  If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own.  This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            # Generic case: pickle func and args, then apply with REDUCE.
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))
            else:
                self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            if state_setter is None:
                save(state)
                write(BUILD)
            else:
                # If a state_setter is specified, call it instead of load_build
                # to update obj's with its previous state.
                # First, push state_setter and its tuple of expected arguments
                # (obj, state) onto the stack.
                save(state_setter)
                save(obj)  # simple BINGET opcode as obj is already memoized.
                save(state)
                write(TUPLE2)
                # Trigger a state_setter(obj, state) function call.
                write(REDUCE)
                # The purpose of state_setter is to carry-out an
                # inplace modification of obj. We do not care about what the
                # method might return, so its output is eventually removed from
                # the stack.
                write(POP)
735
    # Methods below this point are dispatched through the dispatch table

    # Maps a concrete type to the unbound save_* method that serializes
    # instances of exactly that type; populated right after each
    # definition below.  save() consults it via self.dispatch.get(type(obj)).
    dispatch = {}
739
    def save_none(self, obj):
        """Pickle None as the single NONE opcode."""
        self.write(NONE)
    dispatch[type(None)] = save_none
743
744    def save_bool(self, obj):
745        if self.proto >= 2:
746            self.write(NEWTRUE if obj else NEWFALSE)
747        else:
748            self.write(TRUE if obj else FALSE)
749    dispatch[bool] = save_bool
750
751    def save_long(self, obj):
752        if self.bin:
753            # If the int is small enough to fit in a signed 4-byte 2's-comp
754            # format, we can store it more efficiently than the general
755            # case.
756            # First one- and two-byte unsigned ints:
757            if obj >= 0:
758                if obj <= 0xff:
759                    self.write(BININT1 + pack("<B", obj))
760                    return
761                if obj <= 0xffff:
762                    self.write(BININT2 + pack("<H", obj))
763                    return
764            # Next check for 4-byte signed ints:
765            if -0x80000000 <= obj <= 0x7fffffff:
766                self.write(BININT + pack("<i", obj))
767                return
768        if self.proto >= 2:
769            encoded = encode_long(obj)
770            n = len(encoded)
771            if n < 256:
772                self.write(LONG1 + pack("<B", n) + encoded)
773            else:
774                self.write(LONG4 + pack("<i", n) + encoded)
775            return
776        if -0x80000000 <= obj <= 0x7fffffff:
777            self.write(INT + repr(obj).encode("ascii") + b'\n')
778        else:
779            self.write(LONG + repr(obj).encode("ascii") + b'L\n')
780    dispatch[int] = save_long
781
782    def save_float(self, obj):
783        if self.bin:
784            self.write(BINFLOAT + pack('>d', obj))
785        else:
786            self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
787    dispatch[float] = save_float
788
789    def save_bytes(self, obj):
790        if self.proto < 3:
791            if not obj: # bytes object is empty
792                self.save_reduce(bytes, (), obj=obj)
793            else:
794                self.save_reduce(codecs.encode,
795                                 (str(obj, 'latin1'), 'latin1'), obj=obj)
796            return
797        n = len(obj)
798        if n <= 0xff:
799            self.write(SHORT_BINBYTES + pack("<B", n) + obj)
800        elif n > 0xffffffff and self.proto >= 4:
801            self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj)
802        elif n >= self.framer._FRAME_SIZE_TARGET:
803            self._write_large_bytes(BINBYTES + pack("<I", n), obj)
804        else:
805            self.write(BINBYTES + pack("<I", n) + obj)
806        self.memoize(obj)
807    dispatch[bytes] = save_bytes
808
809    def save_bytearray(self, obj):
810        if self.proto < 5:
811            if not obj:  # bytearray is empty
812                self.save_reduce(bytearray, (), obj=obj)
813            else:
814                self.save_reduce(bytearray, (bytes(obj),), obj=obj)
815            return
816        n = len(obj)
817        if n >= self.framer._FRAME_SIZE_TARGET:
818            self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
819        else:
820            self.write(BYTEARRAY8 + pack("<Q", n) + obj)
821    dispatch[bytearray] = save_bytearray
822
823    if _HAVE_PICKLE_BUFFER:
824        def save_picklebuffer(self, obj):
825            if self.proto < 5:
826                raise PicklingError("PickleBuffer can only pickled with "
827                                    "protocol >= 5")
828            with obj.raw() as m:
829                if not m.contiguous:
830                    raise PicklingError("PickleBuffer can not be pickled when "
831                                        "pointing to a non-contiguous buffer")
832                in_band = True
833                if self._buffer_callback is not None:
834                    in_band = bool(self._buffer_callback(obj))
835                if in_band:
836                    # Write data in-band
837                    # XXX The C implementation avoids a copy here
838                    if m.readonly:
839                        self.save_bytes(m.tobytes())
840                    else:
841                        self.save_bytearray(m.tobytes())
842                else:
843                    # Write data out-of-band
844                    self.write(NEXT_BUFFER)
845                    if m.readonly:
846                        self.write(READONLY_BUFFER)
847
848        dispatch[PickleBuffer] = save_picklebuffer
849
850    def save_str(self, obj):
851        if self.bin:
852            encoded = obj.encode('utf-8', 'surrogatepass')
853            n = len(encoded)
854            if n <= 0xff and self.proto >= 4:
855                self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
856            elif n > 0xffffffff and self.proto >= 4:
857                self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
858            elif n >= self.framer._FRAME_SIZE_TARGET:
859                self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
860            else:
861                self.write(BINUNICODE + pack("<I", n) + encoded)
862        else:
863            obj = obj.replace("\\", "\\u005c")
864            obj = obj.replace("\0", "\\u0000")
865            obj = obj.replace("\n", "\\u000a")
866            obj = obj.replace("\r", "\\u000d")
867            obj = obj.replace("\x1a", "\\u001a")  # EOF on DOS
868            self.write(UNICODE + obj.encode('raw-unicode-escape') +
869                       b'\n')
870        self.memoize(obj)
871    dispatch[str] = save_str
872
    def save_tuple(self, obj):
        """Pickle a tuple, handling the rare self-referential tuple that
        can arise through reduce/memo interplay."""
        if not obj: # tuple is empty
            if self.bin:
                self.write(EMPTY_TUPLE)
            else:
                self.write(MARK + TUPLE)
            return

        n = len(obj)
        save = self.save
        memo = self.memo
        if n <= 3 and self.proto >= 2:
            # TUPLE1/2/3 need no MARK; see the recursion note below for
            # why the memo must be re-checked after saving the elements.
            for element in obj:
                save(element)
            # Subtle.  Same as in the big comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                self.write(POP * n + get)
            else:
                self.write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
        # has more than 3 elements.
        write = self.write
        write(MARK)
        for element in obj:
            save(element)

        if id(obj) in memo:
            # Subtle.  d was not in memo when we entered save_tuple(), so
            # the process of saving the tuple's elements must have saved
            # the tuple itself:  the tuple is recursive.  The proper action
            # now is to throw away everything we put on the stack, and
            # simply GET the tuple (it's already constructed).  This check
            # could have been done in the "for element" loop instead, but
            # recursive tuples are a rare thing.
            get = self.get(memo[id(obj)][0])
            if self.bin:
                write(POP_MARK + get)
            else:   # proto 0 -- POP_MARK not available
                write(POP * (n+1) + get)
            return

        # No recursion.
        write(TUPLE)
        self.memoize(obj)

    dispatch[tuple] = save_tuple
923
924    def save_list(self, obj):
925        if self.bin:
926            self.write(EMPTY_LIST)
927        else:   # proto 0 -- can't use EMPTY_LIST
928            self.write(MARK + LIST)
929
930        self.memoize(obj)
931        self._batch_appends(obj)
932
933    dispatch[list] = save_list
934
    # Maximum number of elements emitted per APPENDS/SETITEMS/ADDITEMS
    # opcode by the _batch_* helpers below.
    _BATCHSIZE = 1000
936
937    def _batch_appends(self, items):
938        # Helper to batch up APPENDS sequences
939        save = self.save
940        write = self.write
941
942        if not self.bin:
943            for x in items:
944                save(x)
945                write(APPEND)
946            return
947
948        it = iter(items)
949        while True:
950            tmp = list(islice(it, self._BATCHSIZE))
951            n = len(tmp)
952            if n > 1:
953                write(MARK)
954                for x in tmp:
955                    save(x)
956                write(APPENDS)
957            elif n:
958                save(tmp[0])
959                write(APPEND)
960            # else tmp is empty, and we're done
961            if n < self._BATCHSIZE:
962                return
963
964    def save_dict(self, obj):
965        if self.bin:
966            self.write(EMPTY_DICT)
967        else:   # proto 0 -- can't use EMPTY_DICT
968            self.write(MARK + DICT)
969
970        self.memoize(obj)
971        self._batch_setitems(obj.items())
972
973    dispatch[dict] = save_dict
974    if PyStringMap is not None:
975        dispatch[PyStringMap] = save_dict
976
977    def _batch_setitems(self, items):
978        # Helper to batch up SETITEMS sequences; proto >= 1 only
979        save = self.save
980        write = self.write
981
982        if not self.bin:
983            for k, v in items:
984                save(k)
985                save(v)
986                write(SETITEM)
987            return
988
989        it = iter(items)
990        while True:
991            tmp = list(islice(it, self._BATCHSIZE))
992            n = len(tmp)
993            if n > 1:
994                write(MARK)
995                for k, v in tmp:
996                    save(k)
997                    save(v)
998                write(SETITEMS)
999            elif n:
1000                k, v = tmp[0]
1001                save(k)
1002                save(v)
1003                write(SETITEM)
1004            # else tmp is empty, and we're done
1005            if n < self._BATCHSIZE:
1006                return
1007
1008    def save_set(self, obj):
1009        save = self.save
1010        write = self.write
1011
1012        if self.proto < 4:
1013            self.save_reduce(set, (list(obj),), obj=obj)
1014            return
1015
1016        write(EMPTY_SET)
1017        self.memoize(obj)
1018
1019        it = iter(obj)
1020        while True:
1021            batch = list(islice(it, self._BATCHSIZE))
1022            n = len(batch)
1023            if n > 0:
1024                write(MARK)
1025                for item in batch:
1026                    save(item)
1027                write(ADDITEMS)
1028            if n < self._BATCHSIZE:
1029                return
1030    dispatch[set] = save_set
1031
1032    def save_frozenset(self, obj):
1033        save = self.save
1034        write = self.write
1035
1036        if self.proto < 4:
1037            self.save_reduce(frozenset, (list(obj),), obj=obj)
1038            return
1039
1040        write(MARK)
1041        for item in obj:
1042            save(item)
1043
1044        if id(obj) in self.memo:
1045            # If the object is already in the memo, this means it is
1046            # recursive. In this case, throw away everything we put on the
1047            # stack, and fetch the object back from the memo.
1048            write(POP_MARK + self.get(self.memo[id(obj)][0]))
1049            return
1050
1051        write(FROZENSET)
1052        self.memoize(obj)
1053    dispatch[frozenset] = save_frozenset
1054
    def save_global(self, obj, name=None):
        """Pickle an object by reference to its importable location.

        Verifies that importing module_name.name yields *obj* itself, then
        emits (depending on protocol) a copyreg extension-registry code,
        STACK_GLOBAL, GLOBAL, or a getattr-based reduce for dotted names
        that protocols < 4 cannot express.
        """
        write = self.write
        memo = self.memo

        if name is None:
            name = getattr(obj, '__qualname__', None)
        if name is None:
            name = obj.__name__

        module_name = whichmodule(obj, name)
        try:
            __import__(module_name, level=0)
            module = sys.modules[module_name]
            obj2, parent = _getattribute(module, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module_name, name)) from None
        else:
            if obj2 is not obj:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module_name, name))

        if self.proto >= 2:
            # Registered extension codes replace the textual reference.
            code = _extension_registry.get((module_name, name))
            if code:
                assert code > 0
                if code <= 0xff:
                    write(EXT1 + pack("<B", code))
                elif code <= 0xffff:
                    write(EXT2 + pack("<H", code))
                else:
                    write(EXT4 + pack("<i", code))
                return
        lastname = name.rpartition('.')[2]
        if parent is module:
            name = lastname
        # Non-ASCII identifiers are supported only with protocols >= 3.
        if self.proto >= 4:
            self.save(module_name)
            self.save(name)
            write(STACK_GLOBAL)
        elif parent is not module:
            # Dotted (nested) name on proto < 4: reduce via getattr.
            self.save_reduce(getattr, (parent, lastname))
        elif self.proto >= 3:
            write(GLOBAL + bytes(module_name, "utf-8") + b'\n' +
                  bytes(name, "utf-8") + b'\n')
        else:
            if self.fix_imports:
                # Map Python 3 names back to their Python 2 spellings.
                r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
                r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
                if (module_name, name) in r_name_mapping:
                    module_name, name = r_name_mapping[(module_name, name)]
                elif module_name in r_import_mapping:
                    module_name = r_import_mapping[module_name]
            try:
                write(GLOBAL + bytes(module_name, "ascii") + b'\n' +
                      bytes(name, "ascii") + b'\n')
            except UnicodeEncodeError:
                raise PicklingError(
                    "can't pickle global identifier '%s.%s' using "
                    "pickle protocol %i" % (module, name, self.proto)) from None

        self.memoize(obj)
1120
1121    def save_type(self, obj):
1122        if obj is type(None):
1123            return self.save_reduce(type, (None,), obj=obj)
1124        elif obj is type(NotImplemented):
1125            return self.save_reduce(type, (NotImplemented,), obj=obj)
1126        elif obj is type(...):
1127            return self.save_reduce(type, (...,), obj=obj)
1128        return self.save_global(obj)
1129
1130    dispatch[FunctionType] = save_global
1131    dispatch[type] = save_type
1132
1133
1134# Unpickling machinery
1135
1136class _Unpickler:
1137
    def __init__(self, file, *, fix_imports=True,
                 encoding="ASCII", errors="strict", buffers=None):
        """This takes a binary file for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so
        no proto argument is needed.

        The argument *file* must have two methods, a read() method that
        takes an integer argument, and a readline() method that requires
        no arguments.  Both methods should return bytes.  Thus *file*
        can be a binary file object opened for reading, an io.BytesIO
        object, or any other custom object that meets this interface.

        If *buffers* is not None, it should be an iterable of buffer-enabled
        objects that is consumed each time the pickle stream references
        an out-of-band buffer view.  Such buffers have been given in order
        to the *buffer_callback* of a Pickler object.

        If *buffers* is None (the default), then the buffers are taken
        from the pickle stream, assuming they are serialized there.
        It is an error for *buffers* to be None if the pickle stream
        was produced with a non-None *buffer_callback*.

        Other optional arguments are *fix_imports*, *encoding* and
        *errors*, which are used to control compatibility support for
        pickle stream generated by Python 2.  If *fix_imports* is True,
        pickle will try to map the old Python 2 names to the new names
        used in Python 3.  The *encoding* and *errors* tell pickle how
        to decode 8-bit string instances pickled by Python 2; these
        default to 'ASCII' and 'strict', respectively. *encoding* can be
        'bytes' to read these 8-bit string instances as bytes objects.
        """
        self._buffers = iter(buffers) if buffers is not None else None
        self._file_readline = file.readline
        self._file_read = file.read
        self.memo = {}
        self.encoding = encoding
        self.errors = errors
        self.proto = 0
        self.fix_imports = fix_imports
1185
    def load(self):
        """Read a pickled object representation from the open file.

        Return the reconstituted object hierarchy specified in the file.
        """
        # Check whether Unpickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Unpickler.dump().
        if not hasattr(self, "_file_read"):
            raise UnpicklingError("Unpickler.__init__() was not called by "
                                  "%s.__init__()" % (self.__class__.__name__,))
        # The unframer transparently handles protocol 5 FRAME chunking.
        self._unframer = _Unframer(self._file_read, self._file_readline)
        self.read = self._unframer.read
        self.readinto = self._unframer.readinto
        self.readline = self._unframer.readline
        self.metastack = []
        self.stack = []
        self.append = self.stack.append
        self.proto = 0
        read = self.read
        dispatch = self.dispatch
        try:
            # Dispatch one opcode at a time; the STOP opcode raises _Stop
            # carrying the finished object.
            while True:
                key = read(1)
                if not key:
                    raise EOFError
                assert isinstance(key, bytes_types)
                dispatch[key[0]](self)
        except _Stop as stopinst:
            return stopinst.value
1215
1216    # Return a list of items pushed in the stack after last MARK instruction.
1217    def pop_mark(self):
1218        items = self.stack
1219        self.stack = self.metastack.pop()
1220        self.append = self.stack.append
1221        return items
1222
    def persistent_load(self, pid):
        # Subclasses override this to resolve persistent IDs; the base
        # implementation rejects them.
        raise UnpicklingError("unsupported persistent id encountered")
1225
    # Maps an opcode byte value to the unbound _Unpickler method that
    # implements it; each load_* definition below registers itself here.
    dispatch = {}
1227
1228    def load_proto(self):
1229        proto = self.read(1)[0]
1230        if not 0 <= proto <= HIGHEST_PROTOCOL:
1231            raise ValueError("unsupported pickle protocol: %d" % proto)
1232        self.proto = proto
1233    dispatch[PROTO[0]] = load_proto
1234
1235    def load_frame(self):
1236        frame_size, = unpack('<Q', self.read(8))
1237        if frame_size > sys.maxsize:
1238            raise ValueError("frame size > sys.maxsize: %d" % frame_size)
1239        self._unframer.load_frame(frame_size)
1240    dispatch[FRAME[0]] = load_frame
1241
1242    def load_persid(self):
1243        try:
1244            pid = self.readline()[:-1].decode("ascii")
1245        except UnicodeDecodeError:
1246            raise UnpicklingError(
1247                "persistent IDs in protocol 0 must be ASCII strings")
1248        self.append(self.persistent_load(pid))
1249    dispatch[PERSID[0]] = load_persid
1250
1251    def load_binpersid(self):
1252        pid = self.stack.pop()
1253        self.append(self.persistent_load(pid))
1254    dispatch[BINPERSID[0]] = load_binpersid
1255
1256    def load_none(self):
1257        self.append(None)
1258    dispatch[NONE[0]] = load_none
1259
1260    def load_false(self):
1261        self.append(False)
1262    dispatch[NEWFALSE[0]] = load_false
1263
1264    def load_true(self):
1265        self.append(True)
1266    dispatch[NEWTRUE[0]] = load_true
1267
1268    def load_int(self):
1269        data = self.readline()
1270        if data == FALSE[1:]:
1271            val = False
1272        elif data == TRUE[1:]:
1273            val = True
1274        else:
1275            val = int(data, 0)
1276        self.append(val)
1277    dispatch[INT[0]] = load_int
1278
1279    def load_binint(self):
1280        self.append(unpack('<i', self.read(4))[0])
1281    dispatch[BININT[0]] = load_binint
1282
1283    def load_binint1(self):
1284        self.append(self.read(1)[0])
1285    dispatch[BININT1[0]] = load_binint1
1286
1287    def load_binint2(self):
1288        self.append(unpack('<H', self.read(2))[0])
1289    dispatch[BININT2[0]] = load_binint2
1290
1291    def load_long(self):
1292        val = self.readline()[:-1]
1293        if val and val[-1] == b'L'[0]:
1294            val = val[:-1]
1295        self.append(int(val, 0))
1296    dispatch[LONG[0]] = load_long
1297
1298    def load_long1(self):
1299        n = self.read(1)[0]
1300        data = self.read(n)
1301        self.append(decode_long(data))
1302    dispatch[LONG1[0]] = load_long1
1303
1304    def load_long4(self):
1305        n, = unpack('<i', self.read(4))
1306        if n < 0:
1307            # Corrupt or hostile pickle -- we never write one like this
1308            raise UnpicklingError("LONG pickle has negative byte count")
1309        data = self.read(n)
1310        self.append(decode_long(data))
1311    dispatch[LONG4[0]] = load_long4
1312
1313    def load_float(self):
1314        self.append(float(self.readline()[:-1]))
1315    dispatch[FLOAT[0]] = load_float
1316
1317    def load_binfloat(self):
1318        self.append(unpack('>d', self.read(8))[0])
1319    dispatch[BINFLOAT[0]] = load_binfloat
1320
1321    def _decode_string(self, value):
1322        # Used to allow strings from Python 2 to be decoded either as
1323        # bytes or Unicode strings.  This should be used only with the
1324        # STRING, BINSTRING and SHORT_BINSTRING opcodes.
1325        if self.encoding == "bytes":
1326            return value
1327        else:
1328            return value.decode(self.encoding, self.errors)
1329
1330    def load_string(self):
1331        data = self.readline()[:-1]
1332        # Strip outermost quotes
1333        if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'':
1334            data = data[1:-1]
1335        else:
1336            raise UnpicklingError("the STRING opcode argument must be quoted")
1337        self.append(self._decode_string(codecs.escape_decode(data)[0]))
1338    dispatch[STRING[0]] = load_string
1339
1340    def load_binstring(self):
1341        # Deprecated BINSTRING uses signed 32-bit length
1342        len, = unpack('<i', self.read(4))
1343        if len < 0:
1344            raise UnpicklingError("BINSTRING pickle has negative byte count")
1345        data = self.read(len)
1346        self.append(self._decode_string(data))
1347    dispatch[BINSTRING[0]] = load_binstring
1348
1349    def load_binbytes(self):
1350        len, = unpack('<I', self.read(4))
1351        if len > maxsize:
1352            raise UnpicklingError("BINBYTES exceeds system's maximum size "
1353                                  "of %d bytes" % maxsize)
1354        self.append(self.read(len))
1355    dispatch[BINBYTES[0]] = load_binbytes
1356
1357    def load_unicode(self):
1358        self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
1359    dispatch[UNICODE[0]] = load_unicode
1360
1361    def load_binunicode(self):
1362        len, = unpack('<I', self.read(4))
1363        if len > maxsize:
1364            raise UnpicklingError("BINUNICODE exceeds system's maximum size "
1365                                  "of %d bytes" % maxsize)
1366        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1367    dispatch[BINUNICODE[0]] = load_binunicode
1368
1369    def load_binunicode8(self):
1370        len, = unpack('<Q', self.read(8))
1371        if len > maxsize:
1372            raise UnpicklingError("BINUNICODE8 exceeds system's maximum size "
1373                                  "of %d bytes" % maxsize)
1374        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1375    dispatch[BINUNICODE8[0]] = load_binunicode8
1376
1377    def load_binbytes8(self):
1378        len, = unpack('<Q', self.read(8))
1379        if len > maxsize:
1380            raise UnpicklingError("BINBYTES8 exceeds system's maximum size "
1381                                  "of %d bytes" % maxsize)
1382        self.append(self.read(len))
1383    dispatch[BINBYTES8[0]] = load_binbytes8
1384
1385    def load_bytearray8(self):
1386        len, = unpack('<Q', self.read(8))
1387        if len > maxsize:
1388            raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
1389                                  "of %d bytes" % maxsize)
1390        b = bytearray(len)
1391        self.readinto(b)
1392        self.append(b)
1393    dispatch[BYTEARRAY8[0]] = load_bytearray8
1394
1395    def load_next_buffer(self):
1396        if self._buffers is None:
1397            raise UnpicklingError("pickle stream refers to out-of-band data "
1398                                  "but no *buffers* argument was given")
1399        try:
1400            buf = next(self._buffers)
1401        except StopIteration:
1402            raise UnpicklingError("not enough out-of-band buffers")
1403        self.append(buf)
1404    dispatch[NEXT_BUFFER[0]] = load_next_buffer
1405
1406    def load_readonly_buffer(self):
1407        buf = self.stack[-1]
1408        with memoryview(buf) as m:
1409            if not m.readonly:
1410                self.stack[-1] = m.toreadonly()
1411    dispatch[READONLY_BUFFER[0]] = load_readonly_buffer
1412
1413    def load_short_binstring(self):
1414        len = self.read(1)[0]
1415        data = self.read(len)
1416        self.append(self._decode_string(data))
1417    dispatch[SHORT_BINSTRING[0]] = load_short_binstring
1418
1419    def load_short_binbytes(self):
1420        len = self.read(1)[0]
1421        self.append(self.read(len))
1422    dispatch[SHORT_BINBYTES[0]] = load_short_binbytes
1423
1424    def load_short_binunicode(self):
1425        len = self.read(1)[0]
1426        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1427    dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
1428
1429    def load_tuple(self):
1430        items = self.pop_mark()
1431        self.append(tuple(items))
1432    dispatch[TUPLE[0]] = load_tuple
1433
1434    def load_empty_tuple(self):
1435        self.append(())
1436    dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
1437
1438    def load_tuple1(self):
1439        self.stack[-1] = (self.stack[-1],)
1440    dispatch[TUPLE1[0]] = load_tuple1
1441
1442    def load_tuple2(self):
1443        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
1444    dispatch[TUPLE2[0]] = load_tuple2
1445
1446    def load_tuple3(self):
1447        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
1448    dispatch[TUPLE3[0]] = load_tuple3
1449
1450    def load_empty_list(self):
1451        self.append([])
1452    dispatch[EMPTY_LIST[0]] = load_empty_list
1453
1454    def load_empty_dictionary(self):
1455        self.append({})
1456    dispatch[EMPTY_DICT[0]] = load_empty_dictionary
1457
1458    def load_empty_set(self):
1459        self.append(set())
1460    dispatch[EMPTY_SET[0]] = load_empty_set
1461
1462    def load_frozenset(self):
1463        items = self.pop_mark()
1464        self.append(frozenset(items))
1465    dispatch[FROZENSET[0]] = load_frozenset
1466
1467    def load_list(self):
1468        items = self.pop_mark()
1469        self.append(items)
1470    dispatch[LIST[0]] = load_list
1471
1472    def load_dict(self):
1473        items = self.pop_mark()
1474        d = {items[i]: items[i+1]
1475             for i in range(0, len(items), 2)}
1476        self.append(d)
1477    dispatch[DICT[0]] = load_dict
1478
    # INST and OBJ differ only in how they get a class object.  It's not
    # only sensible to do the rest in a common routine, the two routines
    # previously diverged and grew different bugs.
    # klass is the class to instantiate, and k points to the topmost mark
    # object, following which are the arguments for klass.__init__.
    def _instantiate(self, klass, args):
        # Call the class when args were given, when klass is not actually
        # a class, or when it opts into __getinitargs__; otherwise bypass
        # __init__ entirely via __new__.
        if (args or not isinstance(klass, type) or
            hasattr(klass, "__getinitargs__")):
            try:
                value = klass(*args)
            except TypeError as err:
                # NOTE(review): this passes the traceback as a *second
                # argument* to TypeError instead of chaining it with
                # "raise ... from" -- looks like a Python 2 leftover;
                # confirm before changing.
                raise TypeError("in constructor for %s: %s" %
                                (klass.__name__, str(err)), sys.exc_info()[2])
        else:
            value = klass.__new__(klass)
        self.append(value)
1495
1496    def load_inst(self):
1497        module = self.readline()[:-1].decode("ascii")
1498        name = self.readline()[:-1].decode("ascii")
1499        klass = self.find_class(module, name)
1500        self._instantiate(klass, self.pop_mark())
1501    dispatch[INST[0]] = load_inst
1502
1503    def load_obj(self):
1504        # Stack is ... markobject classobject arg1 arg2 ...
1505        args = self.pop_mark()
1506        cls = args.pop(0)
1507        self._instantiate(cls, args)
1508    dispatch[OBJ[0]] = load_obj
1509
1510    def load_newobj(self):
1511        args = self.stack.pop()
1512        cls = self.stack.pop()
1513        obj = cls.__new__(cls, *args)
1514        self.append(obj)
1515    dispatch[NEWOBJ[0]] = load_newobj
1516
1517    def load_newobj_ex(self):
1518        kwargs = self.stack.pop()
1519        args = self.stack.pop()
1520        cls = self.stack.pop()
1521        obj = cls.__new__(cls, *args, **kwargs)
1522        self.append(obj)
1523    dispatch[NEWOBJ_EX[0]] = load_newobj_ex
1524
1525    def load_global(self):
1526        module = self.readline()[:-1].decode("utf-8")
1527        name = self.readline()[:-1].decode("utf-8")
1528        klass = self.find_class(module, name)
1529        self.append(klass)
1530    dispatch[GLOBAL[0]] = load_global
1531
1532    def load_stack_global(self):
1533        name = self.stack.pop()
1534        module = self.stack.pop()
1535        if type(name) is not str or type(module) is not str:
1536            raise UnpicklingError("STACK_GLOBAL requires str")
1537        self.append(self.find_class(module, name))
1538    dispatch[STACK_GLOBAL[0]] = load_stack_global
1539
1540    def load_ext1(self):
1541        code = self.read(1)[0]
1542        self.get_extension(code)
1543    dispatch[EXT1[0]] = load_ext1
1544
1545    def load_ext2(self):
1546        code, = unpack('<H', self.read(2))
1547        self.get_extension(code)
1548    dispatch[EXT2[0]] = load_ext2
1549
1550    def load_ext4(self):
1551        code, = unpack('<i', self.read(4))
1552        self.get_extension(code)
1553    dispatch[EXT4[0]] = load_ext4
1554
1555    def get_extension(self, code):
1556        nil = []
1557        obj = _extension_cache.get(code, nil)
1558        if obj is not nil:
1559            self.append(obj)
1560            return
1561        key = _inverted_registry.get(code)
1562        if not key:
1563            if code <= 0: # note that 0 is forbidden
1564                # Corrupt or hostile pickle.
1565                raise UnpicklingError("EXT specifies code <= 0")
1566            raise ValueError("unregistered extension code %d" % code)
1567        obj = self.find_class(*key)
1568        _extension_cache[code] = obj
1569        self.append(obj)
1570
    def find_class(self, module, name):
        """Resolve *module*.*name* to an object, applying the Python 2
        compatibility name mappings for protocols < 3.

        Subclasses may override this to restrict what a pickle stream is
        allowed to load -- an untrusted pickle can name any importable
        attribute here.
        """
        # Subclasses may override this.
        sys.audit('pickle.find_class', module, name)
        if self.proto < 3 and self.fix_imports:
            if (module, name) in _compat_pickle.NAME_MAPPING:
                module, name = _compat_pickle.NAME_MAPPING[(module, name)]
            elif module in _compat_pickle.IMPORT_MAPPING:
                module = _compat_pickle.IMPORT_MAPPING[module]
        __import__(module, level=0)
        if self.proto >= 4:
            # Protocol 4+ allows dotted (qualified) attribute paths.
            return _getattribute(sys.modules[module], name)[0]
        else:
            return getattr(sys.modules[module], name)
1584
1585    def load_reduce(self):
1586        stack = self.stack
1587        args = stack.pop()
1588        func = stack[-1]
1589        stack[-1] = func(*args)
1590    dispatch[REDUCE[0]] = load_reduce
1591
1592    def load_pop(self):
1593        if self.stack:
1594            del self.stack[-1]
1595        else:
1596            self.pop_mark()
1597    dispatch[POP[0]] = load_pop
1598
1599    def load_pop_mark(self):
1600        self.pop_mark()
1601    dispatch[POP_MARK[0]] = load_pop_mark
1602
1603    def load_dup(self):
1604        self.append(self.stack[-1])
1605    dispatch[DUP[0]] = load_dup
1606
1607    def load_get(self):
1608        i = int(self.readline()[:-1])
1609        try:
1610            self.append(self.memo[i])
1611        except KeyError:
1612            msg = f'Memo value not found at index {i}'
1613            raise UnpicklingError(msg) from None
1614    dispatch[GET[0]] = load_get
1615
1616    def load_binget(self):
1617        i = self.read(1)[0]
1618        try:
1619            self.append(self.memo[i])
1620        except KeyError as exc:
1621            msg = f'Memo value not found at index {i}'
1622            raise UnpicklingError(msg) from None
1623    dispatch[BINGET[0]] = load_binget
1624
1625    def load_long_binget(self):
1626        i, = unpack('<I', self.read(4))
1627        try:
1628            self.append(self.memo[i])
1629        except KeyError as exc:
1630            msg = f'Memo value not found at index {i}'
1631            raise UnpicklingError(msg) from None
1632    dispatch[LONG_BINGET[0]] = load_long_binget
1633
1634    def load_put(self):
1635        i = int(self.readline()[:-1])
1636        if i < 0:
1637            raise ValueError("negative PUT argument")
1638        self.memo[i] = self.stack[-1]
1639    dispatch[PUT[0]] = load_put
1640
1641    def load_binput(self):
1642        i = self.read(1)[0]
1643        if i < 0:
1644            raise ValueError("negative BINPUT argument")
1645        self.memo[i] = self.stack[-1]
1646    dispatch[BINPUT[0]] = load_binput
1647
1648    def load_long_binput(self):
1649        i, = unpack('<I', self.read(4))
1650        if i > maxsize:
1651            raise ValueError("negative LONG_BINPUT argument")
1652        self.memo[i] = self.stack[-1]
1653    dispatch[LONG_BINPUT[0]] = load_long_binput
1654
1655    def load_memoize(self):
1656        memo = self.memo
1657        memo[len(memo)] = self.stack[-1]
1658    dispatch[MEMOIZE[0]] = load_memoize
1659
1660    def load_append(self):
1661        stack = self.stack
1662        value = stack.pop()
1663        list = stack[-1]
1664        list.append(value)
1665    dispatch[APPEND[0]] = load_append
1666
1667    def load_appends(self):
1668        items = self.pop_mark()
1669        list_obj = self.stack[-1]
1670        try:
1671            extend = list_obj.extend
1672        except AttributeError:
1673            pass
1674        else:
1675            extend(items)
1676            return
1677        # Even if the PEP 307 requires extend() and append() methods,
1678        # fall back on append() if the object has no extend() method
1679        # for backward compatibility.
1680        append = list_obj.append
1681        for item in items:
1682            append(item)
1683    dispatch[APPENDS[0]] = load_appends
1684
1685    def load_setitem(self):
1686        stack = self.stack
1687        value = stack.pop()
1688        key = stack.pop()
1689        dict = stack[-1]
1690        dict[key] = value
1691    dispatch[SETITEM[0]] = load_setitem
1692
1693    def load_setitems(self):
1694        items = self.pop_mark()
1695        dict = self.stack[-1]
1696        for i in range(0, len(items), 2):
1697            dict[items[i]] = items[i + 1]
1698    dispatch[SETITEMS[0]] = load_setitems
1699
1700    def load_additems(self):
1701        items = self.pop_mark()
1702        set_obj = self.stack[-1]
1703        if isinstance(set_obj, set):
1704            set_obj.update(items)
1705        else:
1706            add = set_obj.add
1707            for item in items:
1708                add(item)
1709    dispatch[ADDITEMS[0]] = load_additems
1710
    def load_build(self):
        # BUILD: pop the pickled state and apply it to the object just
        # below it on the stack (which stays on the stack).
        stack = self.stack
        state = stack.pop()
        inst = stack[-1]
        # If the object defines __setstate__, it takes full
        # responsibility for interpreting the state.
        setstate = getattr(inst, "__setstate__", None)
        if setstate is not None:
            setstate(state)
            return
        # Default handling: state may be a 2-tuple of
        # (instance dict, slots dict).
        slotstate = None
        if isinstance(state, tuple) and len(state) == 2:
            state, slotstate = state
        if state:
            inst_dict = inst.__dict__
            # Intern string keys so instance dicts of many unpickled
            # objects share key objects (memory and lookup win).
            intern = sys.intern
            for k, v in state.items():
                if type(k) is str:
                    inst_dict[intern(k)] = v
                else:
                    inst_dict[k] = v
        if slotstate:
            # __slots__ attributes are not in __dict__; they must be
            # assigned through setattr.
            for k, v in slotstate.items():
                setattr(inst, k, v)
    dispatch[BUILD[0]] = load_build
1734
1735    def load_mark(self):
1736        self.metastack.append(self.stack)
1737        self.stack = []
1738        self.append = self.stack.append
1739    dispatch[MARK[0]] = load_mark
1740
1741    def load_stop(self):
1742        value = self.stack.pop()
1743        raise _Stop(value)
1744    dispatch[STOP[0]] = load_stop
1745
1746
1747# Shorthands
1748
def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Write a pickled representation of obj to the open file object."""
    pickler = _Pickler(file, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
1752
def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Return the pickled representation of obj as a bytes object."""
    buf = io.BytesIO()
    pickler = _Pickler(buf, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
    res = buf.getvalue()
    assert isinstance(res, bytes_types)
    return res
1760
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
          buffers=None):
    """Read and return a pickled object from the open file object."""
    unpickler = _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
                           encoding=encoding, errors=errors)
    return unpickler.load()
1765
def _loads(s, /, *, fix_imports=True, encoding="ASCII", errors="strict",
           buffers=None):
    """Read and return a pickled object from a bytes-like object."""
    if isinstance(s, str):
        raise TypeError("Can't load pickle from unicode string")
    unpickler = _Unpickler(io.BytesIO(s), fix_imports=fix_imports,
                           buffers=buffers, encoding=encoding, errors=errors)
    return unpickler.load()
1773
# Prefer the C-accelerated implementation: when the _pickle extension
# module is available, its classes and functions become the public
# names; the API is the same either way.
try:
    from _pickle import (
        PickleError,
        PicklingError,
        UnpicklingError,
        Pickler,
        Unpickler,
        dump,
        dumps,
        load,
        loads
    )
except ImportError:
    # No C extension (e.g. alternative interpreters): expose the
    # pure-Python implementations defined above under the public names.
    Pickler, Unpickler = _Pickler, _Unpickler
    dump, dumps, load, loads = _dump, _dumps, _load, _loads
1790
1791# Doctest
1792def _test():
1793    import doctest
1794    return doctest.testmod()
1795
if __name__ == "__main__":
    # Command-line interface: pretty-print pickle files, or run the
    # self-test suite with -t.
    import argparse
    arg_parser = argparse.ArgumentParser(
        description='display contents of the pickle files')
    arg_parser.add_argument(
        'pickle_file', type=argparse.FileType('br'),
        nargs='*', help='the pickle file')
    arg_parser.add_argument(
        '-t', '--test', action='store_true',
        help='run self-test suite')
    arg_parser.add_argument(
        '-v', action='store_true',
        help='run verbosely; only affects self-test run')
    options = arg_parser.parse_args()
    if options.test:
        _test()
    elif not options.pickle_file:
        arg_parser.print_help()
    else:
        import pprint
        for stream in options.pickle_file:
            pprint.pprint(load(stream))
1820