• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Create portable serialized representations of Python objects.
2
3See module copyreg for a mechanism for registering custom picklers.
4See module pickletools source for extensive comments.
5
6Classes:
7
8    Pickler
9    Unpickler
10
11Functions:
12
13    dump(object, file)
14    dumps(object) -> bytes
15    load(file) -> object
16    loads(bytes) -> object
17
18Misc variables:
19
20    __version__
21    format_version
22    compatible_formats
23
24"""
25
26from types import FunctionType
27from copyreg import dispatch_table
28from copyreg import _extension_registry, _inverted_registry, _extension_cache
29from itertools import islice
30from functools import partial
31import sys
32from sys import maxsize
33from struct import pack, unpack
34import re
35import io
36import codecs
37import _compat_pickle
38
39__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
40           "Unpickler", "dump", "dumps", "load", "loads"]
41
42try:
43    from _pickle import PickleBuffer
44    __all__.append("PickleBuffer")
45    _HAVE_PICKLE_BUFFER = True
46except ImportError:
47    _HAVE_PICKLE_BUFFER = False
48
49
# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# These are purely informational; no code uses these.
format_version = "4.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      "3.0",            # Protocol 3
                      "4.0",            # Protocol 4
                      "5.0",            # Protocol 5
                      ]                 # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 5

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
# Only bump this if the oldest still supported version of Python already
# includes it.
DEFAULT_PROTOCOL = 4
72
class PickleError(Exception):
    """A common base class for the other pickling exceptions.

    Catch this to handle both pickling and unpickling failures.
    """
    pass
76
class PicklingError(PickleError):
    """This exception is raised when an unpicklable object is passed to the
    dump() method.

    """
    pass
83
class UnpicklingError(PickleError):
    """This exception is raised when there is a problem unpickling an object,
    such as a security violation.

    Note that other exceptions may also be raised during unpickling, including
    (but not necessarily limited to) AttributeError, EOFError, ImportError,
    and IndexError.

    """
    pass
94
# An instance of _Stop is raised by Unpickler.load_stop() in response to
# the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    def __init__(self, value):
        # value: the fully unpickled object left on top of the stack.
        self.value = value
100
# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.
# (Each value is the raw byte written to / read from the pickle stream.)

MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   #  "     "   ;    "      "       "      " < 256 bytes

# Protocol 4

SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
BINUNICODE8      = b'\x8d'  # push very long string
BINBYTES8        = b'\x8e'  # push very long bytes string
EMPTY_SET        = b'\x8f'  # push empty set on the stack
ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
FROZENSET        = b'\x91'  # build frozenset from topmost stack items
NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
MEMOIZE          = b'\x94'  # store top of the stack in memo
FRAME            = b'\x95'  # indicate the beginning of a new frame

# Protocol 5

BYTEARRAY8       = b'\x96'  # push bytearray
NEXT_BUFFER      = b'\x97'  # push next out-of-band buffer
READONLY_BUFFER  = b'\x98'  # make top of stack readonly

# Re-export every ALL_CAPS opcode/constant name defined above.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
192
193
class _Framer:
    """Buffer pickle output into protocol-4 frames.

    Opcodes are accumulated into an in-memory io.BytesIO "frame"; when a
    frame reaches _FRAME_SIZE_TARGET bytes it is flushed to the underlying
    file, preceded by a FRAME opcode carrying the frame length.
    """

    _FRAME_SIZE_MIN = 4           # frames smaller than this skip the FRAME opcode
    _FRAME_SIZE_TARGET = 64 * 1024    # flush once a frame reaches 64 KiB

    def __init__(self, file_write):
        # file_write: the write method of the underlying file object.
        self.file_write = file_write
        self.current_frame = None

    def start_framing(self):
        # Begin buffering output into an in-memory frame.
        self.current_frame = io.BytesIO()

    def end_framing(self):
        # Flush any pending frame data and stop framing.
        if self.current_frame and self.current_frame.tell() > 0:
            self.commit_frame(force=True)
            self.current_frame = None

    def commit_frame(self, force=False):
        # Flush the current frame to the file once it is full enough
        # (or unconditionally when force=True).
        if self.current_frame:
            f = self.current_frame
            if f.tell() >= self._FRAME_SIZE_TARGET or force:
                data = f.getbuffer()
                write = self.file_write
                if len(data) >= self._FRAME_SIZE_MIN:
                    # Issue a single call to the write method of the underlying
                    # file object for the frame opcode with the size of the
                    # frame. The concatenation is expected to be less expensive
                    # than issuing an additional call to write.
                    write(FRAME + pack("<Q", len(data)))

                # Issue a separate call to write to append the frame
                # contents without concatenation to the above to avoid a
                # memory copy.
                write(data)

                # Start the new frame with a new io.BytesIO instance so that
                # the file object can have delayed access to the previous frame
                # contents via an unreleased memoryview of the previous
                # io.BytesIO instance.
                self.current_frame = io.BytesIO()

    def write(self, data):
        # Route data into the current frame, or straight to the file when
        # framing is inactive.
        if self.current_frame:
            return self.current_frame.write(data)
        else:
            return self.file_write(data)

    def write_large_bytes(self, header, payload):
        # Write a large binary object's opcode header and payload directly
        # to the file, outside of any frame.
        write = self.file_write
        if self.current_frame:
            # Terminate the current frame and flush it to the file.
            self.commit_frame(force=True)

        # Perform direct write of the header and payload of the large binary
        # object. Be careful not to concatenate the header and the payload
        # prior to calling 'write' as we do not want to allocate a large
        # temporary bytes object.
        # We intentionally do not insert a protocol 4 frame opcode to make
        # it possible to optimize file.read calls in the loader.
        write(header)
        write(payload)
255
256
class _Unframer:
    """Read side of protocol-4 framing.

    Reads are served from the in-memory current frame when one is loaded,
    and fall back to the underlying file between frames.
    """

    def __init__(self, file_read, file_readline, file_tell=None):
        # NOTE(review): file_tell is accepted but never stored; it appears
        # to exist only for signature compatibility -- confirm before use.
        self.file_read = file_read
        self.file_readline = file_readline
        self.current_frame = None

    def readinto(self, buf):
        # Fill buf in place; returns the number of bytes written (len(buf)).
        if self.current_frame:
            n = self.current_frame.readinto(buf)
            if n == 0 and len(buf) != 0:
                # Frame exhausted exactly on an opcode boundary: leave
                # framing mode and read directly from the file.
                self.current_frame = None
                n = len(buf)
                buf[:] = self.file_read(n)
                return n
            if n < len(buf):
                # A short read means the frame ended mid-opcode.
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return n
        else:
            n = len(buf)
            buf[:] = self.file_read(n)
            return n

    def read(self, n):
        # Return exactly n bytes from the current frame or the file.
        if self.current_frame:
            data = self.current_frame.read(n)
            if not data and n != 0:
                # Frame exhausted on a boundary: fall back to the file.
                self.current_frame = None
                return self.file_read(n)
            if len(data) < n:
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_read(n)

    def readline(self):
        # Return one newline-terminated line from the frame or the file.
        if self.current_frame:
            data = self.current_frame.readline()
            if not data:
                self.current_frame = None
                return self.file_readline()
            if data[-1] != b'\n'[0]:
                # The line ran off the end of the frame without a terminator.
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_readline()

    def load_frame(self, frame_size):
        # Read the next frame_size bytes from the file as the new frame;
        # the previous frame must have been fully consumed first.
        if self.current_frame and self.current_frame.read() != b'':
            raise UnpicklingError(
                "beginning of a new frame before end of current frame")
        self.current_frame = io.BytesIO(self.file_read(frame_size))
312
313
314# Tools used for pickling.
315
316def _getattribute(obj, name):
317    top = obj
318    for subpath in name.split('.'):
319        if subpath == '<locals>':
320            raise AttributeError("Can't get local attribute {!r} on {!r}"
321                                 .format(name, top))
322        try:
323            parent = obj
324            obj = getattr(obj, subpath)
325        except AttributeError:
326            raise AttributeError("Can't get attribute {!r} on {!r}"
327                                 .format(name, top)) from None
328    return obj, parent
329
def whichmodule(obj, name):
    """Return the name of the module *obj* belongs to."""
    module_name = getattr(obj, '__module__', None)
    if module_name is not None:
        return module_name
    # Scan a snapshot of sys.modules: getattr() on exotic modules may
    # trigger imports that mutate the real dict while we iterate.
    for candidate, module in sys.modules.copy().items():
        if (candidate == '__main__'
            or candidate == '__mp_main__'  # bpo-42406
            or module is None):
            continue
        try:
            if _getattribute(module, name)[0] is obj:
                return candidate
        except AttributeError:
            pass
    return '__main__'
348
def encode_long(x):
    r"""Encode an int as a two's-complement little-endian byte string.

    Zero is special-cased to the empty string, saving one byte in the
    LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    >>>
    """
    if not x:
        return b''
    size = (x.bit_length() >> 3) + 1
    encoded = x.to_bytes(size, byteorder='little', signed=True)
    # A negative value can carry one redundant 0xff sign byte at the end;
    # drop it when the preceding byte already has its sign bit set.
    if x < 0 and size > 1 and encoded[-1] == 0xff and encoded[-2] & 0x80:
        encoded = encoded[:-1]
    return encoded
378
def decode_long(data):
    r"""Decode a two's-complement little-endian byte string into an int.

    The empty string decodes to 0 (inverse of encode_long).

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    return int.from_bytes(data, 'little', signed=True)
398
399
# Sentinel used below to distinguish "no value found" from a stored None
# in getattr()/dict.get() lookups.
_NoValue = object()
401
402# Pickling machinery
403
404class _Pickler:
405
    def __init__(self, file, protocol=None, *, fix_imports=True,
                 buffer_callback=None):
        """This takes a binary file for writing a pickle data stream.

        The optional *protocol* argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2, 3, 4 and 5.
        The default protocol is 4. It was introduced in Python 3.4, and
        is incompatible with previous versions.

        Specifying a negative protocol version selects the highest
        protocol version supported.  The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The *file* argument must have a write() method that accepts a
        single bytes argument. It can thus be a file object opened for
        binary writing, an io.BytesIO instance, or any other custom
        object that meets this interface.

        If *fix_imports* is True and *protocol* is less than 3, pickle
        will try to map the new Python 3 names to the old module names
        used in Python 2, so that the pickle data stream is readable
        with Python 2.

        If *buffer_callback* is None (the default), buffer views are
        serialized into *file* as part of the pickle stream.

        If *buffer_callback* is not None, then it can be called any number
        of times with a buffer view.  If the callback returns a false value
        (such as None), the given buffer is out-of-band; otherwise the
        buffer is serialized in-band, i.e. inside the pickle stream.

        It is an error if *buffer_callback* is not None and *protocol*
        is None or smaller than 5.
        """
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        if protocol < 0:
            # Negative means "highest available".
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        if buffer_callback is not None and protocol < 5:
            raise ValueError("buffer_callback needs protocol >= 5")
        self._buffer_callback = buffer_callback
        try:
            self._file_write = file.write
        except AttributeError:
            raise TypeError("file must have a 'write' attribute")
        # All output is routed through a _Framer so protocol 4+ streams can
        # be grouped into frames.
        self.framer = _Framer(self._file_write)
        self.write = self.framer.write
        self._write_large_bytes = self.framer.write_large_bytes
        self.memo = {}                      # id(obj) -> (memo key, obj)
        self.proto = int(protocol)
        self.bin = protocol >= 1            # binary opcodes allowed?
        self.fast = 0                       # nonzero disables memoization (see memoize)
        self.fix_imports = fix_imports and protocol < 3
462
463    def clear_memo(self):
464        """Clears the pickler's "memo".
465
466        The memo is the data structure that remembers which objects the
467        pickler has already seen, so that shared or recursive objects
468        are pickled by reference and not by value.  This method is
469        useful when re-using picklers.
470        """
471        self.memo.clear()
472
    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        # Check whether Pickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Pickler.dump().
        if not hasattr(self, "_file_write"):
            raise PicklingError("Pickler.__init__() was not called by "
                                "%s.__init__()" % (self.__class__.__name__,))
        if self.proto >= 2:
            # Announce the protocol version with a PROTO opcode.
            self.write(PROTO + pack("<B", self.proto))
        if self.proto >= 4:
            # Protocol 4+ wraps the stream body in frames (see _Framer).
            self.framer.start_framing()
        self.save(obj)
        self.write(STOP)
        self.framer.end_framing()
487
488    def memoize(self, obj):
489        """Store an object in the memo."""
490
491        # The Pickler memo is a dictionary mapping object ids to 2-tuples
492        # that contain the Unpickler memo key and the object being memoized.
493        # The memo key is written to the pickle and will become
494        # the key in the Unpickler's memo.  The object is stored in the
495        # Pickler memo so that transient objects are kept alive during
496        # pickling.
497
498        # The use of the Unpickler memo length as the memo key is just a
499        # convention.  The only requirement is that the memo values be unique.
500        # But there appears no advantage to any other scheme, and this
501        # scheme allows the Unpickler memo to be implemented as a plain (but
502        # growable) array, indexed by memo key.
503        if self.fast:
504            return
505        assert id(obj) not in self.memo
506        idx = len(self.memo)
507        self.write(self.put(idx))
508        self.memo[id(obj)] = idx, obj
509
510    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
511    def put(self, idx):
512        if self.proto >= 4:
513            return MEMOIZE
514        elif self.bin:
515            if idx < 256:
516                return BINPUT + pack("<B", idx)
517            else:
518                return LONG_BINPUT + pack("<I", idx)
519        else:
520            return PUT + repr(idx).encode("ascii") + b'\n'
521
522    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
523    def get(self, i):
524        if self.bin:
525            if i < 256:
526                return BINGET + pack("<B", i)
527            else:
528                return LONG_BINGET + pack("<I", i)
529
530        return GET + repr(i).encode("ascii") + b'\n'
531
    def save(self, obj, save_persistent_id=True):
        """Pickle obj: the central dispatch routine every value goes through."""
        # Give the framer a chance to flush a full frame before more opcodes.
        self.framer.commit_frame()

        # Check for persistent id (defined by a subclass)
        if save_persistent_id:
            pid = self.persistent_id(obj)
            if pid is not None:
                self.save_pers(pid)
                return

        # Check the memo
        x = self.memo.get(id(obj))
        if x is not None:
            self.write(self.get(x[0]))
            return

        # A subclass-defined reducer_override() gets first crack at obj;
        # NotImplemented means "fall through to normal dispatch".
        rv = NotImplemented
        reduce = getattr(self, "reducer_override", _NoValue)
        if reduce is not _NoValue:
            rv = reduce(obj)

        if rv is NotImplemented:
            # Check the type dispatch table
            t = type(obj)
            f = self.dispatch.get(t)
            if f is not None:
                f(self, obj)  # Call unbound method with explicit self
                return

            # Check private dispatch table if any, or else
            # copyreg.dispatch_table
            reduce = getattr(self, 'dispatch_table', dispatch_table).get(t, _NoValue)
            if reduce is not _NoValue:
                rv = reduce(obj)
            else:
                # Check for a class with a custom metaclass; treat as regular
                # class
                if issubclass(t, type):
                    self.save_global(obj)
                    return

                # Check for a __reduce_ex__ method, fall back to __reduce__
                reduce = getattr(obj, "__reduce_ex__", _NoValue)
                if reduce is not _NoValue:
                    rv = reduce(self.proto)
                else:
                    reduce = getattr(obj, "__reduce__", _NoValue)
                    if reduce is not _NoValue:
                        rv = reduce()
                    else:
                        raise PicklingError("Can't pickle %r object: %r" %
                                            (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if isinstance(rv, str):
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if not isinstance(rv, tuple):
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 6):
            raise PicklingError("Tuple returned by %s must have "
                                "two to six elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self.save_reduce(obj=obj, *rv)
602
603    def persistent_id(self, obj):
604        # This exists so a subclass can override it
605        return None
606
    def save_pers(self, pid):
        # Save a persistent id reference
        if self.bin:
            # Binary protocols pickle the id value itself, then tag it
            # with BINPERSID.
            self.save(pid, save_persistent_id=False)
            self.write(BINPERSID)
        else:
            # Protocol 0 writes the id as an ASCII text line after PERSID.
            try:
                self.write(PERSID + str(pid).encode("ascii") + b'\n')
            except UnicodeEncodeError:
                raise PicklingError(
                    "persistent IDs in protocol 0 must be ASCII strings")
618
    def save_reduce(self, func, args, state=None, listitems=None,
                    dictitems=None, state_setter=None, *, obj=None):
        """Pickle the contents of a __reduce__-style tuple.

        func/args: callable and argument tuple that recreate the object.
        state: object passed to BUILD (__setstate__/__dict__ update).
        listitems/dictitems: iterators of list items / dict (key, value)
        pairs to append/set on the rebuilt object, or None.
        state_setter: callable invoked as state_setter(obj, state) instead
        of the BUILD mechanism.
        obj: the original object, used for memoization.
        """
        # This API is called by some subclasses

        if not isinstance(args, tuple):
            raise PicklingError("args from save_reduce() must be a tuple")
        if not callable(func):
            raise PicklingError("func from save_reduce() must be callable")

        save = self.save
        write = self.write

        func_name = getattr(func, "__name__", "")
        if self.proto >= 2 and func_name == "__newobj_ex__":
            # NEWOBJ_EX (protocol 4+) supports keyword arguments to
            # cls.__new__; older protocols emulate it with functools.partial.
            cls, args, kwargs = args
            if not hasattr(cls, "__new__"):
                raise PicklingError("args[0] from {} args has no __new__"
                                    .format(func_name))
            if obj is not None and cls is not obj.__class__:
                raise PicklingError("args[0] from {} args has the wrong class"
                                    .format(func_name))
            if self.proto >= 4:
                save(cls)
                save(args)
                save(kwargs)
                write(NEWOBJ_EX)
            else:
                func = partial(cls.__new__, cls, *args, **kwargs)
                save(func)
                save(())
                write(REDUCE)
        elif self.proto >= 2 and func_name == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 or newer to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally.  For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # class.  The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below).  If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own.  This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            # Generic case: pickle func and args, then apply with REDUCE.
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))
            else:
                self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            if state_setter is None:
                save(state)
                write(BUILD)
            else:
                # If a state_setter is specified, call it instead of load_build
                # to update obj's with its previous state.
                # First, push state_setter and its tuple of expected arguments
                # (obj, state) onto the stack.
                save(state_setter)
                save(obj)  # simple BINGET opcode as obj is already memoized.
                save(state)
                write(TUPLE2)
                # Trigger a state_setter(obj, state) function call.
                write(REDUCE)
                # The purpose of state_setter is to carry-out an
                # inplace modification of obj. We do not care about what the
                # method might return, so its output is eventually removed from
                # the stack.
                write(POP)
733
    # Methods below this point are dispatched through the dispatch table

    # Maps a concrete type to the saver method handling it; populated
    # immediately after each save_* definition below.
    dispatch = {}

    def save_none(self, obj):
        self.write(NONE)
    dispatch[type(None)] = save_none
741
    def save_bool(self, obj):
        # Protocol 2+ has dedicated one-byte opcodes; older protocols reuse
        # INT with the special "01"/"00" arguments (see pickletools).
        if self.proto >= 2:
            self.write(NEWTRUE if obj else NEWFALSE)
        else:
            self.write(TRUE if obj else FALSE)
    dispatch[bool] = save_bool
748
749    def save_long(self, obj):
750        if self.bin:
751            # If the int is small enough to fit in a signed 4-byte 2's-comp
752            # format, we can store it more efficiently than the general
753            # case.
754            # First one- and two-byte unsigned ints:
755            if obj >= 0:
756                if obj <= 0xff:
757                    self.write(BININT1 + pack("<B", obj))
758                    return
759                if obj <= 0xffff:
760                    self.write(BININT2 + pack("<H", obj))
761                    return
762            # Next check for 4-byte signed ints:
763            if -0x80000000 <= obj <= 0x7fffffff:
764                self.write(BININT + pack("<i", obj))
765                return
766        if self.proto >= 2:
767            encoded = encode_long(obj)
768            n = len(encoded)
769            if n < 256:
770                self.write(LONG1 + pack("<B", n) + encoded)
771            else:
772                self.write(LONG4 + pack("<i", n) + encoded)
773            return
774        if -0x80000000 <= obj <= 0x7fffffff:
775            self.write(INT + repr(obj).encode("ascii") + b'\n')
776        else:
777            self.write(LONG + repr(obj).encode("ascii") + b'L\n')
778    dispatch[int] = save_long
779
    def save_float(self, obj):
        # Binary protocols store the 8-byte big-endian IEEE-754 form;
        # protocol 0 falls back to the repr() text form.
        if self.bin:
            self.write(BINFLOAT + pack('>d', obj))
        else:
            self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
    dispatch[float] = save_float
786
    def _save_bytes_no_memo(self, obj):
        # helper for writing bytes objects for protocol >= 3
        # without memoizing them
        assert self.proto >= 3
        n = len(obj)
        if n <= 0xff:
            # Fits in a one-byte length prefix.
            self.write(SHORT_BINBYTES + pack("<B", n) + obj)
        elif n > 0xffffffff and self.proto >= 4:
            # Too big for a 4-byte length prefix; needs the 8-byte opcode.
            self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj)
        elif n >= self.framer._FRAME_SIZE_TARGET:
            # Large payloads are written outside the current frame.
            self._write_large_bytes(BINBYTES + pack("<I", n), obj)
        else:
            self.write(BINBYTES + pack("<I", n) + obj)
800
801    def save_bytes(self, obj):
802        if self.proto < 3:
803            if not obj: # bytes object is empty
804                self.save_reduce(bytes, (), obj=obj)
805            else:
806                self.save_reduce(codecs.encode,
807                                 (str(obj, 'latin1'), 'latin1'), obj=obj)
808            return
809        self._save_bytes_no_memo(obj)
810        self.memoize(obj)
811    dispatch[bytes] = save_bytes
812
813    def _save_bytearray_no_memo(self, obj):
814        # helper for writing bytearray objects for protocol >= 5
815        # without memoizing them
816        assert self.proto >= 5
817        n = len(obj)
818        if n >= self.framer._FRAME_SIZE_TARGET:
819            self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
820        else:
821            self.write(BYTEARRAY8 + pack("<Q", n) + obj)
822
823    def save_bytearray(self, obj):
824        if self.proto < 5:
825            if not obj:  # bytearray is empty
826                self.save_reduce(bytearray, (), obj=obj)
827            else:
828                self.save_reduce(bytearray, (bytes(obj),), obj=obj)
829            return
830        self._save_bytearray_no_memo(obj)
831        self.memoize(obj)
832    dispatch[bytearray] = save_bytearray
833
    if _HAVE_PICKLE_BUFFER:
        def save_picklebuffer(self, obj):
            """Pickle a PickleBuffer, in-band or out-of-band (protocol 5+).

            If a buffer_callback was given and returns a falsy value, the
            data is emitted out-of-band as NEXT_BUFFER (plus
            READONLY_BUFFER for read-only views); otherwise the bytes are
            written in-band as a bytes/bytearray.
            """
            if self.proto < 5:
                raise PicklingError("PickleBuffer can only be pickled with "
                                    "protocol >= 5")
            with obj.raw() as m:
                if not m.contiguous:
                    raise PicklingError("PickleBuffer can not be pickled when "
                                        "pointing to a non-contiguous buffer")
                in_band = True
                if self._buffer_callback is not None:
                    in_band = bool(self._buffer_callback(obj))
                if in_band:
                    # Write data in-band
                    # XXX The C implementation avoids a copy here
                    buf = m.tobytes()
                    in_memo = id(buf) in self.memo
                    # Read-only views round-trip as bytes, writable ones as
                    # bytearray.  If the bytes copy happens to be memoized
                    # already, skip re-memoizing to avoid clobbering.
                    if m.readonly:
                        if in_memo:
                            self._save_bytes_no_memo(buf)
                        else:
                            self.save_bytes(buf)
                    else:
                        if in_memo:
                            self._save_bytearray_no_memo(buf)
                        else:
                            self.save_bytearray(buf)
                else:
                    # Write data out-of-band
                    self.write(NEXT_BUFFER)
                    if m.readonly:
                        self.write(READONLY_BUFFER)

        dispatch[PickleBuffer] = save_picklebuffer
868
    def save_str(self, obj):
        """Pickle a str: UTF-8 (surrogatepass) on binary protocols, a
        raw-unicode-escape text line on protocol 0."""
        if self.bin:
            encoded = obj.encode('utf-8', 'surrogatepass')
            n = len(encoded)
            if n <= 0xff and self.proto >= 4:
                self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
            elif n > 0xffffffff and self.proto >= 4:
                self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
            elif n >= self.framer._FRAME_SIZE_TARGET:
                self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
            else:
                self.write(BINUNICODE + pack("<I", n) + encoded)
        else:
            # Escape what raw-unicode-escape doesn't, but memoize the original.
            # Backslash must be replaced first so the escapes added below
            # are not themselves re-escaped.
            tmp = obj.replace("\\", "\\u005c")
            tmp = tmp.replace("\0", "\\u0000")
            tmp = tmp.replace("\n", "\\u000a")
            tmp = tmp.replace("\r", "\\u000d")
            tmp = tmp.replace("\x1a", "\\u001a")  # EOF on DOS
            self.write(UNICODE + tmp.encode('raw-unicode-escape') + b'\n')
        self.memoize(obj)
    dispatch[str] = save_str
891
    def save_tuple(self, obj):
        """Pickle a tuple, handling self-referential (recursive) tuples.

        Elements are saved first; if saving them caused the tuple itself
        to land in the memo, the tuple is recursive and is fetched back
        via GET instead of being rebuilt.
        """
        if not obj: # tuple is empty
            if self.bin:
                self.write(EMPTY_TUPLE)
            else:
                self.write(MARK + TUPLE)
            return

        n = len(obj)
        save = self.save
        memo = self.memo
        if n <= 3 and self.proto >= 2:
            # TUPLE1/TUPLE2/TUPLE3 need no MARK.
            for element in obj:
                save(element)
            # Subtle.  Same as in the big comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                self.write(POP * n + get)
            else:
                self.write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
        # has more than 3 elements.
        write = self.write
        write(MARK)
        for element in obj:
            save(element)

        if id(obj) in memo:
            # Subtle.  d was not in memo when we entered save_tuple(), so
            # the process of saving the tuple's elements must have saved
            # the tuple itself:  the tuple is recursive.  The proper action
            # now is to throw away everything we put on the stack, and
            # simply GET the tuple (it's already constructed).  This check
            # could have been done in the "for element" loop instead, but
            # recursive tuples are a rare thing.
            get = self.get(memo[id(obj)][0])
            if self.bin:
                write(POP_MARK + get)
            else:   # proto 0 -- POP_MARK not available
                write(POP * (n+1) + get)
            return

        # No recursion.
        write(TUPLE)
        self.memoize(obj)

    dispatch[tuple] = save_tuple
942
943    def save_list(self, obj):
944        if self.bin:
945            self.write(EMPTY_LIST)
946        else:   # proto 0 -- can't use EMPTY_LIST
947            self.write(MARK + LIST)
948
949        self.memoize(obj)
950        self._batch_appends(obj)
951
952    dispatch[list] = save_list
953
    # Maximum number of items emitted per APPENDS/SETITEMS/ADDITEMS batch.
    _BATCHSIZE = 1000

    def _batch_appends(self, items):
        # Helper to batch up APPENDS sequences
        save = self.save
        write = self.write

        if not self.bin:
            # Protocol 0 has no APPENDS; emit one APPEND per item.
            for x in items:
                save(x)
                write(APPEND)
            return

        it = iter(items)
        while True:
            tmp = list(islice(it, self._BATCHSIZE))
            n = len(tmp)
            if n > 1:
                write(MARK)
                for x in tmp:
                    save(x)
                write(APPENDS)
            elif n:
                # A single trailing item is cheaper as a bare APPEND.
                save(tmp[0])
                write(APPEND)
            # else tmp is empty, and we're done
            if n < self._BATCHSIZE:
                return
982
983    def save_dict(self, obj):
984        if self.bin:
985            self.write(EMPTY_DICT)
986        else:   # proto 0 -- can't use EMPTY_DICT
987            self.write(MARK + DICT)
988
989        self.memoize(obj)
990        self._batch_setitems(obj.items())
991
992    dispatch[dict] = save_dict
993
    def _batch_setitems(self, items):
        # Helper to batch up SETITEMS sequences; proto >= 1 only
        save = self.save
        write = self.write

        if not self.bin:
            # Protocol 0 has no SETITEMS; emit one SETITEM per pair.
            for k, v in items:
                save(k)
                save(v)
                write(SETITEM)
            return

        it = iter(items)
        while True:
            tmp = list(islice(it, self._BATCHSIZE))
            n = len(tmp)
            if n > 1:
                write(MARK)
                for k, v in tmp:
                    save(k)
                    save(v)
                write(SETITEMS)
            elif n:
                # A single trailing pair is cheaper as a bare SETITEM.
                k, v = tmp[0]
                save(k)
                save(v)
                write(SETITEM)
            # else tmp is empty, and we're done
            if n < self._BATCHSIZE:
                return
1024
1025    def save_set(self, obj):
1026        save = self.save
1027        write = self.write
1028
1029        if self.proto < 4:
1030            self.save_reduce(set, (list(obj),), obj=obj)
1031            return
1032
1033        write(EMPTY_SET)
1034        self.memoize(obj)
1035
1036        it = iter(obj)
1037        while True:
1038            batch = list(islice(it, self._BATCHSIZE))
1039            n = len(batch)
1040            if n > 0:
1041                write(MARK)
1042                for item in batch:
1043                    save(item)
1044                write(ADDITEMS)
1045            if n < self._BATCHSIZE:
1046                return
1047    dispatch[set] = save_set
1048
    def save_frozenset(self, obj):
        """Pickle a frozenset.

        Protocol 4+ uses MARK ... FROZENSET; older protocols reduce to a
        frozenset(list) call.  Recursion through a reduce/__reduce__ hook
        is handled by fetching the already-built object from the memo.
        """
        save = self.save
        write = self.write

        if self.proto < 4:
            self.save_reduce(frozenset, (list(obj),), obj=obj)
            return

        write(MARK)
        for item in obj:
            save(item)

        if id(obj) in self.memo:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            write(POP_MARK + self.get(self.memo[id(obj)][0]))
            return

        write(FROZENSET)
        self.memoize(obj)
    dispatch[frozenset] = save_frozenset
1071
    def save_global(self, obj, name=None):
        """Pickle an object by reference (module + qualified name).

        Verifies that importing module.name yields the very same object,
        then emits, in order of preference: an extension-registry code
        (EXT1/2/4), STACK_GLOBAL (protocol 4+), a getattr chain for
        dotted names on older protocols, or a plain GLOBAL opcode.
        """
        write = self.write
        memo = self.memo

        if name is None:
            name = getattr(obj, '__qualname__', None)
        if name is None:
            name = obj.__name__

        module_name = whichmodule(obj, name)
        try:
            __import__(module_name, level=0)
            module = sys.modules[module_name]
            obj2, parent = _getattribute(module, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module_name, name)) from None
        else:
            if obj2 is not obj:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module_name, name))

        if self.proto >= 2:
            # Prefer the compact extension-registry encoding if registered.
            code = _extension_registry.get((module_name, name), _NoValue)
            if code is not _NoValue:
                if code <= 0xff:
                    data = pack("<B", code)
                    if data == b'\0':
                        # Should never happen in normal circumstances,
                        # since the type and the value of the code are
                        # checked in copyreg.add_extension().
                        raise RuntimeError("extension code 0 is out of range")
                    write(EXT1 + data)
                elif code <= 0xffff:
                    write(EXT2 + pack("<H", code))
                else:
                    write(EXT4 + pack("<i", code))
                return
        lastname = name.rpartition('.')[2]
        if parent is module:
            name = lastname
        # Non-ASCII identifiers are supported only with protocols >= 3.
        if self.proto >= 4:
            self.save(module_name)
            self.save(name)
            write(STACK_GLOBAL)
        elif '.' in name:
            # In protocol < 4, objects with multi-part __qualname__
            # are represented as
            # getattr(getattr(..., attrname1), attrname2).
            dotted_path = name.split('.')
            name = dotted_path.pop(0)
            save = self.save
            for attrname in dotted_path:
                save(getattr)
                if self.proto < 2:
                    write(MARK)
            self._save_toplevel_by_name(module_name, name)
            for attrname in dotted_path:
                save(attrname)
                if self.proto < 2:
                    write(TUPLE)
                else:
                    write(TUPLE2)
                write(REDUCE)
        else:
            self._save_toplevel_by_name(module_name, name)

        self.memoize(obj)
1143
    def _save_toplevel_by_name(self, module_name, name):
        """Emit a GLOBAL opcode for module_name.name.

        Protocol >= 3 writes UTF-8; protocols 0-2 are limited to ASCII
        and optionally map names back to their Python 2 spellings.
        """
        if self.proto >= 3:
            # Non-ASCII identifiers are supported only with protocols >= 3.
            self.write(GLOBAL + bytes(module_name, "utf-8") + b'\n' +
                       bytes(name, "utf-8") + b'\n')
        else:
            if self.fix_imports:
                # Translate Python 3 names to their Python 2 equivalents.
                r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
                r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
                if (module_name, name) in r_name_mapping:
                    module_name, name = r_name_mapping[(module_name, name)]
                elif module_name in r_import_mapping:
                    module_name = r_import_mapping[module_name]
            try:
                self.write(GLOBAL + bytes(module_name, "ascii") + b'\n' +
                           bytes(name, "ascii") + b'\n')
            except UnicodeEncodeError:
                raise PicklingError(
                    "can't pickle global identifier '%s.%s' using "
                    "pickle protocol %i" % (module_name, name, self.proto)) from None
1164
1165    def save_type(self, obj):
1166        if obj is type(None):
1167            return self.save_reduce(type, (None,), obj=obj)
1168        elif obj is type(NotImplemented):
1169            return self.save_reduce(type, (NotImplemented,), obj=obj)
1170        elif obj is type(...):
1171            return self.save_reduce(type, (...,), obj=obj)
1172        return self.save_global(obj)
1173
1174    dispatch[FunctionType] = save_global
1175    dispatch[type] = save_type
1176
1177
1178# Unpickling machinery
1179
1180class _Unpickler:
1181
    def __init__(self, file, *, fix_imports=True,
                 encoding="ASCII", errors="strict", buffers=None):
        """This takes a binary file for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so
        no proto argument is needed.

        The argument *file* must have two methods, a read() method that
        takes an integer argument, and a readline() method that requires
        no arguments.  Both methods should return bytes.  Thus *file*
        can be a binary file object opened for reading, an io.BytesIO
        object, or any other custom object that meets this interface.

        If *buffers* is not None, it should be an iterable of buffer-enabled
        objects that is consumed each time the pickle stream references
        an out-of-band buffer view.  Such buffers have been given in order
        to the *buffer_callback* of a Pickler object.

        If *buffers* is None (the default), then the buffers are taken
        from the pickle stream, assuming they are serialized there.
        It is an error for *buffers* to be None if the pickle stream
        was produced with a non-None *buffer_callback*.

        Other optional arguments are *fix_imports*, *encoding* and
        *errors*, which are used to control compatibility support for
        pickle stream generated by Python 2.  If *fix_imports* is True,
        pickle will try to map the old Python 2 names to the new names
        used in Python 3.  The *encoding* and *errors* tell pickle how
        to decode 8-bit string instances pickled by Python 2; these
        default to 'ASCII' and 'strict', respectively. *encoding* can be
        'bytes' to read these 8-bit string instances as bytes objects.
        """
        self._buffers = iter(buffers) if buffers is not None else None
        self._file_readline = file.readline
        self._file_read = file.read
        self.memo = {}                  # memo index -> already-built object
        self.encoding = encoding
        self.errors = errors
        self.proto = 0                  # updated by the PROTO opcode
        self.fix_imports = fix_imports
1229
    def load(self):
        """Read a pickled object representation from the open file.

        Return the reconstituted object hierarchy specified in the file.
        """
        # Check whether Unpickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Unpickler.dump().
        if not hasattr(self, "_file_read"):
            raise UnpicklingError("Unpickler.__init__() was not called by "
                                  "%s.__init__()" % (self.__class__.__name__,))
        self._unframer = _Unframer(self._file_read, self._file_readline)
        self.read = self._unframer.read
        self.readinto = self._unframer.readinto
        self.readline = self._unframer.readline
        # metastack holds stacks saved by MARK; stack is the working stack.
        self.metastack = []
        self.stack = []
        self.append = self.stack.append
        self.proto = 0
        read = self.read
        dispatch = self.dispatch
        try:
            # Main opcode loop: one byte selects the handler; STOP raises
            # _Stop to deliver the final object.
            while True:
                key = read(1)
                if not key:
                    raise EOFError
                assert isinstance(key, bytes_types)
                dispatch[key[0]](self)
        except _Stop as stopinst:
            return stopinst.value
1259
1260    # Return a list of items pushed in the stack after last MARK instruction.
1261    def pop_mark(self):
1262        items = self.stack
1263        self.stack = self.metastack.pop()
1264        self.append = self.stack.append
1265        return items
1266
1267    def persistent_load(self, pid):
1268        raise UnpicklingError("unsupported persistent id encountered")
1269
    # Opcode dispatch table: opcode byte value -> handler method.
    dispatch = {}

    def load_proto(self):
        # PROTO: one byte declaring the protocol version of the stream.
        proto = self.read(1)[0]
        if not 0 <= proto <= HIGHEST_PROTOCOL:
            raise ValueError("unsupported pickle protocol: %d" % proto)
        self.proto = proto
    dispatch[PROTO[0]] = load_proto
1278
    def load_frame(self):
        # FRAME: 8-byte frame length; switch the unframer to framed reads.
        frame_size, = unpack('<Q', self.read(8))
        if frame_size > sys.maxsize:
            raise ValueError("frame size > sys.maxsize: %d" % frame_size)
        self._unframer.load_frame(frame_size)
    dispatch[FRAME[0]] = load_frame

    def load_persid(self):
        # PERSID: persistent ID as an ASCII text line (protocol 0).
        try:
            pid = self.readline()[:-1].decode("ascii")
        except UnicodeDecodeError:
            raise UnpicklingError(
                "persistent IDs in protocol 0 must be ASCII strings")
        self.append(self.persistent_load(pid))
    dispatch[PERSID[0]] = load_persid

    def load_binpersid(self):
        # BINPERSID: persistent ID taken from the stack top.
        pid = self.stack.pop()
        self.append(self.persistent_load(pid))
    dispatch[BINPERSID[0]] = load_binpersid
1299
    def load_none(self):
        # NONE: push the None singleton.
        self.append(None)
    dispatch[NONE[0]] = load_none

    def load_false(self):
        # NEWFALSE: push False (protocol 2+).
        self.append(False)
    dispatch[NEWFALSE[0]] = load_false

    def load_true(self):
        # NEWTRUE: push True (protocol 2+).
        self.append(True)
    dispatch[NEWTRUE[0]] = load_true

    def load_int(self):
        # INT: decimal text line; "00"/"01" encode bools in protocol 0.
        data = self.readline()
        if data == FALSE[1:]:
            val = False
        elif data == TRUE[1:]:
            val = True
        else:
            val = int(data, 0)
        self.append(val)
    dispatch[INT[0]] = load_int

    def load_binint(self):
        # BININT: 4-byte little-endian signed int.
        self.append(unpack('<i', self.read(4))[0])
    dispatch[BININT[0]] = load_binint

    def load_binint1(self):
        # BININT1: one unsigned byte.
        self.append(self.read(1)[0])
    dispatch[BININT1[0]] = load_binint1

    def load_binint2(self):
        # BININT2: 2-byte little-endian unsigned int.
        self.append(unpack('<H', self.read(2))[0])
    dispatch[BININT2[0]] = load_binint2

    def load_long(self):
        # LONG: decimal text line, possibly with Python 2's 'L' suffix.
        val = self.readline()[:-1]
        if val and val[-1] == b'L'[0]:
            val = val[:-1]
        self.append(int(val, 0))
    dispatch[LONG[0]] = load_long

    def load_long1(self):
        # LONG1: 1-byte length, then little-endian 2's-complement bytes.
        n = self.read(1)[0]
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG1[0]] = load_long1

    def load_long4(self):
        # LONG4: 4-byte signed length, then 2's-complement bytes.
        n, = unpack('<i', self.read(4))
        if n < 0:
            # Corrupt or hostile pickle -- we never write one like this
            raise UnpicklingError("LONG pickle has negative byte count")
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG4[0]] = load_long4

    def load_float(self):
        # FLOAT: repr() text line (protocol 0).
        self.append(float(self.readline()[:-1]))
    dispatch[FLOAT[0]] = load_float

    def load_binfloat(self):
        # BINFLOAT: 8-byte big-endian IEEE double.
        self.append(unpack('>d', self.read(8))[0])
    dispatch[BINFLOAT[0]] = load_binfloat
1364
1365    def _decode_string(self, value):
1366        # Used to allow strings from Python 2 to be decoded either as
1367        # bytes or Unicode strings.  This should be used only with the
1368        # STRING, BINSTRING and SHORT_BINSTRING opcodes.
1369        if self.encoding == "bytes":
1370            return value
1371        else:
1372            return value.decode(self.encoding, self.errors)
1373
    def load_string(self):
        # STRING: repr()-style quoted text line written by Python 2.
        data = self.readline()[:-1]
        # Strip outermost quotes
        if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'':
            data = data[1:-1]
        else:
            raise UnpicklingError("the STRING opcode argument must be quoted")
        # escape_decode undoes backslash escapes; then decode per settings.
        self.append(self._decode_string(codecs.escape_decode(data)[0]))
    dispatch[STRING[0]] = load_string
1383
1384    def load_binstring(self):
1385        # Deprecated BINSTRING uses signed 32-bit length
1386        len, = unpack('<i', self.read(4))
1387        if len < 0:
1388            raise UnpicklingError("BINSTRING pickle has negative byte count")
1389        data = self.read(len)
1390        self.append(self._decode_string(data))
1391    dispatch[BINSTRING[0]] = load_binstring
1392
1393    def load_binbytes(self):
1394        len, = unpack('<I', self.read(4))
1395        if len > maxsize:
1396            raise UnpicklingError("BINBYTES exceeds system's maximum size "
1397                                  "of %d bytes" % maxsize)
1398        self.append(self.read(len))
1399    dispatch[BINBYTES[0]] = load_binbytes
1400
1401    def load_unicode(self):
1402        self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
1403    dispatch[UNICODE[0]] = load_unicode
1404
1405    def load_binunicode(self):
1406        len, = unpack('<I', self.read(4))
1407        if len > maxsize:
1408            raise UnpicklingError("BINUNICODE exceeds system's maximum size "
1409                                  "of %d bytes" % maxsize)
1410        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1411    dispatch[BINUNICODE[0]] = load_binunicode
1412
1413    def load_binunicode8(self):
1414        len, = unpack('<Q', self.read(8))
1415        if len > maxsize:
1416            raise UnpicklingError("BINUNICODE8 exceeds system's maximum size "
1417                                  "of %d bytes" % maxsize)
1418        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1419    dispatch[BINUNICODE8[0]] = load_binunicode8
1420
1421    def load_binbytes8(self):
1422        len, = unpack('<Q', self.read(8))
1423        if len > maxsize:
1424            raise UnpicklingError("BINBYTES8 exceeds system's maximum size "
1425                                  "of %d bytes" % maxsize)
1426        self.append(self.read(len))
1427    dispatch[BINBYTES8[0]] = load_binbytes8
1428
1429    def load_bytearray8(self):
1430        len, = unpack('<Q', self.read(8))
1431        if len > maxsize:
1432            raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
1433                                  "of %d bytes" % maxsize)
1434        b = bytearray(len)
1435        self.readinto(b)
1436        self.append(b)
1437    dispatch[BYTEARRAY8[0]] = load_bytearray8
1438
    def load_next_buffer(self):
        # NEXT_BUFFER: take the next out-of-band buffer from *buffers*
        # (protocol 5); it is an error if none were supplied or they ran out.
        if self._buffers is None:
            raise UnpicklingError("pickle stream refers to out-of-band data "
                                  "but no *buffers* argument was given")
        try:
            buf = next(self._buffers)
        except StopIteration:
            raise UnpicklingError("not enough out-of-band buffers")
        self.append(buf)
    dispatch[NEXT_BUFFER[0]] = load_next_buffer
1449
    def load_readonly_buffer(self):
        # READONLY_BUFFER: make the out-of-band buffer at the stack top
        # read-only (wrapping it in a read-only memoryview if needed).
        buf = self.stack[-1]
        with memoryview(buf) as m:
            if not m.readonly:
                self.stack[-1] = m.toreadonly()
    dispatch[READONLY_BUFFER[0]] = load_readonly_buffer
1456
1457    def load_short_binstring(self):
1458        len = self.read(1)[0]
1459        data = self.read(len)
1460        self.append(self._decode_string(data))
1461    dispatch[SHORT_BINSTRING[0]] = load_short_binstring
1462
1463    def load_short_binbytes(self):
1464        len = self.read(1)[0]
1465        self.append(self.read(len))
1466    dispatch[SHORT_BINBYTES[0]] = load_short_binbytes
1467
1468    def load_short_binunicode(self):
1469        len = self.read(1)[0]
1470        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1471    dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
1472
    def load_tuple(self):
        # TUPLE: everything since the last MARK becomes a tuple.
        items = self.pop_mark()
        self.append(tuple(items))
    dispatch[TUPLE[0]] = load_tuple

    def load_empty_tuple(self):
        # EMPTY_TUPLE: push ().
        self.append(())
    dispatch[EMPTY_TUPLE[0]] = load_empty_tuple

    def load_tuple1(self):
        # TUPLE1: wrap the top stack item in a 1-tuple (no MARK needed).
        self.stack[-1] = (self.stack[-1],)
    dispatch[TUPLE1[0]] = load_tuple1

    def load_tuple2(self):
        # TUPLE2: replace the top two stack items with a 2-tuple.
        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
    dispatch[TUPLE2[0]] = load_tuple2

    def load_tuple3(self):
        # TUPLE3: replace the top three stack items with a 3-tuple.
        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
    dispatch[TUPLE3[0]] = load_tuple3
1493
    def load_empty_list(self):
        # EMPTY_LIST: push a new empty list.
        self.append([])
    dispatch[EMPTY_LIST[0]] = load_empty_list

    def load_empty_dictionary(self):
        # EMPTY_DICT: push a new empty dict.
        self.append({})
    dispatch[EMPTY_DICT[0]] = load_empty_dictionary

    def load_empty_set(self):
        # EMPTY_SET: push a new empty set (protocol 4+).
        self.append(set())
    dispatch[EMPTY_SET[0]] = load_empty_set

    def load_frozenset(self):
        # FROZENSET: everything since the last MARK becomes a frozenset.
        items = self.pop_mark()
        self.append(frozenset(items))
    dispatch[FROZENSET[0]] = load_frozenset
1510
1511    def load_list(self):
1512        items = self.pop_mark()
1513        self.append(items)
1514    dispatch[LIST[0]] = load_list
1515
1516    def load_dict(self):
1517        items = self.pop_mark()
1518        d = {items[i]: items[i+1]
1519             for i in range(0, len(items), 2)}
1520        self.append(d)
1521    dispatch[DICT[0]] = load_dict
1522
1523    # INST and OBJ differ only in how they get a class object.  It's not
1524    # only sensible to do the rest in a common routine, the two routines
1525    # previously diverged and grew different bugs.
1526    # klass is the class to instantiate, and k points to the topmost mark
1527    # object, following which are the arguments for klass.__init__.
1528    def _instantiate(self, klass, args):
1529        if (args or not isinstance(klass, type) or
1530            hasattr(klass, "__getinitargs__")):
1531            try:
1532                value = klass(*args)
1533            except TypeError as err:
1534                raise TypeError("in constructor for %s: %s" %
1535                                (klass.__name__, str(err)), err.__traceback__)
1536        else:
1537            value = klass.__new__(klass)
1538        self.append(value)
1539
    def load_inst(self):
        # INST: module and class names as ASCII text lines; the arguments
        # are everything pushed since the last MARK (protocol 0).
        module = self.readline()[:-1].decode("ascii")
        name = self.readline()[:-1].decode("ascii")
        klass = self.find_class(module, name)
        self._instantiate(klass, self.pop_mark())
    dispatch[INST[0]] = load_inst

    def load_obj(self):
        # Stack is ... markobject classobject arg1 arg2 ...
        args = self.pop_mark()
        cls = args.pop(0)
        self._instantiate(cls, args)
    dispatch[OBJ[0]] = load_obj
1553
    def load_newobj(self):
        # NEWOBJ: cls and an args tuple on the stack; push
        # cls.__new__(cls, *args) without calling __init__ (protocol 2).
        args = self.stack.pop()
        cls = self.stack.pop()
        obj = cls.__new__(cls, *args)
        self.append(obj)
    dispatch[NEWOBJ[0]] = load_newobj

    def load_newobj_ex(self):
        # NEWOBJ_EX: like NEWOBJ but with a kwargs dict too (protocol 4).
        kwargs = self.stack.pop()
        args = self.stack.pop()
        cls = self.stack.pop()
        obj = cls.__new__(cls, *args, **kwargs)
        self.append(obj)
    dispatch[NEWOBJ_EX[0]] = load_newobj_ex
1568
    def load_global(self):
        # GLOBAL: module and qualified name as UTF-8 text lines; push the
        # object found by importing and attribute lookup.
        module = self.readline()[:-1].decode("utf-8")
        name = self.readline()[:-1].decode("utf-8")
        klass = self.find_class(module, name)
        self.append(klass)
    dispatch[GLOBAL[0]] = load_global

    def load_stack_global(self):
        # STACK_GLOBAL: like GLOBAL, but module and name are taken from
        # the stack (protocol 4).  Both must be genuine str objects.
        name = self.stack.pop()
        module = self.stack.pop()
        if type(name) is not str or type(module) is not str:
            raise UnpicklingError("STACK_GLOBAL requires str")
        self.append(self.find_class(module, name))
    dispatch[STACK_GLOBAL[0]] = load_stack_global
1583
    def load_ext1(self):
        # EXT1: 1-byte extension-registry code (protocol 2).
        code = self.read(1)[0]
        self.get_extension(code)
    dispatch[EXT1[0]] = load_ext1

    def load_ext2(self):
        # EXT2: 2-byte little-endian extension-registry code.
        code, = unpack('<H', self.read(2))
        self.get_extension(code)
    dispatch[EXT2[0]] = load_ext2

    def load_ext4(self):
        # EXT4: 4-byte signed extension-registry code.
        code, = unpack('<i', self.read(4))
        self.get_extension(code)
    dispatch[EXT4[0]] = load_ext4

    def get_extension(self, code):
        """Push the object registered in copyreg for *code*, caching it."""
        obj = _extension_cache.get(code, _NoValue)
        if obj is not _NoValue:
            self.append(obj)
            return
        key = _inverted_registry.get(code)
        if not key:
            if code <= 0: # note that 0 is forbidden
                # Corrupt or hostile pickle.
                raise UnpicklingError("EXT specifies code <= 0")
            raise ValueError("unregistered extension code %d" % code)
        obj = self.find_class(*key)
        _extension_cache[code] = obj
        self.append(obj)
1613
1614    def find_class(self, module, name):
1615        # Subclasses may override this.
1616        sys.audit('pickle.find_class', module, name)
1617        if self.proto < 3 and self.fix_imports:
1618            if (module, name) in _compat_pickle.NAME_MAPPING:
1619                module, name = _compat_pickle.NAME_MAPPING[(module, name)]
1620            elif module in _compat_pickle.IMPORT_MAPPING:
1621                module = _compat_pickle.IMPORT_MAPPING[module]
1622        __import__(module, level=0)
1623        if self.proto >= 4:
1624            return _getattribute(sys.modules[module], name)[0]
1625        else:
1626            return getattr(sys.modules[module], name)
1627
1628    def load_reduce(self):
1629        stack = self.stack
1630        args = stack.pop()
1631        func = stack[-1]
1632        stack[-1] = func(*args)
1633    dispatch[REDUCE[0]] = load_reduce
1634
1635    def load_pop(self):
1636        if self.stack:
1637            del self.stack[-1]
1638        else:
1639            self.pop_mark()
1640    dispatch[POP[0]] = load_pop
1641
    def load_pop_mark(self):
        # Discard all stack items above the topmost mark, and the mark.
        self.pop_mark()
    dispatch[POP_MARK[0]] = load_pop_mark
1645
1646    def load_dup(self):
1647        self.append(self.stack[-1])
1648    dispatch[DUP[0]] = load_dup
1649
1650    def load_get(self):
1651        i = int(self.readline()[:-1])
1652        try:
1653            self.append(self.memo[i])
1654        except KeyError:
1655            msg = f'Memo value not found at index {i}'
1656            raise UnpicklingError(msg) from None
1657    dispatch[GET[0]] = load_get
1658
1659    def load_binget(self):
1660        i = self.read(1)[0]
1661        try:
1662            self.append(self.memo[i])
1663        except KeyError as exc:
1664            msg = f'Memo value not found at index {i}'
1665            raise UnpicklingError(msg) from None
1666    dispatch[BINGET[0]] = load_binget
1667
1668    def load_long_binget(self):
1669        i, = unpack('<I', self.read(4))
1670        try:
1671            self.append(self.memo[i])
1672        except KeyError as exc:
1673            msg = f'Memo value not found at index {i}'
1674            raise UnpicklingError(msg) from None
1675    dispatch[LONG_BINGET[0]] = load_long_binget
1676
1677    def load_put(self):
1678        i = int(self.readline()[:-1])
1679        if i < 0:
1680            raise ValueError("negative PUT argument")
1681        self.memo[i] = self.stack[-1]
1682    dispatch[PUT[0]] = load_put
1683
1684    def load_binput(self):
1685        i = self.read(1)[0]
1686        if i < 0:
1687            raise ValueError("negative BINPUT argument")
1688        self.memo[i] = self.stack[-1]
1689    dispatch[BINPUT[0]] = load_binput
1690
1691    def load_long_binput(self):
1692        i, = unpack('<I', self.read(4))
1693        if i > maxsize:
1694            raise ValueError("negative LONG_BINPUT argument")
1695        self.memo[i] = self.stack[-1]
1696    dispatch[LONG_BINPUT[0]] = load_long_binput
1697
1698    def load_memoize(self):
1699        memo = self.memo
1700        memo[len(memo)] = self.stack[-1]
1701    dispatch[MEMOIZE[0]] = load_memoize
1702
1703    def load_append(self):
1704        stack = self.stack
1705        value = stack.pop()
1706        list = stack[-1]
1707        list.append(value)
1708    dispatch[APPEND[0]] = load_append
1709
1710    def load_appends(self):
1711        items = self.pop_mark()
1712        list_obj = self.stack[-1]
1713        try:
1714            extend = list_obj.extend
1715        except AttributeError:
1716            pass
1717        else:
1718            extend(items)
1719            return
1720        # Even if the PEP 307 requires extend() and append() methods,
1721        # fall back on append() if the object has no extend() method
1722        # for backward compatibility.
1723        append = list_obj.append
1724        for item in items:
1725            append(item)
1726    dispatch[APPENDS[0]] = load_appends
1727
1728    def load_setitem(self):
1729        stack = self.stack
1730        value = stack.pop()
1731        key = stack.pop()
1732        dict = stack[-1]
1733        dict[key] = value
1734    dispatch[SETITEM[0]] = load_setitem
1735
1736    def load_setitems(self):
1737        items = self.pop_mark()
1738        dict = self.stack[-1]
1739        for i in range(0, len(items), 2):
1740            dict[items[i]] = items[i + 1]
1741    dispatch[SETITEMS[0]] = load_setitems
1742
1743    def load_additems(self):
1744        items = self.pop_mark()
1745        set_obj = self.stack[-1]
1746        if isinstance(set_obj, set):
1747            set_obj.update(items)
1748        else:
1749            add = set_obj.add
1750            for item in items:
1751                add(item)
1752    dispatch[ADDITEMS[0]] = load_additems
1753
1754    def load_build(self):
1755        stack = self.stack
1756        state = stack.pop()
1757        inst = stack[-1]
1758        setstate = getattr(inst, "__setstate__", _NoValue)
1759        if setstate is not _NoValue:
1760            setstate(state)
1761            return
1762        slotstate = None
1763        if isinstance(state, tuple) and len(state) == 2:
1764            state, slotstate = state
1765        if state:
1766            inst_dict = inst.__dict__
1767            intern = sys.intern
1768            for k, v in state.items():
1769                if type(k) is str:
1770                    inst_dict[intern(k)] = v
1771                else:
1772                    inst_dict[k] = v
1773        if slotstate:
1774            for k, v in slotstate.items():
1775                setattr(inst, k, v)
1776    dispatch[BUILD[0]] = load_build
1777
1778    def load_mark(self):
1779        self.metastack.append(self.stack)
1780        self.stack = []
1781        self.append = self.stack.append
1782    dispatch[MARK[0]] = load_mark
1783
1784    def load_stop(self):
1785        value = self.stack.pop()
1786        raise _Stop(value)
1787    dispatch[STOP[0]] = load_stop
1788
1789
1790# Shorthands
1791
def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Write a pickled representation of *obj* to the open file object."""
    pickler = _Pickler(file, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
1795
def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Return the pickled representation of *obj* as a bytes object."""
    buf = io.BytesIO()
    pickler = _Pickler(buf, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
    data = buf.getvalue()
    assert isinstance(data, bytes_types)
    return data
1803
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
          buffers=None):
    """Read a pickled object representation from the open file object."""
    unpickler = _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
                           encoding=encoding, errors=errors)
    return unpickler.load()
1808
def _loads(s, /, *, fix_imports=True, encoding="ASCII", errors="strict",
           buffers=None):
    """Read a pickled object hierarchy from a bytes-like object *s*."""
    if isinstance(s, str):
        raise TypeError("Can't load pickle from unicode string")
    return _Unpickler(io.BytesIO(s), fix_imports=fix_imports,
                      buffers=buffers, encoding=encoding,
                      errors=errors).load()
1816
1817# Use the faster _pickle if possible
try:
    # The C accelerator module provides drop-in replacements for the
    # pure-Python implementations defined above; prefer it when present.
    from _pickle import (
        PickleError,
        PicklingError,
        UnpicklingError,
        Pickler,
        Unpickler,
        dump,
        dumps,
        load,
        loads
    )
except ImportError:
    # No C extension: fall back on the pure-Python classes/functions.
    Pickler, Unpickler = _Pickler, _Unpickler
    dump, dumps, load, loads = _dump, _dumps, _load, _loads
1833
1834# Doctest
1835def _test():
1836    import doctest
1837    return doctest.testmod()
1838
if __name__ == "__main__":
    # Command-line mode: pretty-print the contents of pickle files, or
    # run the self-test suite with -t.
    import argparse
    arg_parser = argparse.ArgumentParser(
        description='display contents of the pickle files')
    arg_parser.add_argument('pickle_file', nargs='*',
                            help='the pickle file')
    arg_parser.add_argument('-t', '--test', action='store_true',
                            help='run self-test suite')
    arg_parser.add_argument('-v', action='store_true',
                            help='run verbosely; only affects self-test run')
    options = arg_parser.parse_args()
    if options.test:
        _test()
    elif not options.pickle_file:
        arg_parser.print_help()
    else:
        import pprint
        for path in options.pickle_file:
            # '-' means read a single pickle from standard input.
            if path == '-':
                contents = load(sys.stdin.buffer)
            else:
                with open(path, 'rb') as stream:
                    contents = load(stream)
            pprint.pprint(contents)
1867