"""Create portable serialized representations of Python objects.

See module copyreg for a mechanism for registering custom picklers.
See module pickletools source for extensive comments.

Classes:

    Pickler
    Unpickler

Functions:

    dump(object, file)
    dumps(object) -> string
    load(file) -> object
    loads(string) -> object

Misc variables:

    __version__
    format_version
    compatible_formats

"""

# Standard-library dependencies; copyreg supplies the dispatch table and
# the extension registry shared with the C accelerator (_pickle).
from types import FunctionType
from copyreg import dispatch_table
from copyreg import _extension_registry, _inverted_registry, _extension_cache
from itertools import islice
from functools import partial
import sys
from sys import maxsize
from struct import pack, unpack
import re
import io
import codecs
import _compat_pickle

# Public API of this module.
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# These are purely informational; no code uses these.
format_version = "4.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      "3.0",            # Protocol 3
                      "4.0",            # Protocol 4
                      ]                 # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 4

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
# We intentionally write a protocol that Python 2.x cannot read;
# there are too many issues with that.
DEFAULT_PROTOCOL = 3

class PickleError(Exception):
    """Common base class for every pickling-related exception."""

class PicklingError(PickleError):
    """Raised when the dump() method is handed an unpicklable object."""

class UnpicklingError(PickleError):
    """Raised when a problem (such as a security violation) occurs while
    unpickling an object.

    Unpickling may also raise other exceptions, including (but not
    limited to) AttributeError, EOFError, ImportError, and IndexError.
    """

86# An instance of _Stop is raised by Unpickler.load_stop() in response to
87# the STOP opcode, passing the object that is the result of unpickling.
88class _Stop(Exception):
89    def __init__(self, value):
90        self.value = value
91
# Jython has PyStringMap; it's a dict subclass with string keys
try:
    from org.python.core import PyStringMap
except ImportError:
    # Not running on Jython: the PyStringMap special case is disabled.
    PyStringMap = None

# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

# Maps a tuple length (0..3) to its dedicated protocol-2 opcode.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   #  "     "   ;    "      "       "      " < 256 bytes

# Protocol 4
SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
BINUNICODE8      = b'\x8d'  # push very long string
BINBYTES8        = b'\x8e'  # push very long bytes string
EMPTY_SET        = b'\x8f'  # push empty set on the stack
ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
FROZENSET        = b'\x91'  # build frozenset from topmost stack items
NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
MEMOIZE          = b'\x94'  # store top of the stack in memo
FRAME            = b'\x95'  # indicate the beginning of a new frame

# Re-export every all-caps opcode name alongside the public API above.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])


184class _Framer:
185
186    _FRAME_SIZE_MIN = 4
187    _FRAME_SIZE_TARGET = 64 * 1024
188
189    def __init__(self, file_write):
190        self.file_write = file_write
191        self.current_frame = None
192
193    def start_framing(self):
194        self.current_frame = io.BytesIO()
195
196    def end_framing(self):
197        if self.current_frame and self.current_frame.tell() > 0:
198            self.commit_frame(force=True)
199            self.current_frame = None
200
201    def commit_frame(self, force=False):
202        if self.current_frame:
203            f = self.current_frame
204            if f.tell() >= self._FRAME_SIZE_TARGET or force:
205                data = f.getbuffer()
206                write = self.file_write
207                if len(data) >= self._FRAME_SIZE_MIN:
208                    # Issue a single call to the write method of the underlying
209                    # file object for the frame opcode with the size of the
210                    # frame. The concatenation is expected to be less expensive
211                    # than issuing an additional call to write.
212                    write(FRAME + pack("<Q", len(data)))
213
214                # Issue a separate call to write to append the frame
215                # contents without concatenation to the above to avoid a
216                # memory copy.
217                write(data)
218
219                # Start the new frame with a new io.BytesIO instance so that
220                # the file object can have delayed access to the previous frame
221                # contents via an unreleased memoryview of the previous
222                # io.BytesIO instance.
223                self.current_frame = io.BytesIO()
224
225    def write(self, data):
226        if self.current_frame:
227            return self.current_frame.write(data)
228        else:
229            return self.file_write(data)
230
231    def write_large_bytes(self, header, payload):
232        write = self.file_write
233        if self.current_frame:
234            # Terminate the current frame and flush it to the file.
235            self.commit_frame(force=True)
236
237        # Perform direct write of the header and payload of the large binary
238        # object. Be careful not to concatenate the header and the payload
239        # prior to calling 'write' as we do not want to allocate a large
240        # temporary bytes object.
241        # We intentionally do not insert a protocol 4 frame opcode to make
242        # it possible to optimize file.read calls in the loader.
243        write(header)
244        write(payload)
245
246
247class _Unframer:
248
249    def __init__(self, file_read, file_readline, file_tell=None):
250        self.file_read = file_read
251        self.file_readline = file_readline
252        self.current_frame = None
253
254    def read(self, n):
255        if self.current_frame:
256            data = self.current_frame.read(n)
257            if not data and n != 0:
258                self.current_frame = None
259                return self.file_read(n)
260            if len(data) < n:
261                raise UnpicklingError(
262                    "pickle exhausted before end of frame")
263            return data
264        else:
265            return self.file_read(n)
266
267    def readline(self):
268        if self.current_frame:
269            data = self.current_frame.readline()
270            if not data:
271                self.current_frame = None
272                return self.file_readline()
273            if data[-1] != b'\n'[0]:
274                raise UnpicklingError(
275                    "pickle exhausted before end of frame")
276            return data
277        else:
278            return self.file_readline()
279
280    def load_frame(self, frame_size):
281        if self.current_frame and self.current_frame.read() != b'':
282            raise UnpicklingError(
283                "beginning of a new frame before end of current frame")
284        self.current_frame = io.BytesIO(self.file_read(frame_size))
285
286
287# Tools used for pickling.
288
289def _getattribute(obj, name):
290    for subpath in name.split('.'):
291        if subpath == '<locals>':
292            raise AttributeError("Can't get local attribute {!r} on {!r}"
293                                 .format(name, obj))
294        try:
295            parent = obj
296            obj = getattr(obj, subpath)
297        except AttributeError:
298            raise AttributeError("Can't get attribute {!r} on {!r}"
299                                 .format(name, obj)) from None
300    return obj, parent
301
def whichmodule(obj, name):
    """Return the name of the module *obj* belongs to.

    Trusts obj.__module__ when set; otherwise scans sys.modules for a
    module whose attribute *name* is *obj*, falling back to '__main__'.
    """
    declared = getattr(obj, '__module__', None)
    if declared is not None:
        return declared
    # Iterate over a snapshot of sys.modules: getattr on dynamic modules
    # can trigger imports that mutate sys.modules during the scan.
    for mod_name, module in list(sys.modules.items()):
        if module is None or mod_name == '__main__':
            continue
        try:
            found = _getattribute(module, name)[0]
        except AttributeError:
            continue
        if found is obj:
            return mod_name
    return '__main__'

def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0 is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    >>>
    """
    if not x:
        return b''
    # One byte beyond the bit length always leaves room for the sign bit.
    size = (x.bit_length() >> 3) + 1
    encoded = x.to_bytes(size, byteorder='little', signed=True)
    # For negatives that extra byte may be a redundant 0xff sign
    # extension; drop it when the byte below already carries the sign.
    if x < 0 and size > 1 and encoded[-1] == 0xff and (encoded[-2] & 0x80):
        encoded = encoded[:-1]
    return encoded

def decode_long(data):
    r"""Inverse of encode_long(): decode a two's complement little-endian
    binary string into an int.  The empty string decodes to 0.

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    return int.from_bytes(data, 'little', signed=True)


# Pickling machinery

class _Pickler:

    def __init__(self, file, protocol=None, *, fix_imports=True):
        """Bind this pickler to *file*, a binary file-like object.

        The optional *protocol* argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2, 3 and 4, with 3
        (a backward-incompatible, Python-3-only protocol) as the
        default.  A negative protocol selects HIGHEST_PROTOCOL; the
        higher the protocol, the more recent the Python needed to read
        the pickle produced.

        *file* need only expose a write() method that accepts a single
        bytes argument -- a real binary file, an io.BytesIO instance,
        or any custom object with that interface.

        If *fix_imports* is true and *protocol* is less than 3, Python 3
        module and class names are mapped back to their Python 2
        spellings so the data stream stays readable with Python 2.
        """
        proto = DEFAULT_PROTOCOL if protocol is None else protocol
        if proto < 0:
            proto = HIGHEST_PROTOCOL
        elif not 0 <= proto <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        try:
            writer = file.write
        except AttributeError:
            raise TypeError("file must have a 'write' attribute")
        self._file_write = writer
        framer = _Framer(writer)
        self.framer = framer
        self.write = framer.write
        self._write_large_bytes = framer.write_large_bytes
        self.memo = {}
        self.proto = int(proto)
        self.bin = proto >= 1
        self.fast = 0
        self.fix_imports = fix_imports and proto < 3

416    def clear_memo(self):
417        """Clears the pickler's "memo".
418
419        The memo is the data structure that remembers which objects the
420        pickler has already seen, so that shared or recursive objects
421        are pickled by reference and not by value.  This method is
422        useful when re-using picklers.
423        """
424        self.memo.clear()
425
426    def dump(self, obj):
427        """Write a pickled representation of obj to the open file."""
428        # Check whether Pickler was initialized correctly. This is
429        # only needed to mimic the behavior of _pickle.Pickler.dump().
430        if not hasattr(self, "_file_write"):
431            raise PicklingError("Pickler.__init__() was not called by "
432                                "%s.__init__()" % (self.__class__.__name__,))
433        if self.proto >= 2:
434            self.write(PROTO + pack("<B", self.proto))
435        if self.proto >= 4:
436            self.framer.start_framing()
437        self.save(obj)
438        self.write(STOP)
439        self.framer.end_framing()
440
441    def memoize(self, obj):
442        """Store an object in the memo."""
443
444        # The Pickler memo is a dictionary mapping object ids to 2-tuples
445        # that contain the Unpickler memo key and the object being memoized.
446        # The memo key is written to the pickle and will become
447        # the key in the Unpickler's memo.  The object is stored in the
448        # Pickler memo so that transient objects are kept alive during
449        # pickling.
450
451        # The use of the Unpickler memo length as the memo key is just a
452        # convention.  The only requirement is that the memo values be unique.
453        # But there appears no advantage to any other scheme, and this
454        # scheme allows the Unpickler memo to be implemented as a plain (but
455        # growable) array, indexed by memo key.
456        if self.fast:
457            return
458        assert id(obj) not in self.memo
459        idx = len(self.memo)
460        self.write(self.put(idx))
461        self.memo[id(obj)] = idx, obj
462
    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument idx.
464    def put(self, idx):
465        if self.proto >= 4:
466            return MEMOIZE
467        elif self.bin:
468            if idx < 256:
469                return BINPUT + pack("<B", idx)
470            else:
471                return LONG_BINPUT + pack("<I", idx)
472        else:
473            return PUT + repr(idx).encode("ascii") + b'\n'
474
    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
476    def get(self, i):
477        if self.bin:
478            if i < 256:
479                return BINGET + pack("<B", i)
480            else:
481                return LONG_BINGET + pack("<I", i)
482
483        return GET + repr(i).encode("ascii") + b'\n'
484
    def save(self, obj, save_persistent_id=True):
        """Write the pickled representation of *obj* to the stream.

        Dispatch order: persistent id, memo, per-type dispatch table,
        private/copyreg dispatch table, class check, then the object's
        own __reduce_ex__/__reduce__ protocol.
        """
        self.framer.commit_frame()

        # Check for persistent id (defined by a subclass)
        pid = self.persistent_id(obj)
        if pid is not None and save_persistent_id:
            self.save_pers(pid)
            return

        # Check the memo
        x = self.memo.get(id(obj))
        if x is not None:
            self.write(self.get(x[0]))
            return

        # Check the type dispatch table
        t = type(obj)
        f = self.dispatch.get(t)
        if f is not None:
            f(self, obj) # Call unbound method with explicit self
            return

        # Check private dispatch table if any, or else copyreg.dispatch_table
        reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
        if reduce is not None:
            rv = reduce(obj)
        else:
            # Check for a class with a custom metaclass; treat as regular class
            try:
                issc = issubclass(t, type)
            except TypeError: # t is not a class (old Boost; see SF #502085)
                issc = False
            if issc:
                self.save_global(obj)
                return

            # Check for a __reduce_ex__ method, fall back to __reduce__
            reduce = getattr(obj, "__reduce_ex__", None)
            if reduce is not None:
                rv = reduce(self.proto)
            else:
                reduce = getattr(obj, "__reduce__", None)
                if reduce is not None:
                    rv = reduce()
                else:
                    raise PicklingError("Can't pickle %r object: %r" %
                                        (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if isinstance(rv, str):
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if not isinstance(rv, tuple):
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 5):
            raise PicklingError("Tuple returned by %s must have "
                                "two to five elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self.save_reduce(obj=obj, *rv)

551    def persistent_id(self, obj):
552        # This exists so a subclass can override it
553        return None
554
555    def save_pers(self, pid):
556        # Save a persistent id reference
557        if self.bin:
558            self.save(pid, save_persistent_id=False)
559            self.write(BINPERSID)
560        else:
561            try:
562                self.write(PERSID + str(pid).encode("ascii") + b'\n')
563            except UnicodeEncodeError:
564                raise PicklingError(
565                    "persistent IDs in protocol 0 must be ASCII strings")
566
    def save_reduce(self, func, args, state=None, listitems=None,
                    dictitems=None, obj=None):
        """Write an object from the components of its reduce() tuple.

        func/args/state/listitems/dictitems are the (two to five) items
        of a __reduce__-style tuple; *obj* is the original object, used
        for memoization and for sanity-checking __newobj__ classes.
        This API is called by some subclasses.
        """

        if not isinstance(args, tuple):
            raise PicklingError("args from save_reduce() must be a tuple")
        if not callable(func):
            raise PicklingError("func from save_reduce() must be callable")

        save = self.save
        write = self.write

        func_name = getattr(func, "__name__", "")
        if self.proto >= 2 and func_name == "__newobj_ex__":
            cls, args, kwargs = args
            if not hasattr(cls, "__new__"):
                raise PicklingError("args[0] from {} args has no __new__"
                                    .format(func_name))
            if obj is not None and cls is not obj.__class__:
                raise PicklingError("args[0] from {} args has the wrong class"
                                    .format(func_name))
            if self.proto >= 4:
                save(cls)
                save(args)
                save(kwargs)
                write(NEWOBJ_EX)
            else:
                # Older protocols have no NEWOBJ_EX; fold the keyword
                # arguments into a partial application of cls.__new__.
                func = partial(cls.__new__, cls, *args, **kwargs)
                save(func)
                save(())
                write(REDUCE)
        elif self.proto >= 2 and func_name == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 or newer to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally.  For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # class.  The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below).  If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own.  This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))
            else:
                self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            save(state)
            write(BUILD)

    # Methods below this point are dispatched through the dispatch table

    # Maps a concrete type to the unbound save_* method that handles it;
    # populated immediately after each save_* definition below.
    dispatch = {}

669    def save_none(self, obj):
670        self.write(NONE)
671    dispatch[type(None)] = save_none
672
673    def save_bool(self, obj):
674        if self.proto >= 2:
675            self.write(NEWTRUE if obj else NEWFALSE)
676        else:
677            self.write(TRUE if obj else FALSE)
678    dispatch[bool] = save_bool
679
680    def save_long(self, obj):
681        if self.bin:
682            # If the int is small enough to fit in a signed 4-byte 2's-comp
683            # format, we can store it more efficiently than the general
684            # case.
685            # First one- and two-byte unsigned ints:
686            if obj >= 0:
687                if obj <= 0xff:
688                    self.write(BININT1 + pack("<B", obj))
689                    return
690                if obj <= 0xffff:
691                    self.write(BININT2 + pack("<H", obj))
692                    return
693            # Next check for 4-byte signed ints:
694            if -0x80000000 <= obj <= 0x7fffffff:
695                self.write(BININT + pack("<i", obj))
696                return
697        if self.proto >= 2:
698            encoded = encode_long(obj)
699            n = len(encoded)
700            if n < 256:
701                self.write(LONG1 + pack("<B", n) + encoded)
702            else:
703                self.write(LONG4 + pack("<i", n) + encoded)
704            return
705        if -0x80000000 <= obj <= 0x7fffffff:
706            self.write(INT + repr(obj).encode("ascii") + b'\n')
707        else:
708            self.write(LONG + repr(obj).encode("ascii") + b'L\n')
709    dispatch[int] = save_long
710
711    def save_float(self, obj):
712        if self.bin:
713            self.write(BINFLOAT + pack('>d', obj))
714        else:
715            self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
716    dispatch[float] = save_float
717
718    def save_bytes(self, obj):
719        if self.proto < 3:
720            if not obj: # bytes object is empty
721                self.save_reduce(bytes, (), obj=obj)
722            else:
723                self.save_reduce(codecs.encode,
724                                 (str(obj, 'latin1'), 'latin1'), obj=obj)
725            return
726        n = len(obj)
727        if n <= 0xff:
728            self.write(SHORT_BINBYTES + pack("<B", n) + obj)
729        elif n > 0xffffffff and self.proto >= 4:
730            self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj)
731        elif n >= self.framer._FRAME_SIZE_TARGET:
732            self._write_large_bytes(BINBYTES + pack("<I", n), obj)
733        else:
734            self.write(BINBYTES + pack("<I", n) + obj)
735        self.memoize(obj)
736    dispatch[bytes] = save_bytes
737
738    def save_str(self, obj):
739        if self.bin:
740            encoded = obj.encode('utf-8', 'surrogatepass')
741            n = len(encoded)
742            if n <= 0xff and self.proto >= 4:
743                self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
744            elif n > 0xffffffff and self.proto >= 4:
745                self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
746            elif n >= self.framer._FRAME_SIZE_TARGET:
747                self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
748            else:
749                self.write(BINUNICODE + pack("<I", n) + encoded)
750        else:
751            obj = obj.replace("\\", "\\u005c")
752            obj = obj.replace("\n", "\\u000a")
753            self.write(UNICODE + obj.encode('raw-unicode-escape') +
754                       b'\n')
755        self.memoize(obj)
756    dispatch[str] = save_str
757
    def save_tuple(self, obj):
        """Pickle a tuple, handling recursion via the memo.

        Empty and short (<= 3 element, protocol >= 2) tuples get
        dedicated opcodes; longer ones use MARK ... TUPLE.  If the tuple
        entered the memo while its elements were being saved, it is
        recursive: the partially written items are popped and the
        memoized tuple is fetched instead.
        """
        if not obj: # tuple is empty
            if self.bin:
                self.write(EMPTY_TUPLE)
            else:
                self.write(MARK + TUPLE)
            return

        n = len(obj)
        save = self.save
        memo = self.memo
        if n <= 3 and self.proto >= 2:
            for element in obj:
                save(element)
            # Subtle.  Same as in the big comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                self.write(POP * n + get)
            else:
                self.write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
        # has more than 3 elements.
        write = self.write
        write(MARK)
        for element in obj:
            save(element)

        if id(obj) in memo:
            # Subtle.  d was not in memo when we entered save_tuple(), so
            # the process of saving the tuple's elements must have saved
            # the tuple itself:  the tuple is recursive.  The proper action
            # now is to throw away everything we put on the stack, and
            # simply GET the tuple (it's already constructed).  This check
            # could have been done in the "for element" loop instead, but
            # recursive tuples are a rare thing.
            get = self.get(memo[id(obj)][0])
            if self.bin:
                write(POP_MARK + get)
            else:   # proto 0 -- POP_MARK not available
                write(POP * (n+1) + get)
            return

        # No recursion.
        write(TUPLE)
        self.memoize(obj)

    dispatch[tuple] = save_tuple

809    def save_list(self, obj):
810        if self.bin:
811            self.write(EMPTY_LIST)
812        else:   # proto 0 -- can't use EMPTY_LIST
813            self.write(MARK + LIST)
814
815        self.memoize(obj)
816        self._batch_appends(obj)
817
818    dispatch[list] = save_list
819
820    _BATCHSIZE = 1000
821
822    def _batch_appends(self, items):
823        # Helper to batch up APPENDS sequences
824        save = self.save
825        write = self.write
826
827        if not self.bin:
828            for x in items:
829                save(x)
830                write(APPEND)
831            return
832
833        it = iter(items)
834        while True:
835            tmp = list(islice(it, self._BATCHSIZE))
836            n = len(tmp)
837            if n > 1:
838                write(MARK)
839                for x in tmp:
840                    save(x)
841                write(APPENDS)
842            elif n:
843                save(tmp[0])
844                write(APPEND)
845            # else tmp is empty, and we're done
846            if n < self._BATCHSIZE:
847                return
848
849    def save_dict(self, obj):
850        if self.bin:
851            self.write(EMPTY_DICT)
852        else:   # proto 0 -- can't use EMPTY_DICT
853            self.write(MARK + DICT)
854
855        self.memoize(obj)
856        self._batch_setitems(obj.items())
857
858    dispatch[dict] = save_dict
859    if PyStringMap is not None:
860        dispatch[PyStringMap] = save_dict
861
862    def _batch_setitems(self, items):
863        # Helper to batch up SETITEMS sequences; proto >= 1 only
864        save = self.save
865        write = self.write
866
867        if not self.bin:
868            for k, v in items:
869                save(k)
870                save(v)
871                write(SETITEM)
872            return
873
874        it = iter(items)
875        while True:
876            tmp = list(islice(it, self._BATCHSIZE))
877            n = len(tmp)
878            if n > 1:
879                write(MARK)
880                for k, v in tmp:
881                    save(k)
882                    save(v)
883                write(SETITEMS)
884            elif n:
885                k, v = tmp[0]
886                save(k)
887                save(v)
888                write(SETITEM)
889            # else tmp is empty, and we're done
890            if n < self._BATCHSIZE:
891                return
892
893    def save_set(self, obj):
894        save = self.save
895        write = self.write
896
897        if self.proto < 4:
898            self.save_reduce(set, (list(obj),), obj=obj)
899            return
900
901        write(EMPTY_SET)
902        self.memoize(obj)
903
904        it = iter(obj)
905        while True:
906            batch = list(islice(it, self._BATCHSIZE))
907            n = len(batch)
908            if n > 0:
909                write(MARK)
910                for item in batch:
911                    save(item)
912                write(ADDITEMS)
913            if n < self._BATCHSIZE:
914                return
915    dispatch[set] = save_set
916
    def save_frozenset(self, obj):
        """Pickle a frozenset: MARK ... FROZENSET on proto 4+, else reduce."""
        save = self.save
        write = self.write

        # The FROZENSET opcode only exists from protocol 4 onwards.
        if self.proto < 4:
            self.save_reduce(frozenset, (list(obj),), obj=obj)
            return

        write(MARK)
        for item in obj:
            save(item)

        if id(obj) in self.memo:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            write(POP_MARK + self.get(self.memo[id(obj)][0]))
            return

        write(FROZENSET)
        self.memoize(obj)
    dispatch[frozenset] = save_frozenset
939
    def save_global(self, obj, name=None):
        """Pickle a class or function by (module, qualified name) reference.

        Verifies that re-importing the name yields the very same object,
        then writes the most compact record the protocol allows: a
        copyreg extension code, STACK_GLOBAL (proto 4+), or a textual
        GLOBAL record.  Raises PicklingError when the object can't be
        located or doesn't match what the name imports to.
        """
        write = self.write
        memo = self.memo

        if name is None:
            name = getattr(obj, '__qualname__', None)
        if name is None:
            name = obj.__name__

        module_name = whichmodule(obj, name)
        try:
            __import__(module_name, level=0)
            module = sys.modules[module_name]
            obj2, parent = _getattribute(module, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module_name, name)) from None
        else:
            if obj2 is not obj:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module_name, name))

        if self.proto >= 2:
            # copyreg extension registry: a small integer stands in for
            # the (module, name) pair.
            code = _extension_registry.get((module_name, name))
            if code:
                assert code > 0
                if code <= 0xff:
                    write(EXT1 + pack("<B", code))
                elif code <= 0xffff:
                    write(EXT2 + pack("<H", code))
                else:
                    write(EXT4 + pack("<i", code))
                return
        lastname = name.rpartition('.')[2]
        if parent is module:
            name = lastname
        # Non-ASCII identifiers are supported only with protocols >= 3.
        if self.proto >= 4:
            self.save(module_name)
            self.save(name)
            write(STACK_GLOBAL)
        elif parent is not module:
            # Nested attribute (e.g. a method): protocols < 4 can't name
            # it directly, so reduce via getattr on the parent.
            self.save_reduce(getattr, (parent, lastname))
        elif self.proto >= 3:
            write(GLOBAL + bytes(module_name, "utf-8") + b'\n' +
                  bytes(name, "utf-8") + b'\n')
        else:
            if self.fix_imports:
                # Map Python 3 names back to their Python 2 equivalents.
                r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
                r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
                if (module_name, name) in r_name_mapping:
                    module_name, name = r_name_mapping[(module_name, name)]
                elif module_name in r_import_mapping:
                    module_name = r_import_mapping[module_name]
            try:
                write(GLOBAL + bytes(module_name, "ascii") + b'\n' +
                      bytes(name, "ascii") + b'\n')
            except UnicodeEncodeError:
                raise PicklingError(
                    "can't pickle global identifier '%s.%s' using "
                    "pickle protocol %i" % (module, name, self.proto)) from None

        self.memoize(obj)
1005
1006    def save_type(self, obj):
1007        if obj is type(None):
1008            return self.save_reduce(type, (None,), obj=obj)
1009        elif obj is type(NotImplemented):
1010            return self.save_reduce(type, (NotImplemented,), obj=obj)
1011        elif obj is type(...):
1012            return self.save_reduce(type, (...,), obj=obj)
1013        return self.save_global(obj)
1014
1015    dispatch[FunctionType] = save_global
1016    dispatch[type] = save_type
1017
1018
1019# Unpickling machinery
1020
class _Unpickler:
    """Pure-Python Unpickler.

    Each opcode byte read from the stream is looked up in the
    class-level ``dispatch`` table and handled by the corresponding
    load_* method, which manipulates an explicit data stack (``stack``)
    plus a stack of saved stacks (``metastack``) used to implement MARK.
    """

    def __init__(self, file, *, fix_imports=True,
                 encoding="ASCII", errors="strict"):
        """This takes a binary file for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so
        no proto argument is needed.

        The argument *file* must have two methods, a read() method that
        takes an integer argument, and a readline() method that requires
        no arguments.  Both methods should return bytes.  Thus *file*
        can be a binary file object opened for reading, an io.BytesIO
        object, or any other custom object that meets this interface.

        Optional keyword arguments are *fix_imports*, *encoding* and
        *errors*, which are used to control compatibility support for
        pickle stream generated by Python 2.  If *fix_imports* is True,
        pickle will try to map the old Python 2 names to the new names
        used in Python 3.  The *encoding* and *errors* tell pickle how
        to decode 8-bit string instances pickled by Python 2; these
        default to 'ASCII' and 'strict', respectively. *encoding* can be
        'bytes' to read these 8-bit string instances as bytes objects.
        """
        self._file_readline = file.readline
        self._file_read = file.read
        self.memo = {}
        self.encoding = encoding
        self.errors = errors
        self.proto = 0
        self.fix_imports = fix_imports

    def load(self):
        """Read a pickled object representation from the open file.

        Return the reconstituted object hierarchy specified in the file.
        """
        # Check whether Unpickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Unpickler.load().
        if not hasattr(self, "_file_read"):
            raise UnpicklingError("Unpickler.__init__() was not called by "
                                  "%s.__init__()" % (self.__class__.__name__,))
        self._unframer = _Unframer(self._file_read, self._file_readline)
        self.read = self._unframer.read
        self.readline = self._unframer.readline
        self.metastack = []
        self.stack = []
        self.append = self.stack.append
        self.proto = 0
        read = self.read
        dispatch = self.dispatch
        try:
            # Dispatch on opcode bytes until STOP raises _Stop with the
            # final value.
            while True:
                key = read(1)
                if not key:
                    raise EOFError
                assert isinstance(key, bytes_types)
                dispatch[key[0]](self)
        except _Stop as stopinst:
            return stopinst.value

    # Return a list of items pushed in the stack after last MARK instruction.
    def pop_mark(self):
        items = self.stack
        self.stack = self.metastack.pop()
        self.append = self.stack.append
        return items

    def persistent_load(self, pid):
        # Subclasses override this to resolve persistent IDs.
        raise UnpicklingError("unsupported persistent id encountered")

    # Maps an opcode byte value to the unbound load_* method handling it.
    dispatch = {}

    def load_proto(self):
        proto = self.read(1)[0]
        if not 0 <= proto <= HIGHEST_PROTOCOL:
            raise ValueError("unsupported pickle protocol: %d" % proto)
        self.proto = proto
    dispatch[PROTO[0]] = load_proto

    def load_frame(self):
        frame_size, = unpack('<Q', self.read(8))
        if frame_size > sys.maxsize:
            raise ValueError("frame size > sys.maxsize: %d" % frame_size)
        self._unframer.load_frame(frame_size)
    dispatch[FRAME[0]] = load_frame

    def load_persid(self):
        try:
            pid = self.readline()[:-1].decode("ascii")
        except UnicodeDecodeError:
            raise UnpicklingError(
                "persistent IDs in protocol 0 must be ASCII strings")
        self.append(self.persistent_load(pid))
    dispatch[PERSID[0]] = load_persid

    def load_binpersid(self):
        pid = self.stack.pop()
        self.append(self.persistent_load(pid))
    dispatch[BINPERSID[0]] = load_binpersid

    def load_none(self):
        self.append(None)
    dispatch[NONE[0]] = load_none

    def load_false(self):
        self.append(False)
    dispatch[NEWFALSE[0]] = load_false

    def load_true(self):
        self.append(True)
    dispatch[NEWTRUE[0]] = load_true

    def load_int(self):
        data = self.readline()
        # Protocol 0 encodes booleans as the INT opcode with special
        # argument lines ("00\n" / "01\n").
        if data == FALSE[1:]:
            val = False
        elif data == TRUE[1:]:
            val = True
        else:
            val = int(data, 0)
        self.append(val)
    dispatch[INT[0]] = load_int

    def load_binint(self):
        self.append(unpack('<i', self.read(4))[0])
    dispatch[BININT[0]] = load_binint

    def load_binint1(self):
        self.append(self.read(1)[0])
    dispatch[BININT1[0]] = load_binint1

    def load_binint2(self):
        self.append(unpack('<H', self.read(2))[0])
    dispatch[BININT2[0]] = load_binint2

    def load_long(self):
        val = self.readline()[:-1]
        # Pickles written by Python 2 may carry a trailing 'L'.
        if val and val[-1] == b'L'[0]:
            val = val[:-1]
        self.append(int(val, 0))
    dispatch[LONG[0]] = load_long

    def load_long1(self):
        n = self.read(1)[0]
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG1[0]] = load_long1

    def load_long4(self):
        n, = unpack('<i', self.read(4))
        if n < 0:
            # Corrupt or hostile pickle -- we never write one like this
            raise UnpicklingError("LONG pickle has negative byte count")
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG4[0]] = load_long4

    def load_float(self):
        self.append(float(self.readline()[:-1]))
    dispatch[FLOAT[0]] = load_float

    def load_binfloat(self):
        self.append(unpack('>d', self.read(8))[0])
    dispatch[BINFLOAT[0]] = load_binfloat

    def _decode_string(self, value):
        # Used to allow strings from Python 2 to be decoded either as
        # bytes or Unicode strings.  This should be used only with the
        # STRING, BINSTRING and SHORT_BINSTRING opcodes.
        if self.encoding == "bytes":
            return value
        else:
            return value.decode(self.encoding, self.errors)

    def load_string(self):
        data = self.readline()[:-1]
        # Strip outermost quotes
        if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'':
            data = data[1:-1]
        else:
            raise UnpicklingError("the STRING opcode argument must be quoted")
        self.append(self._decode_string(codecs.escape_decode(data)[0]))
    dispatch[STRING[0]] = load_string

    def load_binstring(self):
        # Deprecated BINSTRING uses signed 32-bit length
        len, = unpack('<i', self.read(4))
        if len < 0:
            raise UnpicklingError("BINSTRING pickle has negative byte count")
        data = self.read(len)
        self.append(self._decode_string(data))
    dispatch[BINSTRING[0]] = load_binstring

    def load_binbytes(self):
        len, = unpack('<I', self.read(4))
        if len > maxsize:
            raise UnpicklingError("BINBYTES exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        self.append(self.read(len))
    dispatch[BINBYTES[0]] = load_binbytes

    def load_unicode(self):
        self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
    dispatch[UNICODE[0]] = load_unicode

    def load_binunicode(self):
        len, = unpack('<I', self.read(4))
        if len > maxsize:
            raise UnpicklingError("BINUNICODE exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
    dispatch[BINUNICODE[0]] = load_binunicode

    def load_binunicode8(self):
        len, = unpack('<Q', self.read(8))
        if len > maxsize:
            raise UnpicklingError("BINUNICODE8 exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
    dispatch[BINUNICODE8[0]] = load_binunicode8

    def load_binbytes8(self):
        len, = unpack('<Q', self.read(8))
        if len > maxsize:
            raise UnpicklingError("BINBYTES8 exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        self.append(self.read(len))
    dispatch[BINBYTES8[0]] = load_binbytes8

    def load_short_binstring(self):
        len = self.read(1)[0]
        data = self.read(len)
        self.append(self._decode_string(data))
    dispatch[SHORT_BINSTRING[0]] = load_short_binstring

    def load_short_binbytes(self):
        len = self.read(1)[0]
        self.append(self.read(len))
    dispatch[SHORT_BINBYTES[0]] = load_short_binbytes

    def load_short_binunicode(self):
        len = self.read(1)[0]
        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
    dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode

    def load_tuple(self):
        items = self.pop_mark()
        self.append(tuple(items))
    dispatch[TUPLE[0]] = load_tuple

    def load_empty_tuple(self):
        self.append(())
    dispatch[EMPTY_TUPLE[0]] = load_empty_tuple

    def load_tuple1(self):
        self.stack[-1] = (self.stack[-1],)
    dispatch[TUPLE1[0]] = load_tuple1

    def load_tuple2(self):
        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
    dispatch[TUPLE2[0]] = load_tuple2

    def load_tuple3(self):
        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
    dispatch[TUPLE3[0]] = load_tuple3

    def load_empty_list(self):
        self.append([])
    dispatch[EMPTY_LIST[0]] = load_empty_list

    def load_empty_dictionary(self):
        self.append({})
    dispatch[EMPTY_DICT[0]] = load_empty_dictionary

    def load_empty_set(self):
        self.append(set())
    dispatch[EMPTY_SET[0]] = load_empty_set

    def load_frozenset(self):
        items = self.pop_mark()
        self.append(frozenset(items))
    dispatch[FROZENSET[0]] = load_frozenset

    def load_list(self):
        items = self.pop_mark()
        self.append(items)
    dispatch[LIST[0]] = load_list

    def load_dict(self):
        # Stack since MARK holds alternating key, value, key, value, ...
        items = self.pop_mark()
        d = {items[i]: items[i+1]
             for i in range(0, len(items), 2)}
        self.append(d)
    dispatch[DICT[0]] = load_dict

    # INST and OBJ differ only in how they get a class object.  It's not
    # only sensible to do the rest in a common routine, the two routines
    # previously diverged and grew different bugs.
    # klass is the class to instantiate, and k points to the topmost mark
    # object, following which are the arguments for klass.__init__.
    def _instantiate(self, klass, args):
        if (args or not isinstance(klass, type) or
            hasattr(klass, "__getinitargs__")):
            try:
                value = klass(*args)
            except TypeError as err:
                raise TypeError("in constructor for %s: %s" %
                                (klass.__name__, str(err)), sys.exc_info()[2])
        else:
            # No args and an ordinary class: bypass __init__ entirely.
            value = klass.__new__(klass)
        self.append(value)

    def load_inst(self):
        module = self.readline()[:-1].decode("ascii")
        name = self.readline()[:-1].decode("ascii")
        klass = self.find_class(module, name)
        self._instantiate(klass, self.pop_mark())
    dispatch[INST[0]] = load_inst

    def load_obj(self):
        # Stack is ... markobject classobject arg1 arg2 ...
        args = self.pop_mark()
        cls = args.pop(0)
        self._instantiate(cls, args)
    dispatch[OBJ[0]] = load_obj

    def load_newobj(self):
        args = self.stack.pop()
        cls = self.stack.pop()
        obj = cls.__new__(cls, *args)
        self.append(obj)
    dispatch[NEWOBJ[0]] = load_newobj

    def load_newobj_ex(self):
        kwargs = self.stack.pop()
        args = self.stack.pop()
        cls = self.stack.pop()
        obj = cls.__new__(cls, *args, **kwargs)
        self.append(obj)
    dispatch[NEWOBJ_EX[0]] = load_newobj_ex

    def load_global(self):
        module = self.readline()[:-1].decode("utf-8")
        name = self.readline()[:-1].decode("utf-8")
        klass = self.find_class(module, name)
        self.append(klass)
    dispatch[GLOBAL[0]] = load_global

    def load_stack_global(self):
        name = self.stack.pop()
        module = self.stack.pop()
        if type(name) is not str or type(module) is not str:
            raise UnpicklingError("STACK_GLOBAL requires str")
        self.append(self.find_class(module, name))
    dispatch[STACK_GLOBAL[0]] = load_stack_global

    def load_ext1(self):
        code = self.read(1)[0]
        self.get_extension(code)
    dispatch[EXT1[0]] = load_ext1

    def load_ext2(self):
        code, = unpack('<H', self.read(2))
        self.get_extension(code)
    dispatch[EXT2[0]] = load_ext2

    def load_ext4(self):
        code, = unpack('<i', self.read(4))
        self.get_extension(code)
    dispatch[EXT4[0]] = load_ext4

    def get_extension(self, code):
        # nil is a unique sentinel so that None-valued cache entries work.
        nil = []
        obj = _extension_cache.get(code, nil)
        if obj is not nil:
            self.append(obj)
            return
        key = _inverted_registry.get(code)
        if not key:
            if code <= 0: # note that 0 is forbidden
                # Corrupt or hostile pickle.
                raise UnpicklingError("EXT specifies code <= 0")
            raise ValueError("unregistered extension code %d" % code)
        obj = self.find_class(*key)
        _extension_cache[code] = obj
        self.append(obj)

    def find_class(self, module, name):
        # Subclasses may override this.
        if self.proto < 3 and self.fix_imports:
            # Translate Python 2 module/class names to their Python 3
            # equivalents for old-protocol pickles.
            if (module, name) in _compat_pickle.NAME_MAPPING:
                module, name = _compat_pickle.NAME_MAPPING[(module, name)]
            elif module in _compat_pickle.IMPORT_MAPPING:
                module = _compat_pickle.IMPORT_MAPPING[module]
        __import__(module, level=0)
        if self.proto >= 4:
            # Protocol 4 allows dotted (nested) attribute paths.
            return _getattribute(sys.modules[module], name)[0]
        else:
            return getattr(sys.modules[module], name)

    def load_reduce(self):
        stack = self.stack
        args = stack.pop()
        func = stack[-1]
        stack[-1] = func(*args)
    dispatch[REDUCE[0]] = load_reduce

    def load_pop(self):
        if self.stack:
            del self.stack[-1]
        else:
            self.pop_mark()
    dispatch[POP[0]] = load_pop

    def load_pop_mark(self):
        self.pop_mark()
    dispatch[POP_MARK[0]] = load_pop_mark

    def load_dup(self):
        self.append(self.stack[-1])
    dispatch[DUP[0]] = load_dup

    def load_get(self):
        i = int(self.readline()[:-1])
        self.append(self.memo[i])
    dispatch[GET[0]] = load_get

    def load_binget(self):
        i = self.read(1)[0]
        self.append(self.memo[i])
    dispatch[BINGET[0]] = load_binget

    def load_long_binget(self):
        i, = unpack('<I', self.read(4))
        self.append(self.memo[i])
    dispatch[LONG_BINGET[0]] = load_long_binget

    def load_put(self):
        i = int(self.readline()[:-1])
        if i < 0:
            raise ValueError("negative PUT argument")
        self.memo[i] = self.stack[-1]
    dispatch[PUT[0]] = load_put

    def load_binput(self):
        i = self.read(1)[0]
        if i < 0:
            raise ValueError("negative BINPUT argument")
        self.memo[i] = self.stack[-1]
    dispatch[BINPUT[0]] = load_binput

    def load_long_binput(self):
        i, = unpack('<I', self.read(4))
        # NOTE(review): the condition guards against indices above
        # sys.maxsize (the value is unsigned, so it can't be negative);
        # the "negative" wording in the message looks like a holdover.
        if i > maxsize:
            raise ValueError("negative LONG_BINPUT argument")
        self.memo[i] = self.stack[-1]
    dispatch[LONG_BINPUT[0]] = load_long_binput

    def load_memoize(self):
        # Protocol 4: memo keys are implicit, assigned sequentially.
        memo = self.memo
        memo[len(memo)] = self.stack[-1]
    dispatch[MEMOIZE[0]] = load_memoize

    def load_append(self):
        stack = self.stack
        value = stack.pop()
        list = stack[-1]
        list.append(value)
    dispatch[APPEND[0]] = load_append

    def load_appends(self):
        items = self.pop_mark()
        list_obj = self.stack[-1]
        try:
            extend = list_obj.extend
        except AttributeError:
            pass
        else:
            extend(items)
            return
        # Even if the PEP 307 requires extend() and append() methods,
        # fall back on append() if the object has no extend() method
        # for backward compatibility.
        append = list_obj.append
        for item in items:
            append(item)
    dispatch[APPENDS[0]] = load_appends

    def load_setitem(self):
        stack = self.stack
        value = stack.pop()
        key = stack.pop()
        dict = stack[-1]
        dict[key] = value
    dispatch[SETITEM[0]] = load_setitem

    def load_setitems(self):
        items = self.pop_mark()
        dict = self.stack[-1]
        for i in range(0, len(items), 2):
            dict[items[i]] = items[i + 1]
    dispatch[SETITEMS[0]] = load_setitems

    def load_additems(self):
        items = self.pop_mark()
        set_obj = self.stack[-1]
        if isinstance(set_obj, set):
            set_obj.update(items)
        else:
            add = set_obj.add
            for item in items:
                add(item)
    dispatch[ADDITEMS[0]] = load_additems

    def load_build(self):
        stack = self.stack
        state = stack.pop()
        inst = stack[-1]
        setstate = getattr(inst, "__setstate__", None)
        if setstate is not None:
            setstate(state)
            return
        slotstate = None
        # A 2-tuple state is (dict state, slot state) per PEP 307.
        if isinstance(state, tuple) and len(state) == 2:
            state, slotstate = state
        if state:
            inst_dict = inst.__dict__
            # Intern attribute-name keys so instances share key objects.
            intern = sys.intern
            for k, v in state.items():
                if type(k) is str:
                    inst_dict[intern(k)] = v
                else:
                    inst_dict[k] = v
        if slotstate:
            for k, v in slotstate.items():
                setattr(inst, k, v)
    dispatch[BUILD[0]] = load_build

    def load_mark(self):
        self.metastack.append(self.stack)
        self.stack = []
        self.append = self.stack.append
    dispatch[MARK[0]] = load_mark

    def load_stop(self):
        value = self.stack.pop()
        raise _Stop(value)
    dispatch[STOP[0]] = load_stop
1577
1578
1579# Shorthands
1580
def _dump(obj, file, protocol=None, *, fix_imports=True):
    """Pickle *obj* to the open binary file *file*."""
    pickler = _Pickler(file, protocol, fix_imports=fix_imports)
    pickler.dump(obj)
1583
def _dumps(obj, protocol=None, *, fix_imports=True):
    """Return the pickled representation of *obj* as a bytes object."""
    buffer = io.BytesIO()
    _Pickler(buffer, protocol, fix_imports=fix_imports).dump(obj)
    result = buffer.getvalue()
    assert isinstance(result, bytes_types)
    return result
1590
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"):
    """Read and return a pickled object from the open binary file *file*."""
    unpickler = _Unpickler(file, fix_imports=fix_imports,
                           encoding=encoding, errors=errors)
    return unpickler.load()
1594
def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):
    """Read and return a pickled object from the bytes-like object *s*."""
    if isinstance(s, str):
        raise TypeError("Can't load pickle from unicode string")
    return _Unpickler(io.BytesIO(s), fix_imports=fix_imports,
                      encoding=encoding, errors=errors).load()
1601
# Use the faster _pickle if possible
try:
    from _pickle import (
        PickleError,
        PicklingError,
        UnpicklingError,
        Pickler,
        Unpickler,
        dump,
        dumps,
        load,
        loads
    )
except ImportError:
    # No C accelerator available: fall back on the pure-Python
    # implementations defined above.
    Pickler, Unpickler = _Pickler, _Unpickler
    dump, dumps, load, loads = _dump, _dumps, _load, _loads
1618
1619# Doctest
def _test():
    """Run this module's doctests and return the results."""
    import doctest
    return doctest.testmod()
1623
if __name__ == "__main__":
    # Command-line interface: pretty-print the contents of the given
    # pickle files, or run the doctest suite with -t/--test.
    import argparse
    parser = argparse.ArgumentParser(
        description='display contents of the pickle files')
    parser.add_argument(
        'pickle_file', type=argparse.FileType('br'),
        nargs='*', help='the pickle file')
    parser.add_argument(
        '-t', '--test', action='store_true',
        help='run self-test suite')
    parser.add_argument(
        '-v', action='store_true',
        help='run verbosely; only affects self-test run')
    args = parser.parse_args()
    if args.test:
        _test()
    else:
        if not args.pickle_file:
            # No files given: show usage rather than reading from stdin.
            parser.print_help()
        else:
            import pprint
            for f in args.pickle_file:
                obj = load(f)
                pprint.pprint(obj)
1648