1"""Create portable serialized representations of Python objects. 2 3See module copyreg for a mechanism for registering custom picklers. 4See module pickletools source for extensive comments. 5 6Classes: 7 8 Pickler 9 Unpickler 10 11Functions: 12 13 dump(object, file) 14 dumps(object) -> string 15 load(file) -> object 16 loads(string) -> object 17 18Misc variables: 19 20 __version__ 21 format_version 22 compatible_formats 23 24""" 25 26from types import FunctionType 27from copyreg import dispatch_table 28from copyreg import _extension_registry, _inverted_registry, _extension_cache 29from itertools import islice 30from functools import partial 31import sys 32from sys import maxsize 33from struct import pack, unpack 34import re 35import io 36import codecs 37import _compat_pickle 38 39__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", 40 "Unpickler", "dump", "dumps", "load", "loads"] 41 42# Shortcut for use in isinstance testing 43bytes_types = (bytes, bytearray) 44 45# These are purely informational; no code uses these. 46format_version = "4.0" # File format version we write 47compatible_formats = ["1.0", # Original protocol 0 48 "1.1", # Protocol 0 with INST added 49 "1.2", # Original protocol 1 50 "1.3", # Protocol 1 with BINFLOAT added 51 "2.0", # Protocol 2 52 "3.0", # Protocol 3 53 "4.0", # Protocol 4 54 ] # Old format versions we can read 55 56# This is the highest protocol number we know how to read. 57HIGHEST_PROTOCOL = 4 58 59# The protocol we write by default. May be less than HIGHEST_PROTOCOL. 60# We intentionally write a protocol that Python 2.x cannot read; 61# there are too many issues with that. 62DEFAULT_PROTOCOL = 3 63 64class PickleError(Exception): 65 """A common base class for the other pickling exceptions.""" 66 pass 67 68class PicklingError(PickleError): 69 """This exception is raised when an unpicklable object is passed to the 70 dump() method. 
71 72 """ 73 pass 74 75class UnpicklingError(PickleError): 76 """This exception is raised when there is a problem unpickling an object, 77 such as a security violation. 78 79 Note that other exceptions may also be raised during unpickling, including 80 (but not necessarily limited to) AttributeError, EOFError, ImportError, 81 and IndexError. 82 83 """ 84 pass 85 86# An instance of _Stop is raised by Unpickler.load_stop() in response to 87# the STOP opcode, passing the object that is the result of unpickling. 88class _Stop(Exception): 89 def __init__(self, value): 90 self.value = value 91 92# Jython has PyStringMap; it's a dict subclass with string keys 93try: 94 from org.python.core import PyStringMap 95except ImportError: 96 PyStringMap = None 97 98# Pickle opcodes. See pickletools.py for extensive docs. The listing 99# here is in kind-of alphabetical order of 1-character pickle code. 100# pickletools groups them by purpose. 101 102MARK = b'(' # push special markobject on stack 103STOP = b'.' 
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   #  "     "  ;    "      "       "      " < 256 bytes

# Protocol 4

SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
BINUNICODE8      = b'\x8d'  # push very long string
BINBYTES8        = b'\x8e'  # push very long bytes string
EMPTY_SET        = b'\x8f'  # push empty set on the stack
ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
FROZENSET        = b'\x91'  # build frozenset from topmost stack items
NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
MEMOIZE          = b'\x94'  # store top of the stack in memo
FRAME            = b'\x95'  # indicate the beginning of a new frame

# Export every opcode name (all-caps identifiers) as part of the public API.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])


class _Framer:
    """Write-side helper that accumulates pickle output into protocol 4
    frames, flushing a frame to the underlying file once it reaches the
    target size."""

    _FRAME_SIZE_MIN = 4
    _FRAME_SIZE_TARGET = 64 * 1024

    def __init__(self, file_write):
        self.file_write = file_write
        self.current_frame = None

    def start_framing(self):
        self.current_frame = io.BytesIO()

    def end_framing(self):
        if self.current_frame and self.current_frame.tell() > 0:
            self.commit_frame(force=True)
            self.current_frame = None

    def commit_frame(self, force=False):
        if self.current_frame:
            f = self.current_frame
            if f.tell() >= self._FRAME_SIZE_TARGET or force:
                data = f.getbuffer()
                write = self.file_write
                if len(data) >= self._FRAME_SIZE_MIN:
                    # Issue a single call to the write method of the underlying
                    # file object for the frame opcode with the size of the
                    # frame.  The concatenation is expected to be less expensive
                    # than issuing an additional call to write.
                    write(FRAME + pack("<Q", len(data)))

                    # Issue a separate call to write to append the frame
                    # contents without concatenation to the above to avoid a
                    # memory copy.
                    write(data)

                # Start the new frame with a new io.BytesIO instance so that
                # the file object can have delayed access to the previous frame
                # contents via an unreleased memoryview of the previous
                # io.BytesIO instance.
                self.current_frame = io.BytesIO()

    def write(self, data):
        if self.current_frame:
            return self.current_frame.write(data)
        else:
            return self.file_write(data)

    def write_large_bytes(self, header, payload):
        write = self.file_write
        if self.current_frame:
            # Terminate the current frame and flush it to the file.
            self.commit_frame(force=True)

        # Perform direct write of the header and payload of the large binary
        # object.  Be careful not to concatenate the header and the payload
        # prior to calling 'write' as we do not want to allocate a large
        # temporary bytes object.
        # We intentionally do not insert a protocol 4 frame opcode to make
        # it possible to optimize file.read calls in the loader.
        write(header)
        write(payload)
class _Unframer:
    """Read-side counterpart of _Framer: serves reads from the current
    protocol 4 frame, falling back to the underlying file between frames."""

    def __init__(self, file_read, file_readline, file_tell=None):
        self.file_read = file_read
        self.file_readline = file_readline
        self.current_frame = None

    def read(self, n):
        if self.current_frame:
            data = self.current_frame.read(n)
            if not data and n != 0:
                # Frame exhausted exactly at a boundary: leave framing mode
                # and read from the underlying file instead.
                self.current_frame = None
                return self.file_read(n)
            if len(data) < n:
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_read(n)

    def readline(self):
        if self.current_frame:
            data = self.current_frame.readline()
            if not data:
                self.current_frame = None
                return self.file_readline()
            if data[-1] != b'\n'[0]:
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_readline()

    def load_frame(self, frame_size):
        if self.current_frame and self.current_frame.read() != b'':
            raise UnpicklingError(
                "beginning of a new frame before end of current frame")
        self.current_frame = io.BytesIO(self.file_read(frame_size))


# Tools used for pickling.

def _getattribute(obj, name):
    # Resolve a (possibly dotted) qualified name against obj, returning
    # the resolved object and its immediate parent.
    for subpath in name.split('.'):
        if subpath == '<locals>':
            raise AttributeError("Can't get local attribute {!r} on {!r}"
                                 .format(name, obj))
        try:
            parent = obj
            obj = getattr(obj, subpath)
        except AttributeError:
            raise AttributeError("Can't get attribute {!r} on {!r}"
                                 .format(name, obj)) from None
    return obj, parent

def whichmodule(obj, name):
    """Find the module an object belong to."""
    module_name = getattr(obj, '__module__', None)
    if module_name is not None:
        return module_name
    # Protect the iteration by using a list copy of sys.modules against dynamic
    # modules that trigger imports of other modules upon calls to getattr.
    for module_name, module in list(sys.modules.items()):
        if module_name == '__main__' or module is None:
            continue
        try:
            if _getattribute(module, name)[0] is obj:
                return module_name
        except AttributeError:
            pass
    return '__main__'
def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0 is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    >>>
    """
    if x == 0:
        return b''
    nbytes = (x.bit_length() >> 3) + 1
    result = x.to_bytes(nbytes, byteorder='little', signed=True)
    if x < 0 and nbytes > 1:
        # Drop a redundant sign byte: a trailing 0xff is unnecessary when
        # the preceding byte already has its high (sign) bit set.
        if result[-1] == 0xff and (result[-2] & 0x80) != 0:
            result = result[:-1]
    return result

def decode_long(data):
    r"""Decode a long from a two's complement little-endian binary string.

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    return int.from_bytes(data, byteorder='little', signed=True)


# Pickling machinery

class _Pickler:

    def __init__(self, file, protocol=None, *, fix_imports=True):
        """This takes a binary file for writing a pickle data stream.

        The optional *protocol* argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2, 3 and 4.  The
        default protocol is 3; a backward-incompatible protocol designed
        for Python 3.

        Specifying a negative protocol version selects the highest
        protocol version supported.  The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The *file* argument must have a write() method that accepts a
        single bytes argument.  It can thus be a file object opened for
        binary writing, an io.BytesIO instance, or any other custom
        object that meets this interface.

        If *fix_imports* is True and *protocol* is less than 3, pickle
        will try to map the new Python 3 names to the old module names
        used in Python 2, so that the pickle data stream is readable
        with Python 2.
        """
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        if protocol < 0:
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        try:
            self._file_write = file.write
        except AttributeError:
            raise TypeError("file must have a 'write' attribute")
        self.framer = _Framer(self._file_write)
        self.write = self.framer.write
        self._write_large_bytes = self.framer.write_large_bytes
        self.memo = {}
        self.proto = int(protocol)
        self.bin = protocol >= 1
        self.fast = 0
        self.fix_imports = fix_imports and protocol < 3

    def clear_memo(self):
        """Clears the pickler's "memo".

        The memo is the data structure that remembers which objects the
        pickler has already seen, so that shared or recursive objects
        are pickled by reference and not by value.  This method is
        useful when re-using picklers.
        """
        self.memo.clear()

    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        # Check whether Pickler was initialized correctly.  This is
        # only needed to mimic the behavior of _pickle.Pickler.dump().
        if not hasattr(self, "_file_write"):
            raise PicklingError("Pickler.__init__() was not called by "
                                "%s.__init__()" % (self.__class__.__name__,))
        if self.proto >= 2:
            self.write(PROTO + pack("<B", self.proto))
        if self.proto >= 4:
            self.framer.start_framing()
        self.save(obj)
        self.write(STOP)
        self.framer.end_framing()
430 if not hasattr(self, "_file_write"): 431 raise PicklingError("Pickler.__init__() was not called by " 432 "%s.__init__()" % (self.__class__.__name__,)) 433 if self.proto >= 2: 434 self.write(PROTO + pack("<B", self.proto)) 435 if self.proto >= 4: 436 self.framer.start_framing() 437 self.save(obj) 438 self.write(STOP) 439 self.framer.end_framing() 440 441 def memoize(self, obj): 442 """Store an object in the memo.""" 443 444 # The Pickler memo is a dictionary mapping object ids to 2-tuples 445 # that contain the Unpickler memo key and the object being memoized. 446 # The memo key is written to the pickle and will become 447 # the key in the Unpickler's memo. The object is stored in the 448 # Pickler memo so that transient objects are kept alive during 449 # pickling. 450 451 # The use of the Unpickler memo length as the memo key is just a 452 # convention. The only requirement is that the memo values be unique. 453 # But there appears no advantage to any other scheme, and this 454 # scheme allows the Unpickler memo to be implemented as a plain (but 455 # growable) array, indexed by memo key. 456 if self.fast: 457 return 458 assert id(obj) not in self.memo 459 idx = len(self.memo) 460 self.write(self.put(idx)) 461 self.memo[id(obj)] = idx, obj 462 463 # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i. 464 def put(self, idx): 465 if self.proto >= 4: 466 return MEMOIZE 467 elif self.bin: 468 if idx < 256: 469 return BINPUT + pack("<B", idx) 470 else: 471 return LONG_BINPUT + pack("<I", idx) 472 else: 473 return PUT + repr(idx).encode("ascii") + b'\n' 474 475 # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i. 
476 def get(self, i): 477 if self.bin: 478 if i < 256: 479 return BINGET + pack("<B", i) 480 else: 481 return LONG_BINGET + pack("<I", i) 482 483 return GET + repr(i).encode("ascii") + b'\n' 484 485 def save(self, obj, save_persistent_id=True): 486 self.framer.commit_frame() 487 488 # Check for persistent id (defined by a subclass) 489 pid = self.persistent_id(obj) 490 if pid is not None and save_persistent_id: 491 self.save_pers(pid) 492 return 493 494 # Check the memo 495 x = self.memo.get(id(obj)) 496 if x is not None: 497 self.write(self.get(x[0])) 498 return 499 500 # Check the type dispatch table 501 t = type(obj) 502 f = self.dispatch.get(t) 503 if f is not None: 504 f(self, obj) # Call unbound method with explicit self 505 return 506 507 # Check private dispatch table if any, or else copyreg.dispatch_table 508 reduce = getattr(self, 'dispatch_table', dispatch_table).get(t) 509 if reduce is not None: 510 rv = reduce(obj) 511 else: 512 # Check for a class with a custom metaclass; treat as regular class 513 try: 514 issc = issubclass(t, type) 515 except TypeError: # t is not a class (old Boost; see SF #502085) 516 issc = False 517 if issc: 518 self.save_global(obj) 519 return 520 521 # Check for a __reduce_ex__ method, fall back to __reduce__ 522 reduce = getattr(obj, "__reduce_ex__", None) 523 if reduce is not None: 524 rv = reduce(self.proto) 525 else: 526 reduce = getattr(obj, "__reduce__", None) 527 if reduce is not None: 528 rv = reduce() 529 else: 530 raise PicklingError("Can't pickle %r object: %r" % 531 (t.__name__, obj)) 532 533 # Check for string returned by reduce(), meaning "save as global" 534 if isinstance(rv, str): 535 self.save_global(obj, rv) 536 return 537 538 # Assert that reduce() returned a tuple 539 if not isinstance(rv, tuple): 540 raise PicklingError("%s must return string or tuple" % reduce) 541 542 # Assert that it returned an appropriately sized tuple 543 l = len(rv) 544 if not (2 <= l <= 5): 545 raise PicklingError("Tuple returned 
by %s must have " 546 "two to five elements" % reduce) 547 548 # Save the reduce() output and finally memoize the object 549 self.save_reduce(obj=obj, *rv) 550 551 def persistent_id(self, obj): 552 # This exists so a subclass can override it 553 return None 554 555 def save_pers(self, pid): 556 # Save a persistent id reference 557 if self.bin: 558 self.save(pid, save_persistent_id=False) 559 self.write(BINPERSID) 560 else: 561 try: 562 self.write(PERSID + str(pid).encode("ascii") + b'\n') 563 except UnicodeEncodeError: 564 raise PicklingError( 565 "persistent IDs in protocol 0 must be ASCII strings") 566 567 def save_reduce(self, func, args, state=None, listitems=None, 568 dictitems=None, obj=None): 569 # This API is called by some subclasses 570 571 if not isinstance(args, tuple): 572 raise PicklingError("args from save_reduce() must be a tuple") 573 if not callable(func): 574 raise PicklingError("func from save_reduce() must be callable") 575 576 save = self.save 577 write = self.write 578 579 func_name = getattr(func, "__name__", "") 580 if self.proto >= 2 and func_name == "__newobj_ex__": 581 cls, args, kwargs = args 582 if not hasattr(cls, "__new__"): 583 raise PicklingError("args[0] from {} args has no __new__" 584 .format(func_name)) 585 if obj is not None and cls is not obj.__class__: 586 raise PicklingError("args[0] from {} args has the wrong class" 587 .format(func_name)) 588 if self.proto >= 4: 589 save(cls) 590 save(args) 591 save(kwargs) 592 write(NEWOBJ_EX) 593 else: 594 func = partial(cls.__new__, cls, *args, **kwargs) 595 save(func) 596 save(()) 597 write(REDUCE) 598 elif self.proto >= 2 and func_name == "__newobj__": 599 # A __reduce__ implementation can direct protocol 2 or newer to 600 # use the more efficient NEWOBJ opcode, while still 601 # allowing protocol 0 and 1 to work normally. For this to 602 # work, the function returned by __reduce__ should be 603 # called __newobj__, and its first argument should be a 604 # class. 
The implementation for __newobj__ 605 # should be as follows, although pickle has no way to 606 # verify this: 607 # 608 # def __newobj__(cls, *args): 609 # return cls.__new__(cls, *args) 610 # 611 # Protocols 0 and 1 will pickle a reference to __newobj__, 612 # while protocol 2 (and above) will pickle a reference to 613 # cls, the remaining args tuple, and the NEWOBJ code, 614 # which calls cls.__new__(cls, *args) at unpickling time 615 # (see load_newobj below). If __reduce__ returns a 616 # three-tuple, the state from the third tuple item will be 617 # pickled regardless of the protocol, calling __setstate__ 618 # at unpickling time (see load_build below). 619 # 620 # Note that no standard __newobj__ implementation exists; 621 # you have to provide your own. This is to enforce 622 # compatibility with Python 2.2 (pickles written using 623 # protocol 0 or 1 in Python 2.3 should be unpicklable by 624 # Python 2.2). 625 cls = args[0] 626 if not hasattr(cls, "__new__"): 627 raise PicklingError( 628 "args[0] from __newobj__ args has no __new__") 629 if obj is not None and cls is not obj.__class__: 630 raise PicklingError( 631 "args[0] from __newobj__ args has the wrong class") 632 args = args[1:] 633 save(cls) 634 save(args) 635 write(NEWOBJ) 636 else: 637 save(func) 638 save(args) 639 write(REDUCE) 640 641 if obj is not None: 642 # If the object is already in the memo, this means it is 643 # recursive. In this case, throw away everything we put on the 644 # stack, and fetch the object back from the memo. 645 if id(obj) in self.memo: 646 write(POP + self.get(self.memo[id(obj)][0])) 647 else: 648 self.memoize(obj) 649 650 # More new special cases (that work with older protocols as 651 # well): when __reduce__ returns a tuple with 4 or 5 items, 652 # the 4th and 5th item should be iterators that provide list 653 # items and dict items (as (key, value) tuples), or None. 
654 655 if listitems is not None: 656 self._batch_appends(listitems) 657 658 if dictitems is not None: 659 self._batch_setitems(dictitems) 660 661 if state is not None: 662 save(state) 663 write(BUILD) 664 665 # Methods below this point are dispatched through the dispatch table 666 667 dispatch = {} 668 669 def save_none(self, obj): 670 self.write(NONE) 671 dispatch[type(None)] = save_none 672 673 def save_bool(self, obj): 674 if self.proto >= 2: 675 self.write(NEWTRUE if obj else NEWFALSE) 676 else: 677 self.write(TRUE if obj else FALSE) 678 dispatch[bool] = save_bool 679 680 def save_long(self, obj): 681 if self.bin: 682 # If the int is small enough to fit in a signed 4-byte 2's-comp 683 # format, we can store it more efficiently than the general 684 # case. 685 # First one- and two-byte unsigned ints: 686 if obj >= 0: 687 if obj <= 0xff: 688 self.write(BININT1 + pack("<B", obj)) 689 return 690 if obj <= 0xffff: 691 self.write(BININT2 + pack("<H", obj)) 692 return 693 # Next check for 4-byte signed ints: 694 if -0x80000000 <= obj <= 0x7fffffff: 695 self.write(BININT + pack("<i", obj)) 696 return 697 if self.proto >= 2: 698 encoded = encode_long(obj) 699 n = len(encoded) 700 if n < 256: 701 self.write(LONG1 + pack("<B", n) + encoded) 702 else: 703 self.write(LONG4 + pack("<i", n) + encoded) 704 return 705 if -0x80000000 <= obj <= 0x7fffffff: 706 self.write(INT + repr(obj).encode("ascii") + b'\n') 707 else: 708 self.write(LONG + repr(obj).encode("ascii") + b'L\n') 709 dispatch[int] = save_long 710 711 def save_float(self, obj): 712 if self.bin: 713 self.write(BINFLOAT + pack('>d', obj)) 714 else: 715 self.write(FLOAT + repr(obj).encode("ascii") + b'\n') 716 dispatch[float] = save_float 717 718 def save_bytes(self, obj): 719 if self.proto < 3: 720 if not obj: # bytes object is empty 721 self.save_reduce(bytes, (), obj=obj) 722 else: 723 self.save_reduce(codecs.encode, 724 (str(obj, 'latin1'), 'latin1'), obj=obj) 725 return 726 n = len(obj) 727 if n <= 0xff: 728 
self.write(SHORT_BINBYTES + pack("<B", n) + obj) 729 elif n > 0xffffffff and self.proto >= 4: 730 self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj) 731 elif n >= self.framer._FRAME_SIZE_TARGET: 732 self._write_large_bytes(BINBYTES + pack("<I", n), obj) 733 else: 734 self.write(BINBYTES + pack("<I", n) + obj) 735 self.memoize(obj) 736 dispatch[bytes] = save_bytes 737 738 def save_str(self, obj): 739 if self.bin: 740 encoded = obj.encode('utf-8', 'surrogatepass') 741 n = len(encoded) 742 if n <= 0xff and self.proto >= 4: 743 self.write(SHORT_BINUNICODE + pack("<B", n) + encoded) 744 elif n > 0xffffffff and self.proto >= 4: 745 self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded) 746 elif n >= self.framer._FRAME_SIZE_TARGET: 747 self._write_large_bytes(BINUNICODE + pack("<I", n), encoded) 748 else: 749 self.write(BINUNICODE + pack("<I", n) + encoded) 750 else: 751 obj = obj.replace("\\", "\\u005c") 752 obj = obj.replace("\n", "\\u000a") 753 self.write(UNICODE + obj.encode('raw-unicode-escape') + 754 b'\n') 755 self.memoize(obj) 756 dispatch[str] = save_str 757 758 def save_tuple(self, obj): 759 if not obj: # tuple is empty 760 if self.bin: 761 self.write(EMPTY_TUPLE) 762 else: 763 self.write(MARK + TUPLE) 764 return 765 766 n = len(obj) 767 save = self.save 768 memo = self.memo 769 if n <= 3 and self.proto >= 2: 770 for element in obj: 771 save(element) 772 # Subtle. Same as in the big comment below. 773 if id(obj) in memo: 774 get = self.get(memo[id(obj)][0]) 775 self.write(POP * n + get) 776 else: 777 self.write(_tuplesize2code[n]) 778 self.memoize(obj) 779 return 780 781 # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple 782 # has more than 3 elements. 783 write = self.write 784 write(MARK) 785 for element in obj: 786 save(element) 787 788 if id(obj) in memo: 789 # Subtle. d was not in memo when we entered save_tuple(), so 790 # the process of saving the tuple's elements must have saved 791 # the tuple itself: the tuple is recursive. 
The proper action 792 # now is to throw away everything we put on the stack, and 793 # simply GET the tuple (it's already constructed). This check 794 # could have been done in the "for element" loop instead, but 795 # recursive tuples are a rare thing. 796 get = self.get(memo[id(obj)][0]) 797 if self.bin: 798 write(POP_MARK + get) 799 else: # proto 0 -- POP_MARK not available 800 write(POP * (n+1) + get) 801 return 802 803 # No recursion. 804 write(TUPLE) 805 self.memoize(obj) 806 807 dispatch[tuple] = save_tuple 808 809 def save_list(self, obj): 810 if self.bin: 811 self.write(EMPTY_LIST) 812 else: # proto 0 -- can't use EMPTY_LIST 813 self.write(MARK + LIST) 814 815 self.memoize(obj) 816 self._batch_appends(obj) 817 818 dispatch[list] = save_list 819 820 _BATCHSIZE = 1000 821 822 def _batch_appends(self, items): 823 # Helper to batch up APPENDS sequences 824 save = self.save 825 write = self.write 826 827 if not self.bin: 828 for x in items: 829 save(x) 830 write(APPEND) 831 return 832 833 it = iter(items) 834 while True: 835 tmp = list(islice(it, self._BATCHSIZE)) 836 n = len(tmp) 837 if n > 1: 838 write(MARK) 839 for x in tmp: 840 save(x) 841 write(APPENDS) 842 elif n: 843 save(tmp[0]) 844 write(APPEND) 845 # else tmp is empty, and we're done 846 if n < self._BATCHSIZE: 847 return 848 849 def save_dict(self, obj): 850 if self.bin: 851 self.write(EMPTY_DICT) 852 else: # proto 0 -- can't use EMPTY_DICT 853 self.write(MARK + DICT) 854 855 self.memoize(obj) 856 self._batch_setitems(obj.items()) 857 858 dispatch[dict] = save_dict 859 if PyStringMap is not None: 860 dispatch[PyStringMap] = save_dict 861 862 def _batch_setitems(self, items): 863 # Helper to batch up SETITEMS sequences; proto >= 1 only 864 save = self.save 865 write = self.write 866 867 if not self.bin: 868 for k, v in items: 869 save(k) 870 save(v) 871 write(SETITEM) 872 return 873 874 it = iter(items) 875 while True: 876 tmp = list(islice(it, self._BATCHSIZE)) 877 n = len(tmp) 878 if n > 1: 879 
write(MARK) 880 for k, v in tmp: 881 save(k) 882 save(v) 883 write(SETITEMS) 884 elif n: 885 k, v = tmp[0] 886 save(k) 887 save(v) 888 write(SETITEM) 889 # else tmp is empty, and we're done 890 if n < self._BATCHSIZE: 891 return 892 893 def save_set(self, obj): 894 save = self.save 895 write = self.write 896 897 if self.proto < 4: 898 self.save_reduce(set, (list(obj),), obj=obj) 899 return 900 901 write(EMPTY_SET) 902 self.memoize(obj) 903 904 it = iter(obj) 905 while True: 906 batch = list(islice(it, self._BATCHSIZE)) 907 n = len(batch) 908 if n > 0: 909 write(MARK) 910 for item in batch: 911 save(item) 912 write(ADDITEMS) 913 if n < self._BATCHSIZE: 914 return 915 dispatch[set] = save_set 916 917 def save_frozenset(self, obj): 918 save = self.save 919 write = self.write 920 921 if self.proto < 4: 922 self.save_reduce(frozenset, (list(obj),), obj=obj) 923 return 924 925 write(MARK) 926 for item in obj: 927 save(item) 928 929 if id(obj) in self.memo: 930 # If the object is already in the memo, this means it is 931 # recursive. In this case, throw away everything we put on the 932 # stack, and fetch the object back from the memo. 
933 write(POP_MARK + self.get(self.memo[id(obj)][0])) 934 return 935 936 write(FROZENSET) 937 self.memoize(obj) 938 dispatch[frozenset] = save_frozenset 939 940 def save_global(self, obj, name=None): 941 write = self.write 942 memo = self.memo 943 944 if name is None: 945 name = getattr(obj, '__qualname__', None) 946 if name is None: 947 name = obj.__name__ 948 949 module_name = whichmodule(obj, name) 950 try: 951 __import__(module_name, level=0) 952 module = sys.modules[module_name] 953 obj2, parent = _getattribute(module, name) 954 except (ImportError, KeyError, AttributeError): 955 raise PicklingError( 956 "Can't pickle %r: it's not found as %s.%s" % 957 (obj, module_name, name)) from None 958 else: 959 if obj2 is not obj: 960 raise PicklingError( 961 "Can't pickle %r: it's not the same object as %s.%s" % 962 (obj, module_name, name)) 963 964 if self.proto >= 2: 965 code = _extension_registry.get((module_name, name)) 966 if code: 967 assert code > 0 968 if code <= 0xff: 969 write(EXT1 + pack("<B", code)) 970 elif code <= 0xffff: 971 write(EXT2 + pack("<H", code)) 972 else: 973 write(EXT4 + pack("<i", code)) 974 return 975 lastname = name.rpartition('.')[2] 976 if parent is module: 977 name = lastname 978 # Non-ASCII identifiers are supported only with protocols >= 3. 
979 if self.proto >= 4: 980 self.save(module_name) 981 self.save(name) 982 write(STACK_GLOBAL) 983 elif parent is not module: 984 self.save_reduce(getattr, (parent, lastname)) 985 elif self.proto >= 3: 986 write(GLOBAL + bytes(module_name, "utf-8") + b'\n' + 987 bytes(name, "utf-8") + b'\n') 988 else: 989 if self.fix_imports: 990 r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING 991 r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING 992 if (module_name, name) in r_name_mapping: 993 module_name, name = r_name_mapping[(module_name, name)] 994 elif module_name in r_import_mapping: 995 module_name = r_import_mapping[module_name] 996 try: 997 write(GLOBAL + bytes(module_name, "ascii") + b'\n' + 998 bytes(name, "ascii") + b'\n') 999 except UnicodeEncodeError: 1000 raise PicklingError( 1001 "can't pickle global identifier '%s.%s' using " 1002 "pickle protocol %i" % (module, name, self.proto)) from None 1003 1004 self.memoize(obj) 1005 1006 def save_type(self, obj): 1007 if obj is type(None): 1008 return self.save_reduce(type, (None,), obj=obj) 1009 elif obj is type(NotImplemented): 1010 return self.save_reduce(type, (NotImplemented,), obj=obj) 1011 elif obj is type(...): 1012 return self.save_reduce(type, (...,), obj=obj) 1013 return self.save_global(obj) 1014 1015 dispatch[FunctionType] = save_global 1016 dispatch[type] = save_type 1017 1018 1019# Unpickling machinery 1020 1021class _Unpickler: 1022 1023 def __init__(self, file, *, fix_imports=True, 1024 encoding="ASCII", errors="strict"): 1025 """This takes a binary file for reading a pickle data stream. 1026 1027 The protocol version of the pickle is detected automatically, so 1028 no proto argument is needed. 1029 1030 The argument *file* must have two methods, a read() method that 1031 takes an integer argument, and a readline() method that requires 1032 no arguments. Both methods should return bytes. 
# Unpickling machinery

class _Unpickler:

    def __init__(self, file, *, fix_imports=True,
                 encoding="ASCII", errors="strict"):
        """This takes a binary file for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so
        no proto argument is needed.

        The argument *file* must have two methods, a read() method that
        takes an integer argument, and a readline() method that requires
        no arguments.  Both methods should return bytes.  Thus *file*
        can be a binary file object opened for reading, an io.BytesIO
        object, or any other custom object that meets this interface.

        Optional keyword arguments are *fix_imports*, *encoding* and
        *errors*, which are used to control compatibility support for
        pickle stream generated by Python 2.  If *fix_imports* is True,
        pickle will try to map the old Python 2 names to the new names
        used in Python 3.  The *encoding* and *errors* tell pickle how
        to decode 8-bit string instances pickled by Python 2; these
        default to 'ASCII' and 'strict', respectively.  *encoding* can be
        'bytes' to read these 8-bit string instances as bytes objects.
        """
        self._file_readline = file.readline
        self._file_read = file.read
        self.memo = {}
        self.encoding = encoding
        self.errors = errors
        self.proto = 0
        self.fix_imports = fix_imports

    def load(self):
        """Read a pickled object representation from the open file.

        Return the reconstituted object hierarchy specified in the file.
        """
        # Check whether Unpickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Unpickler.load().
        if not hasattr(self, "_file_read"):
            raise UnpicklingError("Unpickler.__init__() was not called by "
                                  "%s.__init__()" % (self.__class__.__name__,))
        self._unframer = _Unframer(self._file_read, self._file_readline)
        self.read = self._unframer.read
        self.readline = self._unframer.readline
        self.metastack = []
        self.stack = []
        self.append = self.stack.append
        self.proto = 0
        read = self.read
        dispatch = self.dispatch
        try:
            # Dispatch on each opcode byte until the STOP handler raises
            # _Stop carrying the final value.
            while True:
                key = read(1)
                if not key:
                    raise EOFError
                assert isinstance(key, bytes_types)
                dispatch[key[0]](self)
        except _Stop as stopinst:
            return stopinst.value

    # Return a list of items pushed in the stack after last MARK instruction.
    def pop_mark(self):
        items = self.stack
        self.stack = self.metastack.pop()
        self.append = self.stack.append
        return items

    def persistent_load(self, pid):
        # Subclasses override this to support persistent IDs.
        raise UnpicklingError("unsupported persistent id encountered")

    # Maps an opcode byte to the method that implements it; filled in as
    # the load_* methods below are defined.
    dispatch = {}

    def load_proto(self):
        proto = self.read(1)[0]
        if not 0 <= proto <= HIGHEST_PROTOCOL:
            raise ValueError("unsupported pickle protocol: %d" % proto)
        self.proto = proto
    dispatch[PROTO[0]] = load_proto

    def load_frame(self):
        frame_size, = unpack('<Q', self.read(8))
        if frame_size > sys.maxsize:
            raise ValueError("frame size > sys.maxsize: %d" % frame_size)
        self._unframer.load_frame(frame_size)
    dispatch[FRAME[0]] = load_frame

    def load_persid(self):
        try:
            pid = self.readline()[:-1].decode("ascii")
        except UnicodeDecodeError:
            raise UnpicklingError(
                "persistent IDs in protocol 0 must be ASCII strings")
        self.append(self.persistent_load(pid))
    dispatch[PERSID[0]] = load_persid

    def load_binpersid(self):
        pid = self.stack.pop()
        self.append(self.persistent_load(pid))
    dispatch[BINPERSID[0]] = load_binpersid
    def load_none(self):
        self.append(None)
    dispatch[NONE[0]] = load_none

    def load_false(self):
        self.append(False)
    dispatch[NEWFALSE[0]] = load_false

    def load_true(self):
        self.append(True)
    dispatch[NEWTRUE[0]] = load_true

    def load_int(self):
        # Protocol 0 also encodes booleans through INT, as the bare
        # argument strings of FALSE/TRUE (their opcode byte stripped).
        data = self.readline()
        if data == FALSE[1:]:
            val = False
        elif data == TRUE[1:]:
            val = True
        else:
            val = int(data, 0)
        self.append(val)
    dispatch[INT[0]] = load_int

    def load_binint(self):
        self.append(unpack('<i', self.read(4))[0])
    dispatch[BININT[0]] = load_binint

    def load_binint1(self):
        self.append(self.read(1)[0])
    dispatch[BININT1[0]] = load_binint1

    def load_binint2(self):
        self.append(unpack('<H', self.read(2))[0])
    dispatch[BININT2[0]] = load_binint2

    def load_long(self):
        val = self.readline()[:-1]
        if val and val[-1] == b'L'[0]:
            # Strip the trailing "L" that Python 2 wrote for longs.
            val = val[:-1]
        self.append(int(val, 0))
    dispatch[LONG[0]] = load_long

    def load_long1(self):
        n = self.read(1)[0]
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG1[0]] = load_long1

    def load_long4(self):
        n, = unpack('<i', self.read(4))
        if n < 0:
            # Corrupt or hostile pickle -- we never write one like this
            raise UnpicklingError("LONG pickle has negative byte count")
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG4[0]] = load_long4

    def load_float(self):
        self.append(float(self.readline()[:-1]))
    dispatch[FLOAT[0]] = load_float

    def load_binfloat(self):
        self.append(unpack('>d', self.read(8))[0])
    dispatch[BINFLOAT[0]] = load_binfloat

    def _decode_string(self, value):
        # Used to allow strings from Python 2 to be decoded either as
        # bytes or Unicode strings.  This should be used only with the
        # STRING, BINSTRING and SHORT_BINSTRING opcodes.
        if self.encoding == "bytes":
            return value
        else:
            return value.decode(self.encoding, self.errors)

    def load_string(self):
        data = self.readline()[:-1]
        # Strip outermost quotes
        if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'':
            data = data[1:-1]
        else:
            raise UnpicklingError("the STRING opcode argument must be quoted")
        self.append(self._decode_string(codecs.escape_decode(data)[0]))
    dispatch[STRING[0]] = load_string

    def load_binstring(self):
        # Deprecated BINSTRING uses signed 32-bit length
        len, = unpack('<i', self.read(4))
        if len < 0:
            raise UnpicklingError("BINSTRING pickle has negative byte count")
        data = self.read(len)
        self.append(self._decode_string(data))
    dispatch[BINSTRING[0]] = load_binstring

    def load_binbytes(self):
        len, = unpack('<I', self.read(4))
        if len > maxsize:
            raise UnpicklingError("BINBYTES exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        self.append(self.read(len))
    dispatch[BINBYTES[0]] = load_binbytes

    def load_unicode(self):
        self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
    dispatch[UNICODE[0]] = load_unicode

    def load_binunicode(self):
        len, = unpack('<I', self.read(4))
        if len > maxsize:
            raise UnpicklingError("BINUNICODE exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
    dispatch[BINUNICODE[0]] = load_binunicode

    def load_binunicode8(self):
        len, = unpack('<Q', self.read(8))
        if len > maxsize:
            raise UnpicklingError("BINUNICODE8 exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
    dispatch[BINUNICODE8[0]] = load_binunicode8
UnpicklingError("BINBYTES8 exceeds system's maximum size " 1254 "of %d bytes" % maxsize) 1255 self.append(self.read(len)) 1256 dispatch[BINBYTES8[0]] = load_binbytes8 1257 1258 def load_short_binstring(self): 1259 len = self.read(1)[0] 1260 data = self.read(len) 1261 self.append(self._decode_string(data)) 1262 dispatch[SHORT_BINSTRING[0]] = load_short_binstring 1263 1264 def load_short_binbytes(self): 1265 len = self.read(1)[0] 1266 self.append(self.read(len)) 1267 dispatch[SHORT_BINBYTES[0]] = load_short_binbytes 1268 1269 def load_short_binunicode(self): 1270 len = self.read(1)[0] 1271 self.append(str(self.read(len), 'utf-8', 'surrogatepass')) 1272 dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode 1273 1274 def load_tuple(self): 1275 items = self.pop_mark() 1276 self.append(tuple(items)) 1277 dispatch[TUPLE[0]] = load_tuple 1278 1279 def load_empty_tuple(self): 1280 self.append(()) 1281 dispatch[EMPTY_TUPLE[0]] = load_empty_tuple 1282 1283 def load_tuple1(self): 1284 self.stack[-1] = (self.stack[-1],) 1285 dispatch[TUPLE1[0]] = load_tuple1 1286 1287 def load_tuple2(self): 1288 self.stack[-2:] = [(self.stack[-2], self.stack[-1])] 1289 dispatch[TUPLE2[0]] = load_tuple2 1290 1291 def load_tuple3(self): 1292 self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])] 1293 dispatch[TUPLE3[0]] = load_tuple3 1294 1295 def load_empty_list(self): 1296 self.append([]) 1297 dispatch[EMPTY_LIST[0]] = load_empty_list 1298 1299 def load_empty_dictionary(self): 1300 self.append({}) 1301 dispatch[EMPTY_DICT[0]] = load_empty_dictionary 1302 1303 def load_empty_set(self): 1304 self.append(set()) 1305 dispatch[EMPTY_SET[0]] = load_empty_set 1306 1307 def load_frozenset(self): 1308 items = self.pop_mark() 1309 self.append(frozenset(items)) 1310 dispatch[FROZENSET[0]] = load_frozenset 1311 1312 def load_list(self): 1313 items = self.pop_mark() 1314 self.append(items) 1315 dispatch[LIST[0]] = load_list 1316 1317 def load_dict(self): 1318 items = self.pop_mark() 1319 d = 
{items[i]: items[i+1] 1320 for i in range(0, len(items), 2)} 1321 self.append(d) 1322 dispatch[DICT[0]] = load_dict 1323 1324 # INST and OBJ differ only in how they get a class object. It's not 1325 # only sensible to do the rest in a common routine, the two routines 1326 # previously diverged and grew different bugs. 1327 # klass is the class to instantiate, and k points to the topmost mark 1328 # object, following which are the arguments for klass.__init__. 1329 def _instantiate(self, klass, args): 1330 if (args or not isinstance(klass, type) or 1331 hasattr(klass, "__getinitargs__")): 1332 try: 1333 value = klass(*args) 1334 except TypeError as err: 1335 raise TypeError("in constructor for %s: %s" % 1336 (klass.__name__, str(err)), sys.exc_info()[2]) 1337 else: 1338 value = klass.__new__(klass) 1339 self.append(value) 1340 1341 def load_inst(self): 1342 module = self.readline()[:-1].decode("ascii") 1343 name = self.readline()[:-1].decode("ascii") 1344 klass = self.find_class(module, name) 1345 self._instantiate(klass, self.pop_mark()) 1346 dispatch[INST[0]] = load_inst 1347 1348 def load_obj(self): 1349 # Stack is ... markobject classobject arg1 arg2 ... 
    def load_obj(self):
        # Stack is ... markobject classobject arg1 arg2 ...
        args = self.pop_mark()
        cls = args.pop(0)
        self._instantiate(cls, args)
    dispatch[OBJ[0]] = load_obj

    def load_newobj(self):
        # Protocol 2: obj = cls.__new__(cls, *args); __init__ is skipped.
        args = self.stack.pop()
        cls = self.stack.pop()
        obj = cls.__new__(cls, *args)
        self.append(obj)
    dispatch[NEWOBJ[0]] = load_newobj

    def load_newobj_ex(self):
        # Protocol 4 variant of NEWOBJ that also passes keyword args.
        kwargs = self.stack.pop()
        args = self.stack.pop()
        cls = self.stack.pop()
        obj = cls.__new__(cls, *args, **kwargs)
        self.append(obj)
    dispatch[NEWOBJ_EX[0]] = load_newobj_ex

    def load_global(self):
        module = self.readline()[:-1].decode("utf-8")
        name = self.readline()[:-1].decode("utf-8")
        klass = self.find_class(module, name)
        self.append(klass)
    dispatch[GLOBAL[0]] = load_global

    def load_stack_global(self):
        # Protocol 4: module and qualified name come from the stack.
        name = self.stack.pop()
        module = self.stack.pop()
        if type(name) is not str or type(module) is not str:
            raise UnpicklingError("STACK_GLOBAL requires str")
        self.append(self.find_class(module, name))
    dispatch[STACK_GLOBAL[0]] = load_stack_global

    def load_ext1(self):
        code = self.read(1)[0]
        self.get_extension(code)
    dispatch[EXT1[0]] = load_ext1

    def load_ext2(self):
        code, = unpack('<H', self.read(2))
        self.get_extension(code)
    dispatch[EXT2[0]] = load_ext2

    def load_ext4(self):
        code, = unpack('<i', self.read(4))
        self.get_extension(code)
    dispatch[EXT4[0]] = load_ext4

    def get_extension(self, code):
        # nil is a unique sentinel so a cached value of None remains
        # distinguishable from a cache miss.
        nil = []
        obj = _extension_cache.get(code, nil)
        if obj is not nil:
            self.append(obj)
            return
        key = _inverted_registry.get(code)
        if not key:
            if code <= 0: # note that 0 is forbidden
                # Corrupt or hostile pickle.
                raise UnpicklingError("EXT specifies code <= 0")
            raise ValueError("unregistered extension code %d" % code)
        obj = self.find_class(*key)
        _extension_cache[code] = obj
        self.append(obj)

    def find_class(self, module, name):
        # Subclasses may override this.
        if self.proto < 3 and self.fix_imports:
            # Map Python 2 module/attribute names to their Python 3 homes.
            if (module, name) in _compat_pickle.NAME_MAPPING:
                module, name = _compat_pickle.NAME_MAPPING[(module, name)]
            elif module in _compat_pickle.IMPORT_MAPPING:
                module = _compat_pickle.IMPORT_MAPPING[module]
        __import__(module, level=0)
        if self.proto >= 4:
            # Protocol 4 names may be dotted attribute paths.
            return _getattribute(sys.modules[module], name)[0]
        else:
            return getattr(sys.modules[module], name)

    def load_reduce(self):
        stack = self.stack
        args = stack.pop()
        func = stack[-1]
        stack[-1] = func(*args)
    dispatch[REDUCE[0]] = load_reduce

    def load_pop(self):
        if self.stack:
            del self.stack[-1]
        else:
            self.pop_mark()
    dispatch[POP[0]] = load_pop

    def load_pop_mark(self):
        self.pop_mark()
    dispatch[POP_MARK[0]] = load_pop_mark

    def load_dup(self):
        self.append(self.stack[-1])
    dispatch[DUP[0]] = load_dup

    def load_get(self):
        i = int(self.readline()[:-1])
        self.append(self.memo[i])
    dispatch[GET[0]] = load_get

    def load_binget(self):
        i = self.read(1)[0]
        self.append(self.memo[i])
    dispatch[BINGET[0]] = load_binget

    def load_long_binget(self):
        i, = unpack('<I', self.read(4))
        self.append(self.memo[i])
    dispatch[LONG_BINGET[0]] = load_long_binget

    def load_put(self):
        i = int(self.readline()[:-1])
        if i < 0:
            raise ValueError("negative PUT argument")
        self.memo[i] = self.stack[-1]
    dispatch[PUT[0]] = load_put

    def load_binput(self):
        i = self.read(1)[0]
        # NOTE(review): i comes from a single byte and is always in
        # 0-255, so this check can never fire; kept for symmetry with
        # load_put.
        if i < 0:
            raise ValueError("negative BINPUT argument")
        self.memo[i] = self.stack[-1]
1478 dispatch[BINPUT[0]] = load_binput 1479 1480 def load_long_binput(self): 1481 i, = unpack('<I', self.read(4)) 1482 if i > maxsize: 1483 raise ValueError("negative LONG_BINPUT argument") 1484 self.memo[i] = self.stack[-1] 1485 dispatch[LONG_BINPUT[0]] = load_long_binput 1486 1487 def load_memoize(self): 1488 memo = self.memo 1489 memo[len(memo)] = self.stack[-1] 1490 dispatch[MEMOIZE[0]] = load_memoize 1491 1492 def load_append(self): 1493 stack = self.stack 1494 value = stack.pop() 1495 list = stack[-1] 1496 list.append(value) 1497 dispatch[APPEND[0]] = load_append 1498 1499 def load_appends(self): 1500 items = self.pop_mark() 1501 list_obj = self.stack[-1] 1502 try: 1503 extend = list_obj.extend 1504 except AttributeError: 1505 pass 1506 else: 1507 extend(items) 1508 return 1509 # Even if the PEP 307 requires extend() and append() methods, 1510 # fall back on append() if the object has no extend() method 1511 # for backward compatibility. 1512 append = list_obj.append 1513 for item in items: 1514 append(item) 1515 dispatch[APPENDS[0]] = load_appends 1516 1517 def load_setitem(self): 1518 stack = self.stack 1519 value = stack.pop() 1520 key = stack.pop() 1521 dict = stack[-1] 1522 dict[key] = value 1523 dispatch[SETITEM[0]] = load_setitem 1524 1525 def load_setitems(self): 1526 items = self.pop_mark() 1527 dict = self.stack[-1] 1528 for i in range(0, len(items), 2): 1529 dict[items[i]] = items[i + 1] 1530 dispatch[SETITEMS[0]] = load_setitems 1531 1532 def load_additems(self): 1533 items = self.pop_mark() 1534 set_obj = self.stack[-1] 1535 if isinstance(set_obj, set): 1536 set_obj.update(items) 1537 else: 1538 add = set_obj.add 1539 for item in items: 1540 add(item) 1541 dispatch[ADDITEMS[0]] = load_additems 1542 1543 def load_build(self): 1544 stack = self.stack 1545 state = stack.pop() 1546 inst = stack[-1] 1547 setstate = getattr(inst, "__setstate__", None) 1548 if setstate is not None: 1549 setstate(state) 1550 return 1551 slotstate = None 1552 if 
isinstance(state, tuple) and len(state) == 2: 1553 state, slotstate = state 1554 if state: 1555 inst_dict = inst.__dict__ 1556 intern = sys.intern 1557 for k, v in state.items(): 1558 if type(k) is str: 1559 inst_dict[intern(k)] = v 1560 else: 1561 inst_dict[k] = v 1562 if slotstate: 1563 for k, v in slotstate.items(): 1564 setattr(inst, k, v) 1565 dispatch[BUILD[0]] = load_build 1566 1567 def load_mark(self): 1568 self.metastack.append(self.stack) 1569 self.stack = [] 1570 self.append = self.stack.append 1571 dispatch[MARK[0]] = load_mark 1572 1573 def load_stop(self): 1574 value = self.stack.pop() 1575 raise _Stop(value) 1576 dispatch[STOP[0]] = load_stop 1577 1578 1579# Shorthands 1580 1581def _dump(obj, file, protocol=None, *, fix_imports=True): 1582 _Pickler(file, protocol, fix_imports=fix_imports).dump(obj) 1583 1584def _dumps(obj, protocol=None, *, fix_imports=True): 1585 f = io.BytesIO() 1586 _Pickler(f, protocol, fix_imports=fix_imports).dump(obj) 1587 res = f.getvalue() 1588 assert isinstance(res, bytes_types) 1589 return res 1590 1591def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"): 1592 return _Unpickler(file, fix_imports=fix_imports, 1593 encoding=encoding, errors=errors).load() 1594 1595def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"): 1596 if isinstance(s, str): 1597 raise TypeError("Can't load pickle from unicode string") 1598 file = io.BytesIO(s) 1599 return _Unpickler(file, fix_imports=fix_imports, 1600 encoding=encoding, errors=errors).load() 1601 1602# Use the faster _pickle if possible 1603try: 1604 from _pickle import ( 1605 PickleError, 1606 PicklingError, 1607 UnpicklingError, 1608 Pickler, 1609 Unpickler, 1610 dump, 1611 dumps, 1612 load, 1613 loads 1614 ) 1615except ImportError: 1616 Pickler, Unpickler = _Pickler, _Unpickler 1617 dump, dumps, load, loads = _dump, _dumps, _load, _loads 1618 1619# Doctest 1620def _test(): 1621 import doctest 1622 return doctest.testmod() 1623 1624if __name__ == 
"__main__": 1625 import argparse 1626 parser = argparse.ArgumentParser( 1627 description='display contents of the pickle files') 1628 parser.add_argument( 1629 'pickle_file', type=argparse.FileType('br'), 1630 nargs='*', help='the pickle file') 1631 parser.add_argument( 1632 '-t', '--test', action='store_true', 1633 help='run self-test suite') 1634 parser.add_argument( 1635 '-v', action='store_true', 1636 help='run verbosely; only affects self-test run') 1637 args = parser.parse_args() 1638 if args.test: 1639 _test() 1640 else: 1641 if not args.pickle_file: 1642 parser.print_help() 1643 else: 1644 import pprint 1645 for f in args.pickle_file: 1646 obj = load(f) 1647 pprint.pprint(obj) 1648