1"""Create portable serialized representations of Python objects. 2 3See module copyreg for a mechanism for registering custom picklers. 4See module pickletools source for extensive comments. 5 6Classes: 7 8 Pickler 9 Unpickler 10 11Functions: 12 13 dump(object, file) 14 dumps(object) -> string 15 load(file) -> object 16 loads(string) -> object 17 18Misc variables: 19 20 __version__ 21 format_version 22 compatible_formats 23 24""" 25 26from types import FunctionType 27from copyreg import dispatch_table 28from copyreg import _extension_registry, _inverted_registry, _extension_cache 29from itertools import islice 30from functools import partial 31import sys 32from sys import maxsize 33from struct import pack, unpack 34import re 35import io 36import codecs 37import _compat_pickle 38 39__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", 40 "Unpickler", "dump", "dumps", "load", "loads"] 41 42try: 43 from _pickle import PickleBuffer 44 __all__.append("PickleBuffer") 45 _HAVE_PICKLE_BUFFER = True 46except ImportError: 47 _HAVE_PICKLE_BUFFER = False 48 49 50# Shortcut for use in isinstance testing 51bytes_types = (bytes, bytearray) 52 53# These are purely informational; no code uses these. 54format_version = "4.0" # File format version we write 55compatible_formats = ["1.0", # Original protocol 0 56 "1.1", # Protocol 0 with INST added 57 "1.2", # Original protocol 1 58 "1.3", # Protocol 1 with BINFLOAT added 59 "2.0", # Protocol 2 60 "3.0", # Protocol 3 61 "4.0", # Protocol 4 62 "5.0", # Protocol 5 63 ] # Old format versions we can read 64 65# This is the highest protocol number we know how to read. 66HIGHEST_PROTOCOL = 5 67 68# The protocol we write by default. May be less than HIGHEST_PROTOCOL. 69# Only bump this if the oldest still supported version of Python already 70# includes it. 
DEFAULT_PROTOCOL = 4

class PickleError(Exception):
    """A common base class for the other pickling exceptions."""
    pass

class PicklingError(PickleError):
    """This exception is raised when an unpicklable object is passed to the
    dump() method.

    """
    pass

class UnpicklingError(PickleError):
    """This exception is raised when there is a problem unpickling an object,
    such as a security violation.

    Note that other exceptions may also be raised during unpickling, including
    (but not necessarily limited to) AttributeError, EOFError, ImportError,
    and IndexError.

    """
    pass

# An instance of _Stop is raised by Unpickler.load_stop() in response to
# the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    def __init__(self, value):
        # value is the fully reconstructed top-of-stack object; the load
        # loop catches _Stop and returns this attribute to the caller.
        self.value = value

# Jython has PyStringMap; it's a dict subclass with string keys
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None

# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.
# NOTE: these byte values are the on-the-wire format; they must never change.

MARK           = b'('   # push special markobject on stack
STOP           = b'.'
# every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

# Index i gives the opcode that builds an i-tuple from the stack (proto 2+).
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   #  "     "   ;    "      "       "      " < 256 bytes

# Protocol 4

SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
BINUNICODE8      = b'\x8d'  # push very long string
BINBYTES8        = b'\x8e'  # push very long bytes string
EMPTY_SET        = b'\x8f'  # push empty set on the stack
ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
FROZENSET        = b'\x91'  # build frozenset from topmost stack items
NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
MEMOIZE          = b'\x94'  # store top of the stack in memo
FRAME            = b'\x95'  # indicate the beginning of a new frame

# Protocol 5

BYTEARRAY8       = b'\x96'  # push bytearray
NEXT_BUFFER      = b'\x97'  # push next out-of-band buffer
READONLY_BUFFER  = b'\x98'  # make top of stack readonly

# Re-export every ALL_CAPS opcode name defined above.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])


class _Framer:
    # Buffers outgoing pickle data into protocol 4 "frames": chunks prefixed
    # by a FRAME opcode and an 8-byte length, so readers can pre-allocate.

    _FRAME_SIZE_MIN = 4           # frames smaller than this are written bare
    _FRAME_SIZE_TARGET = 64 * 1024  # flush a frame once it reaches this size

    def __init__(self, file_write):
        self.file_write = file_write
        self.current_frame = None   # io.BytesIO while framing, else None

    def start_framing(self):
        self.current_frame = io.BytesIO()

    def end_framing(self):
        if self.current_frame and self.current_frame.tell() > 0:
            self.commit_frame(force=True)
            self.current_frame = None

    def commit_frame(self, force=False):
        # Flush the pending frame to the file if it is large enough (or if
        # forced); otherwise keep accumulating into the same buffer.
        if self.current_frame:
            f = self.current_frame
            if f.tell() >= self._FRAME_SIZE_TARGET or force:
                data = f.getbuffer()
                write = self.file_write
                if len(data) >= self._FRAME_SIZE_MIN:
                    # Issue a single call to the write method of the underlying
                    # file object for the frame opcode with the size of the
                    # frame. The concatenation is expected to be less expensive
                    # than issuing an additional call to write.
                    write(FRAME + pack("<Q", len(data)))

                    # Issue a separate call to write to append the frame
                    # contents without concatenation to the above to avoid a
                    # memory copy.
                    write(data)

                # Start the new frame with a new io.BytesIO instance so that
                # the file object can have delayed access to the previous frame
                # contents via an unreleased memoryview of the previous
                # io.BytesIO instance.
                self.current_frame = io.BytesIO()

    def write(self, data):
        # Route writes into the current frame when framing, straight to the
        # file otherwise.
        if self.current_frame:
            return self.current_frame.write(data)
        else:
            return self.file_write(data)

    def write_large_bytes(self, header, payload):
        # Bypass framing for large binary payloads: the opcode+length header
        # and the payload go directly to the file.
        write = self.file_write
        if self.current_frame:
            # Terminate the current frame and flush it to the file.
            self.commit_frame(force=True)

        # Perform direct write of the header and payload of the large binary
        # object. Be careful not to concatenate the header and the payload
        # prior to calling 'write' as we do not want to allocate a large
        # temporary bytes object.
        # We intentionally do not insert a protocol 4 frame opcode to make
        # it possible to optimize file.read calls in the loader.
        write(header)
        write(payload)


class _Unframer:
    # Read-side counterpart of _Framer: serves reads from the current frame
    # buffer when one is active, falling back to the underlying file.

    def __init__(self, file_read, file_readline, file_tell=None):
        # NOTE(review): file_tell is accepted but never stored or used in
        # this class -- presumably kept for API symmetry; confirm.
        self.file_read = file_read
        self.file_readline = file_readline
        self.current_frame = None   # io.BytesIO for the active frame, or None

    def readinto(self, buf):
        if self.current_frame:
            n = self.current_frame.readinto(buf)
            if n == 0 and len(buf) != 0:
                # Frame exhausted exactly at a boundary: drop it and satisfy
                # the read from the underlying file instead.
                self.current_frame = None
                n = len(buf)
                buf[:] = self.file_read(n)
                return n
            if n < len(buf):
                # A partial read mid-frame means the frame was truncated.
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return n
        else:
            n = len(buf)
            buf[:] = self.file_read(n)
            return n

    def read(self, n):
        if self.current_frame:
            data = self.current_frame.read(n)
            if not data and n != 0:
                # Frame exhausted at a boundary; fall back to the file.
                self.current_frame = None
                return self.file_read(n)
            if len(data) < n:
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_read(n)

    def readline(self):
        if self.current_frame:
            data = self.current_frame.readline()
            if not data:
                self.current_frame = None
                return self.file_readline()
            if data[-1] != b'\n'[0]:
                # A line may not straddle a frame boundary.
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_readline()

    def load_frame(self, frame_size):
        # Begin a new frame; the previous one must be fully consumed first.
        if self.current_frame and self.current_frame.read() != b'':
            raise UnpicklingError(
                "beginning of a new frame before end of current frame")
        self.current_frame = io.BytesIO(self.file_read(frame_size))


# Tools used for pickling.

def _getattribute(obj, name):
    # Resolve a dotted attribute path starting at obj.  Returns the final
    # attribute and its immediate parent (needed by save_global to decide
    # between a plain GLOBAL reference and a getattr-based reduce).
    for subpath in name.split('.'):
        if subpath == '<locals>':
            # Objects defined inside a function body cannot be looked up
            # by qualified name, hence cannot be pickled by reference.
            raise AttributeError("Can't get local attribute {!r} on {!r}"
                                 .format(name, obj))
        try:
            parent = obj
            obj = getattr(obj, subpath)
        except AttributeError:
            raise AttributeError("Can't get attribute {!r} on {!r}"
                                 .format(name, obj)) from None
    return obj, parent

def whichmodule(obj, name):
    """Find the module an object belongs to."""
    module_name = getattr(obj, '__module__', None)
    if module_name is not None:
        return module_name
    # Protect the iteration by using a list copy of sys.modules against dynamic
    # modules that trigger imports of other modules upon calls to getattr.
    for module_name, module in sys.modules.copy().items():
        if module_name == '__main__' or module is None:
            continue
        try:
            if _getattribute(module, name)[0] is obj:
                return module_name
        except AttributeError:
            pass
    return '__main__'

def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0 is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    >>>
    """
    if x == 0:
        return b''
    # bit_length//8 + 1 bytes always suffices for a signed encoding ...
    nbytes = (x.bit_length() >> 3) + 1
    result = x.to_bytes(nbytes, byteorder='little', signed=True)
    # ... but for negatives it may leave a redundant 0xff sign byte; strip
    # it when the preceding byte already carries the sign bit.
    if x < 0 and nbytes > 1:
        if result[-1] == 0xff and (result[-2] & 0x80) != 0:
            result = result[:-1]
    return result

def decode_long(data):
    r"""Decode a long from a two's complement little-endian binary string.

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    return int.from_bytes(data, byteorder='little', signed=True)


# Pickling machinery

class _Pickler:

    def __init__(self, file, protocol=None, *, fix_imports=True,
                 buffer_callback=None):
        """This takes a binary file for writing a pickle data stream.

        The optional *protocol* argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2, 3, 4 and 5.
        The default protocol is 4. It was introduced in Python 3.4, and
        is incompatible with previous versions.

        Specifying a negative protocol version selects the highest
        protocol version supported.  The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The *file* argument must have a write() method that accepts a
        single bytes argument. It can thus be a file object opened for
        binary writing, an io.BytesIO instance, or any other custom
        object that meets this interface.

        If *fix_imports* is True and *protocol* is less than 3, pickle
        will try to map the new Python 3 names to the old module names
        used in Python 2, so that the pickle data stream is readable
        with Python 2.

        If *buffer_callback* is None (the default), buffer views are
        serialized into *file* as part of the pickle stream.

        If *buffer_callback* is not None, then it can be called any number
        of times with a buffer view.  If the callback returns a false value
        (such as None), the given buffer is out-of-band; otherwise the
        buffer is serialized in-band, i.e. inside the pickle stream.

        It is an error if *buffer_callback* is not None and *protocol*
        is None or smaller than 5.
        """
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        if protocol < 0:
            # Negative means "highest available".
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        if buffer_callback is not None and protocol < 5:
            raise ValueError("buffer_callback needs protocol >= 5")
        self._buffer_callback = buffer_callback
        try:
            self._file_write = file.write
        except AttributeError:
            raise TypeError("file must have a 'write' attribute")
        self.framer = _Framer(self._file_write)
        self.write = self.framer.write
        self._write_large_bytes = self.framer.write_large_bytes
        self.memo = {}                  # id(obj) -> (memo key, obj)
        self.proto = int(protocol)
        self.bin = protocol >= 1        # binary opcodes available
        self.fast = 0                   # when true, memoization is disabled
        self.fix_imports = fix_imports and protocol < 3

    def clear_memo(self):
        """Clears the pickler's "memo".

        The memo is the data structure that remembers which objects the
        pickler has already seen, so that shared or recursive objects
        are pickled by reference and not by value.  This method is
        useful when re-using picklers.
        """
        self.memo.clear()

    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        # Check whether Pickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Pickler.dump().
        if not hasattr(self, "_file_write"):
            raise PicklingError("Pickler.__init__() was not called by "
                                "%s.__init__()" % (self.__class__.__name__,))
        if self.proto >= 2:
            self.write(PROTO + pack("<B", self.proto))
        if self.proto >= 4:
            self.framer.start_framing()
        self.save(obj)
        self.write(STOP)
        self.framer.end_framing()

    def memoize(self, obj):
        """Store an object in the memo."""

        # The Pickler memo is a dictionary mapping object ids to 2-tuples
        # that contain the Unpickler memo key and the object being memoized.
        # The memo key is written to the pickle and will become
        # the key in the Unpickler's memo.  The object is stored in the
        # Pickler memo so that transient objects are kept alive during
        # pickling.

        # The use of the Unpickler memo length as the memo key is just a
        # convention.  The only requirement is that the memo values be unique.
        # But there appears no advantage to any other scheme, and this
        # scheme allows the Unpickler memo to be implemented as a plain (but
        # growable) array, indexed by memo key.
        if self.fast:
            return
        assert id(obj) not in self.memo
        idx = len(self.memo)
        self.write(self.put(idx))
        self.memo[id(obj)] = idx, obj

    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
    def put(self, idx):
        if self.proto >= 4:
            # MEMOIZE takes no argument: the unpickler appends to its memo.
            return MEMOIZE
        elif self.bin:
            if idx < 256:
                return BINPUT + pack("<B", idx)
            else:
                return LONG_BINPUT + pack("<I", idx)
        else:
            return PUT + repr(idx).encode("ascii") + b'\n'

    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
    def get(self, i):
        # i is the memo key previously emitted by put()/memoize().
        if self.bin:
            if i < 256:
                return BINGET + pack("<B", i)
            else:
                return LONG_BINGET + pack("<I", i)

        return GET + repr(i).encode("ascii") + b'\n'

    def save(self, obj, save_persistent_id=True):
        # Central dispatch: emit the opcodes that reconstruct obj.
        # Precedence: persistent id -> memo -> reducer_override -> type
        # dispatch table -> dispatch_table -> __reduce_ex__/__reduce__.
        self.framer.commit_frame()

        # Check for persistent id (defined by a subclass)
        pid = self.persistent_id(obj)
        if pid is not None and save_persistent_id:
            self.save_pers(pid)
            return

        # Check the memo
        x = self.memo.get(id(obj))
        if x is not None:
            self.write(self.get(x[0]))
            return

        rv = NotImplemented
        reduce = getattr(self, "reducer_override", None)
        if reduce is not None:
            rv = reduce(obj)

        if rv is NotImplemented:
            # Check the type dispatch table
            t = type(obj)
            f = self.dispatch.get(t)
            if f is not None:
                f(self, obj)  # Call unbound method with explicit self
                return

            # Check private dispatch table if any, or else
            # copyreg.dispatch_table
            reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
            if reduce is not None:
                rv = reduce(obj)
            else:
                # Check for a class with a custom metaclass; treat as regular
                # class
                if issubclass(t, type):
                    self.save_global(obj)
                    return

                # Check for a __reduce_ex__ method, fall back to __reduce__
                reduce = getattr(obj, "__reduce_ex__", None)
                if reduce is not None:
                    rv = reduce(self.proto)
                else:
                    reduce = getattr(obj, "__reduce__", None)
                    if reduce is not None:
                        rv = reduce()
                    else:
                        raise PicklingError("Can't pickle %r object: %r" %
                                            (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if isinstance(rv, str):
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if not isinstance(rv, tuple):
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 6):
            raise PicklingError("Tuple returned by %s must have "
                                "two to six elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self.save_reduce(obj=obj, *rv)

    def persistent_id(self, obj):
        # This exists so a subclass can override it
        return None

    def save_pers(self, pid):
        # Save a persistent id reference
        if self.bin:
            self.save(pid, save_persistent_id=False)
            self.write(BINPERSID)
        else:
            try:
                self.write(PERSID + str(pid).encode("ascii") + b'\n')
            except UnicodeEncodeError:
                raise PicklingError(
                    "persistent IDs in protocol 0 must be ASCII strings")

    def save_reduce(self, func, args, state=None, listitems=None,
                    dictitems=None, state_setter=None, obj=None):
        # This API is called by some subclasses

        if not isinstance(args, tuple):
            raise PicklingError("args from save_reduce() must be a tuple")
        if not callable(func):
            raise PicklingError("func from save_reduce() must be callable")

        save = self.save
        write = self.write

        func_name = getattr(func, "__name__", "")
        if self.proto >= 2 and func_name == "__newobj_ex__":
            cls, args, kwargs = args
            if not hasattr(cls, "__new__"):
                raise PicklingError("args[0] from {} args has no __new__"
                                    .format(func_name))
            if obj is not None and cls is not obj.__class__:
                raise PicklingError("args[0] from {} args has the wrong class"
                                    .format(func_name))
            if self.proto >= 4:
                save(cls)
                save(args)
                save(kwargs)
                write(NEWOBJ_EX)
            else:
                # Protocols 2-3 lack NEWOBJ_EX: fall back to pickling a
                # partial that performs the keyword-argument __new__ call.
                func = partial(cls.__new__, cls, *args, **kwargs)
                save(func)
                save(())
                write(REDUCE)
        elif self.proto >= 2 and func_name == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 or newer to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally.  For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # class.  The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below).  If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own.  This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))
            else:
                self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            if state_setter is None:
                save(state)
                write(BUILD)
            else:
                # If a state_setter is specified, call it instead of load_build
                # to update obj's with its previous state.
                # First, push state_setter and its tuple of expected arguments
                # (obj, state) onto the stack.
                save(state_setter)
                save(obj)  # simple BINGET opcode as obj is already memoized.
                save(state)
                write(TUPLE2)
                # Trigger a state_setter(obj, state) function call.
                write(REDUCE)
                # The purpose of state_setter is to carry-out an
                # inplace modification of obj. We do not care about what the
                # method might return, so its output is eventually removed from
                # the stack.
                write(POP)

    # Methods below this point are dispatched through the dispatch table

    dispatch = {}

    def save_none(self, obj):
        self.write(NONE)
    dispatch[type(None)] = save_none

    def save_bool(self, obj):
        if self.proto >= 2:
            self.write(NEWTRUE if obj else NEWFALSE)
        else:
            self.write(TRUE if obj else FALSE)
    dispatch[bool] = save_bool

    def save_long(self, obj):
        if self.bin:
            # If the int is small enough to fit in a signed 4-byte 2's-comp
            # format, we can store it more efficiently than the general
            # case.
            # First one- and two-byte unsigned ints:
            if obj >= 0:
                if obj <= 0xff:
                    self.write(BININT1 + pack("<B", obj))
                    return
                if obj <= 0xffff:
                    self.write(BININT2 + pack("<H", obj))
                    return
            # Next check for 4-byte signed ints:
            if -0x80000000 <= obj <= 0x7fffffff:
                self.write(BININT + pack("<i", obj))
                return
        if self.proto >= 2:
            encoded = encode_long(obj)
            n = len(encoded)
            if n < 256:
                self.write(LONG1 + pack("<B", n) + encoded)
            else:
                self.write(LONG4 + pack("<i", n) + encoded)
            return
        if -0x80000000 <= obj <= 0x7fffffff:
            self.write(INT + repr(obj).encode("ascii") + b'\n')
        else:
            # Trailing 'L' keeps protocol 0 output readable by Python 2.
            self.write(LONG + repr(obj).encode("ascii") + b'L\n')
    dispatch[int] = save_long

    def save_float(self, obj):
        if self.bin:
            self.write(BINFLOAT + pack('>d', obj))
        else:
            self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
    dispatch[float] = save_float

    def save_bytes(self, obj):
        if self.proto < 3:
            # Protocols 0-2 have no bytes opcodes; round-trip via latin-1.
            if not obj: # bytes object is empty
                self.save_reduce(bytes, (), obj=obj)
            else:
                self.save_reduce(codecs.encode,
                                 (str(obj, 'latin1'), 'latin1'), obj=obj)
            return
        n = len(obj)
        if n <= 0xff:
            self.write(SHORT_BINBYTES + pack("<B", n) + obj)
        elif n > 0xffffffff and self.proto >= 4:
            self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj)
        elif n >= self.framer._FRAME_SIZE_TARGET:
            self._write_large_bytes(BINBYTES + pack("<I", n), obj)
        else:
            self.write(BINBYTES + pack("<I", n) + obj)
        self.memoize(obj)
    dispatch[bytes] = save_bytes

    def save_bytearray(self, obj):
        if self.proto < 5:
            if not obj: # bytearray is empty
                self.save_reduce(bytearray, (), obj=obj)
            else:
                self.save_reduce(bytearray, (bytes(obj),), obj=obj)
            return
        n = len(obj)
        # NOTE(review): unlike save_bytes/save_str, this protocol-5 path
        # never calls self.memoize(obj), so a bytearray referenced more than
        # once is re-serialized each time and shared identity is lost on
        # unpickling -- later CPython adds the memoize call; confirm intended.
        if n >= self.framer._FRAME_SIZE_TARGET:
            self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
        else:
            self.write(BYTEARRAY8 + pack("<Q", n) + obj)
    dispatch[bytearray] = save_bytearray

    if _HAVE_PICKLE_BUFFER:
        def save_picklebuffer(self, obj):
            if self.proto < 5:
                raise PicklingError("PickleBuffer can only pickled with "
                                    "protocol >= 5")
            with obj.raw() as m:
                if not m.contiguous:
                    raise PicklingError("PickleBuffer can not be pickled when "
                                        "pointing to a non-contiguous buffer")
                in_band = True
                if self._buffer_callback is not None:
                    # Falsy return from the callback means out-of-band.
                    in_band = bool(self._buffer_callback(obj))
                if in_band:
                    # Write data in-band
                    # XXX The C implementation avoids a copy here
                    if m.readonly:
                        self.save_bytes(m.tobytes())
                    else:
                        self.save_bytearray(m.tobytes())
                else:
                    # Write data out-of-band
                    self.write(NEXT_BUFFER)
                    if m.readonly:
                        self.write(READONLY_BUFFER)

        dispatch[PickleBuffer] = save_picklebuffer

    def save_str(self, obj):
        if self.bin:
            # 'surrogatepass' lets lone surrogates survive the UTF-8 round
            # trip.
            encoded = obj.encode('utf-8', 'surrogatepass')
            n = len(encoded)
            if n <= 0xff and self.proto >= 4:
                self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
            elif n > 0xffffffff and self.proto >= 4:
                self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
            elif n >= self.framer._FRAME_SIZE_TARGET:
                self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
            else:
                self.write(BINUNICODE + pack("<I", n) + encoded)
        else:
            # Protocol 0: escape characters that would break the
            # newline-terminated raw-unicode-escape representation.
            obj = obj.replace("\\", "\\u005c")
            obj = obj.replace("\0", "\\u0000")
            obj = obj.replace("\n", "\\u000a")
            obj = obj.replace("\r", "\\u000d")
            obj = obj.replace("\x1a", "\\u001a")  # EOF on DOS
            self.write(UNICODE + obj.encode('raw-unicode-escape') +
                       b'\n')
        self.memoize(obj)
    dispatch[str] = save_str

    def save_tuple(self, obj):
        if not obj: # tuple is empty
            if self.bin:
                self.write(EMPTY_TUPLE)
            else:
                self.write(MARK + TUPLE)
            return

        n = len(obj)
        save = self.save
        memo = self.memo
        if n <= 3 and self.proto >= 2:
            for element in obj:
                save(element)
            # Subtle.  Same as in the big comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                self.write(POP * n + get)
            else:
                self.write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
        # has more than 3 elements.
        write = self.write
        write(MARK)
        for element in obj:
            save(element)

        if id(obj) in memo:
            # Subtle.  d was not in memo when we entered save_tuple(), so
            # the process of saving the tuple's elements must have saved
            # the tuple itself:  the tuple is recursive.  The proper action
            # now is to throw away everything we put on the stack, and
            # simply GET the tuple (it's already constructed).  This check
            # could have been done in the "for element" loop instead, but
            # recursive tuples are a rare thing.
            get = self.get(memo[id(obj)][0])
            if self.bin:
                write(POP_MARK + get)
            else:   # proto 0 -- POP_MARK not available
                write(POP * (n+1) + get)
            return

        # No recursion.
        write(TUPLE)
        self.memoize(obj)

    dispatch[tuple] = save_tuple

    def save_list(self, obj):
        if self.bin:
            self.write(EMPTY_LIST)
        else:   # proto 0 -- can't use EMPTY_LIST
            self.write(MARK + LIST)

        # Memoize before the items so self-referential lists work.
        self.memoize(obj)
        self._batch_appends(obj)

    dispatch[list] = save_list

    _BATCHSIZE = 1000

    def _batch_appends(self, items):
        # Helper to batch up APPENDS sequences
        save = self.save
        write = self.write

        if not self.bin:
            for x in items:
                save(x)
                write(APPEND)
            return

        it = iter(items)
        while True:
            tmp = list(islice(it, self._BATCHSIZE))
            n = len(tmp)
            if n > 1:
                write(MARK)
                for x in tmp:
                    save(x)
                write(APPENDS)
            elif n:
                save(tmp[0])
                write(APPEND)
            # else tmp is empty, and we're done
            if n < self._BATCHSIZE:
                return

    def save_dict(self, obj):
        if self.bin:
            self.write(EMPTY_DICT)
        else:   # proto 0 -- can't use EMPTY_DICT
            self.write(MARK + DICT)

        # Memoize before the items so self-referential dicts work.
        self.memoize(obj)
        self._batch_setitems(obj.items())

    dispatch[dict] = save_dict
    if PyStringMap is not None:
        dispatch[PyStringMap] = save_dict

    def _batch_setitems(self, items):
        # Helper to batch up SETITEMS sequences; proto >= 1 only
        save = self.save
        write = self.write

        if not self.bin:
            for k, v in items:
                save(k)
                save(v)
                write(SETITEM)
            return

        it = iter(items)
        while True:
            tmp = list(islice(it, self._BATCHSIZE))
            n = len(tmp)
            if n > 1:
                write(MARK)
                for k, v in tmp:
                    save(k)
                    save(v)
                write(SETITEMS)
            elif n:
                k, v = tmp[0]
                save(k)
                save(v)
                write(SETITEM)
            # else tmp is empty, and we're done
            if n < self._BATCHSIZE:
                return

    def save_set(self, obj):
        save = self.save
        write = self.write

        if self.proto < 4:
            self.save_reduce(set, (list(obj),), obj=obj)
            return

        write(EMPTY_SET)
        # Memoize before the items so self-referential sets work.
        self.memoize(obj)

        it = iter(obj)
        while True:
            batch = list(islice(it, self._BATCHSIZE))
            n = len(batch)
            if n > 0:
                write(MARK)
                for item in batch:
                    save(item)
                write(ADDITEMS)
            if n < self._BATCHSIZE:
                return
    dispatch[set] = save_set

    def save_frozenset(self, obj):
        save = self.save
        write = self.write

        if self.proto < 4:
            self.save_reduce(frozenset, (list(obj),), obj=obj)
            return

        write(MARK)
        for item in obj:
            save(item)

        if id(obj) in self.memo:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            write(POP_MARK + self.get(self.memo[id(obj)][0]))
            return

        write(FROZENSET)
        self.memoize(obj)
    dispatch[frozenset] = save_frozenset

    def save_global(self, obj, name=None):
        write = self.write
        memo = self.memo

        if name is None:
            name = getattr(obj, '__qualname__', None)
        if name is None:
            name = obj.__name__

        module_name = whichmodule(obj, name)
        try:
            __import__(module_name, level=0)
            module = sys.modules[module_name]
            obj2, parent = _getattribute(module, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module_name, name)) from None
        else:
            if obj2 is not obj:
                # The name resolves to a different object: pickling by
                # reference would produce the wrong thing on load.
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module_name, name))

        if self.proto >= 2:
            code = _extension_registry.get((module_name, name))
            if code:
                assert code > 0
                if code <= 0xff:
                    write(EXT1 + pack("<B", code))
                elif code <= 0xffff:
                    write(EXT2 + pack("<H", code))
                else:
                    write(EXT4 + pack("<i", code))
                return
        lastname = name.rpartition('.')[2]
        if parent is module:
            name = lastname
        # Non-ASCII identifiers are supported only with protocols >= 3.
        if self.proto >= 4:
            self.save(module_name)
            self.save(name)
            write(STACK_GLOBAL)
        elif parent is not module:
            # Nested attribute (e.g. a method): reconstruct via getattr.
            self.save_reduce(getattr, (parent, lastname))
        elif self.proto >= 3:
            write(GLOBAL + bytes(module_name, "utf-8") + b'\n' +
                  bytes(name, "utf-8") + b'\n')
        else:
            if self.fix_imports:
                r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
                r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
                if (module_name, name) in r_name_mapping:
                    module_name, name = r_name_mapping[(module_name, name)]
                elif module_name in r_import_mapping:
                    module_name = r_import_mapping[module_name]
            try:
                # NOTE(review): the message below interpolates the module
                # *object* (module), not module_name -- looks like a
                # long-standing quirk; confirm before changing.
                write(GLOBAL + bytes(module_name, "ascii") + b'\n' +
                      bytes(name, "ascii") + b'\n')
            except UnicodeEncodeError:
                raise PicklingError(
                    "can't pickle global identifier '%s.%s' using "
                    "pickle protocol %i" % (module, name, self.proto)) from None

        self.memoize(obj)

    def save_type(self, obj):
        # The NoneType/NotImplementedType/ellipsis classes have no importable
        # name, so pickle them as calls to type() on their singleton.
        if obj is type(None):
            return self.save_reduce(type, (None,), obj=obj)
        elif obj is type(NotImplemented):
            return self.save_reduce(type, (NotImplemented,), obj=obj)
        elif obj is type(...):
            return self.save_reduce(type, (...,), obj=obj)
        return self.save_global(obj)

    dispatch[FunctionType] = save_global
    dispatch[type] = save_type


# Unpickling machinery

class _Unpickler:

    def __init__(self, file, *, fix_imports=True,
                 encoding="ASCII", errors="strict", buffers=None):
        """This takes a binary file for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so
        no proto argument is needed.

        The argument *file* must have two methods, a read() method that
        takes an integer argument, and a readline() method that requires
        no arguments.
Both methods should return bytes. Thus *file* 1146 can be a binary file object opened for reading, an io.BytesIO 1147 object, or any other custom object that meets this interface. 1148 1149 The file-like object must have two methods, a read() method 1150 that takes an integer argument, and a readline() method that 1151 requires no arguments. Both methods should return bytes. 1152 Thus file-like object can be a binary file object opened for 1153 reading, a BytesIO object, or any other custom object that 1154 meets this interface. 1155 1156 If *buffers* is not None, it should be an iterable of buffer-enabled 1157 objects that is consumed each time the pickle stream references 1158 an out-of-band buffer view. Such buffers have been given in order 1159 to the *buffer_callback* of a Pickler object. 1160 1161 If *buffers* is None (the default), then the buffers are taken 1162 from the pickle stream, assuming they are serialized there. 1163 It is an error for *buffers* to be None if the pickle stream 1164 was produced with a non-None *buffer_callback*. 1165 1166 Other optional arguments are *fix_imports*, *encoding* and 1167 *errors*, which are used to control compatibility support for 1168 pickle stream generated by Python 2. If *fix_imports* is True, 1169 pickle will try to map the old Python 2 names to the new names 1170 used in Python 3. The *encoding* and *errors* tell pickle how 1171 to decode 8-bit string instances pickled by Python 2; these 1172 default to 'ASCII' and 'strict', respectively. *encoding* can be 1173 'bytes' to read theses 8-bit string instances as bytes objects. 1174 """ 1175 self._buffers = iter(buffers) if buffers is not None else None 1176 self._file_readline = file.readline 1177 self._file_read = file.read 1178 self.memo = {} 1179 self.encoding = encoding 1180 self.errors = errors 1181 self.proto = 0 1182 self.fix_imports = fix_imports 1183 1184 def load(self): 1185 """Read a pickled object representation from the open file. 
1186 1187 Return the reconstituted object hierarchy specified in the file. 1188 """ 1189 # Check whether Unpickler was initialized correctly. This is 1190 # only needed to mimic the behavior of _pickle.Unpickler.dump(). 1191 if not hasattr(self, "_file_read"): 1192 raise UnpicklingError("Unpickler.__init__() was not called by " 1193 "%s.__init__()" % (self.__class__.__name__,)) 1194 self._unframer = _Unframer(self._file_read, self._file_readline) 1195 self.read = self._unframer.read 1196 self.readinto = self._unframer.readinto 1197 self.readline = self._unframer.readline 1198 self.metastack = [] 1199 self.stack = [] 1200 self.append = self.stack.append 1201 self.proto = 0 1202 read = self.read 1203 dispatch = self.dispatch 1204 try: 1205 while True: 1206 key = read(1) 1207 if not key: 1208 raise EOFError 1209 assert isinstance(key, bytes_types) 1210 dispatch[key[0]](self) 1211 except _Stop as stopinst: 1212 return stopinst.value 1213 1214 # Return a list of items pushed in the stack after last MARK instruction. 
1215 def pop_mark(self): 1216 items = self.stack 1217 self.stack = self.metastack.pop() 1218 self.append = self.stack.append 1219 return items 1220 1221 def persistent_load(self, pid): 1222 raise UnpicklingError("unsupported persistent id encountered") 1223 1224 dispatch = {} 1225 1226 def load_proto(self): 1227 proto = self.read(1)[0] 1228 if not 0 <= proto <= HIGHEST_PROTOCOL: 1229 raise ValueError("unsupported pickle protocol: %d" % proto) 1230 self.proto = proto 1231 dispatch[PROTO[0]] = load_proto 1232 1233 def load_frame(self): 1234 frame_size, = unpack('<Q', self.read(8)) 1235 if frame_size > sys.maxsize: 1236 raise ValueError("frame size > sys.maxsize: %d" % frame_size) 1237 self._unframer.load_frame(frame_size) 1238 dispatch[FRAME[0]] = load_frame 1239 1240 def load_persid(self): 1241 try: 1242 pid = self.readline()[:-1].decode("ascii") 1243 except UnicodeDecodeError: 1244 raise UnpicklingError( 1245 "persistent IDs in protocol 0 must be ASCII strings") 1246 self.append(self.persistent_load(pid)) 1247 dispatch[PERSID[0]] = load_persid 1248 1249 def load_binpersid(self): 1250 pid = self.stack.pop() 1251 self.append(self.persistent_load(pid)) 1252 dispatch[BINPERSID[0]] = load_binpersid 1253 1254 def load_none(self): 1255 self.append(None) 1256 dispatch[NONE[0]] = load_none 1257 1258 def load_false(self): 1259 self.append(False) 1260 dispatch[NEWFALSE[0]] = load_false 1261 1262 def load_true(self): 1263 self.append(True) 1264 dispatch[NEWTRUE[0]] = load_true 1265 1266 def load_int(self): 1267 data = self.readline() 1268 if data == FALSE[1:]: 1269 val = False 1270 elif data == TRUE[1:]: 1271 val = True 1272 else: 1273 val = int(data, 0) 1274 self.append(val) 1275 dispatch[INT[0]] = load_int 1276 1277 def load_binint(self): 1278 self.append(unpack('<i', self.read(4))[0]) 1279 dispatch[BININT[0]] = load_binint 1280 1281 def load_binint1(self): 1282 self.append(self.read(1)[0]) 1283 dispatch[BININT1[0]] = load_binint1 1284 1285 def load_binint2(self): 1286 
self.append(unpack('<H', self.read(2))[0]) 1287 dispatch[BININT2[0]] = load_binint2 1288 1289 def load_long(self): 1290 val = self.readline()[:-1] 1291 if val and val[-1] == b'L'[0]: 1292 val = val[:-1] 1293 self.append(int(val, 0)) 1294 dispatch[LONG[0]] = load_long 1295 1296 def load_long1(self): 1297 n = self.read(1)[0] 1298 data = self.read(n) 1299 self.append(decode_long(data)) 1300 dispatch[LONG1[0]] = load_long1 1301 1302 def load_long4(self): 1303 n, = unpack('<i', self.read(4)) 1304 if n < 0: 1305 # Corrupt or hostile pickle -- we never write one like this 1306 raise UnpicklingError("LONG pickle has negative byte count") 1307 data = self.read(n) 1308 self.append(decode_long(data)) 1309 dispatch[LONG4[0]] = load_long4 1310 1311 def load_float(self): 1312 self.append(float(self.readline()[:-1])) 1313 dispatch[FLOAT[0]] = load_float 1314 1315 def load_binfloat(self): 1316 self.append(unpack('>d', self.read(8))[0]) 1317 dispatch[BINFLOAT[0]] = load_binfloat 1318 1319 def _decode_string(self, value): 1320 # Used to allow strings from Python 2 to be decoded either as 1321 # bytes or Unicode strings. This should be used only with the 1322 # STRING, BINSTRING and SHORT_BINSTRING opcodes. 
1323 if self.encoding == "bytes": 1324 return value 1325 else: 1326 return value.decode(self.encoding, self.errors) 1327 1328 def load_string(self): 1329 data = self.readline()[:-1] 1330 # Strip outermost quotes 1331 if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'': 1332 data = data[1:-1] 1333 else: 1334 raise UnpicklingError("the STRING opcode argument must be quoted") 1335 self.append(self._decode_string(codecs.escape_decode(data)[0])) 1336 dispatch[STRING[0]] = load_string 1337 1338 def load_binstring(self): 1339 # Deprecated BINSTRING uses signed 32-bit length 1340 len, = unpack('<i', self.read(4)) 1341 if len < 0: 1342 raise UnpicklingError("BINSTRING pickle has negative byte count") 1343 data = self.read(len) 1344 self.append(self._decode_string(data)) 1345 dispatch[BINSTRING[0]] = load_binstring 1346 1347 def load_binbytes(self): 1348 len, = unpack('<I', self.read(4)) 1349 if len > maxsize: 1350 raise UnpicklingError("BINBYTES exceeds system's maximum size " 1351 "of %d bytes" % maxsize) 1352 self.append(self.read(len)) 1353 dispatch[BINBYTES[0]] = load_binbytes 1354 1355 def load_unicode(self): 1356 self.append(str(self.readline()[:-1], 'raw-unicode-escape')) 1357 dispatch[UNICODE[0]] = load_unicode 1358 1359 def load_binunicode(self): 1360 len, = unpack('<I', self.read(4)) 1361 if len > maxsize: 1362 raise UnpicklingError("BINUNICODE exceeds system's maximum size " 1363 "of %d bytes" % maxsize) 1364 self.append(str(self.read(len), 'utf-8', 'surrogatepass')) 1365 dispatch[BINUNICODE[0]] = load_binunicode 1366 1367 def load_binunicode8(self): 1368 len, = unpack('<Q', self.read(8)) 1369 if len > maxsize: 1370 raise UnpicklingError("BINUNICODE8 exceeds system's maximum size " 1371 "of %d bytes" % maxsize) 1372 self.append(str(self.read(len), 'utf-8', 'surrogatepass')) 1373 dispatch[BINUNICODE8[0]] = load_binunicode8 1374 1375 def load_binbytes8(self): 1376 len, = unpack('<Q', self.read(8)) 1377 if len > maxsize: 1378 raise 
UnpicklingError("BINBYTES8 exceeds system's maximum size " 1379 "of %d bytes" % maxsize) 1380 self.append(self.read(len)) 1381 dispatch[BINBYTES8[0]] = load_binbytes8 1382 1383 def load_bytearray8(self): 1384 len, = unpack('<Q', self.read(8)) 1385 if len > maxsize: 1386 raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size " 1387 "of %d bytes" % maxsize) 1388 b = bytearray(len) 1389 self.readinto(b) 1390 self.append(b) 1391 dispatch[BYTEARRAY8[0]] = load_bytearray8 1392 1393 def load_next_buffer(self): 1394 if self._buffers is None: 1395 raise UnpicklingError("pickle stream refers to out-of-band data " 1396 "but no *buffers* argument was given") 1397 try: 1398 buf = next(self._buffers) 1399 except StopIteration: 1400 raise UnpicklingError("not enough out-of-band buffers") 1401 self.append(buf) 1402 dispatch[NEXT_BUFFER[0]] = load_next_buffer 1403 1404 def load_readonly_buffer(self): 1405 buf = self.stack[-1] 1406 with memoryview(buf) as m: 1407 if not m.readonly: 1408 self.stack[-1] = m.toreadonly() 1409 dispatch[READONLY_BUFFER[0]] = load_readonly_buffer 1410 1411 def load_short_binstring(self): 1412 len = self.read(1)[0] 1413 data = self.read(len) 1414 self.append(self._decode_string(data)) 1415 dispatch[SHORT_BINSTRING[0]] = load_short_binstring 1416 1417 def load_short_binbytes(self): 1418 len = self.read(1)[0] 1419 self.append(self.read(len)) 1420 dispatch[SHORT_BINBYTES[0]] = load_short_binbytes 1421 1422 def load_short_binunicode(self): 1423 len = self.read(1)[0] 1424 self.append(str(self.read(len), 'utf-8', 'surrogatepass')) 1425 dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode 1426 1427 def load_tuple(self): 1428 items = self.pop_mark() 1429 self.append(tuple(items)) 1430 dispatch[TUPLE[0]] = load_tuple 1431 1432 def load_empty_tuple(self): 1433 self.append(()) 1434 dispatch[EMPTY_TUPLE[0]] = load_empty_tuple 1435 1436 def load_tuple1(self): 1437 self.stack[-1] = (self.stack[-1],) 1438 dispatch[TUPLE1[0]] = load_tuple1 1439 1440 def 
load_tuple2(self): 1441 self.stack[-2:] = [(self.stack[-2], self.stack[-1])] 1442 dispatch[TUPLE2[0]] = load_tuple2 1443 1444 def load_tuple3(self): 1445 self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])] 1446 dispatch[TUPLE3[0]] = load_tuple3 1447 1448 def load_empty_list(self): 1449 self.append([]) 1450 dispatch[EMPTY_LIST[0]] = load_empty_list 1451 1452 def load_empty_dictionary(self): 1453 self.append({}) 1454 dispatch[EMPTY_DICT[0]] = load_empty_dictionary 1455 1456 def load_empty_set(self): 1457 self.append(set()) 1458 dispatch[EMPTY_SET[0]] = load_empty_set 1459 1460 def load_frozenset(self): 1461 items = self.pop_mark() 1462 self.append(frozenset(items)) 1463 dispatch[FROZENSET[0]] = load_frozenset 1464 1465 def load_list(self): 1466 items = self.pop_mark() 1467 self.append(items) 1468 dispatch[LIST[0]] = load_list 1469 1470 def load_dict(self): 1471 items = self.pop_mark() 1472 d = {items[i]: items[i+1] 1473 for i in range(0, len(items), 2)} 1474 self.append(d) 1475 dispatch[DICT[0]] = load_dict 1476 1477 # INST and OBJ differ only in how they get a class object. It's not 1478 # only sensible to do the rest in a common routine, the two routines 1479 # previously diverged and grew different bugs. 1480 # klass is the class to instantiate, and k points to the topmost mark 1481 # object, following which are the arguments for klass.__init__. 
1482 def _instantiate(self, klass, args): 1483 if (args or not isinstance(klass, type) or 1484 hasattr(klass, "__getinitargs__")): 1485 try: 1486 value = klass(*args) 1487 except TypeError as err: 1488 raise TypeError("in constructor for %s: %s" % 1489 (klass.__name__, str(err)), sys.exc_info()[2]) 1490 else: 1491 value = klass.__new__(klass) 1492 self.append(value) 1493 1494 def load_inst(self): 1495 module = self.readline()[:-1].decode("ascii") 1496 name = self.readline()[:-1].decode("ascii") 1497 klass = self.find_class(module, name) 1498 self._instantiate(klass, self.pop_mark()) 1499 dispatch[INST[0]] = load_inst 1500 1501 def load_obj(self): 1502 # Stack is ... markobject classobject arg1 arg2 ... 1503 args = self.pop_mark() 1504 cls = args.pop(0) 1505 self._instantiate(cls, args) 1506 dispatch[OBJ[0]] = load_obj 1507 1508 def load_newobj(self): 1509 args = self.stack.pop() 1510 cls = self.stack.pop() 1511 obj = cls.__new__(cls, *args) 1512 self.append(obj) 1513 dispatch[NEWOBJ[0]] = load_newobj 1514 1515 def load_newobj_ex(self): 1516 kwargs = self.stack.pop() 1517 args = self.stack.pop() 1518 cls = self.stack.pop() 1519 obj = cls.__new__(cls, *args, **kwargs) 1520 self.append(obj) 1521 dispatch[NEWOBJ_EX[0]] = load_newobj_ex 1522 1523 def load_global(self): 1524 module = self.readline()[:-1].decode("utf-8") 1525 name = self.readline()[:-1].decode("utf-8") 1526 klass = self.find_class(module, name) 1527 self.append(klass) 1528 dispatch[GLOBAL[0]] = load_global 1529 1530 def load_stack_global(self): 1531 name = self.stack.pop() 1532 module = self.stack.pop() 1533 if type(name) is not str or type(module) is not str: 1534 raise UnpicklingError("STACK_GLOBAL requires str") 1535 self.append(self.find_class(module, name)) 1536 dispatch[STACK_GLOBAL[0]] = load_stack_global 1537 1538 def load_ext1(self): 1539 code = self.read(1)[0] 1540 self.get_extension(code) 1541 dispatch[EXT1[0]] = load_ext1 1542 1543 def load_ext2(self): 1544 code, = unpack('<H', self.read(2)) 
1545 self.get_extension(code) 1546 dispatch[EXT2[0]] = load_ext2 1547 1548 def load_ext4(self): 1549 code, = unpack('<i', self.read(4)) 1550 self.get_extension(code) 1551 dispatch[EXT4[0]] = load_ext4 1552 1553 def get_extension(self, code): 1554 nil = [] 1555 obj = _extension_cache.get(code, nil) 1556 if obj is not nil: 1557 self.append(obj) 1558 return 1559 key = _inverted_registry.get(code) 1560 if not key: 1561 if code <= 0: # note that 0 is forbidden 1562 # Corrupt or hostile pickle. 1563 raise UnpicklingError("EXT specifies code <= 0") 1564 raise ValueError("unregistered extension code %d" % code) 1565 obj = self.find_class(*key) 1566 _extension_cache[code] = obj 1567 self.append(obj) 1568 1569 def find_class(self, module, name): 1570 # Subclasses may override this. 1571 sys.audit('pickle.find_class', module, name) 1572 if self.proto < 3 and self.fix_imports: 1573 if (module, name) in _compat_pickle.NAME_MAPPING: 1574 module, name = _compat_pickle.NAME_MAPPING[(module, name)] 1575 elif module in _compat_pickle.IMPORT_MAPPING: 1576 module = _compat_pickle.IMPORT_MAPPING[module] 1577 __import__(module, level=0) 1578 if self.proto >= 4: 1579 return _getattribute(sys.modules[module], name)[0] 1580 else: 1581 return getattr(sys.modules[module], name) 1582 1583 def load_reduce(self): 1584 stack = self.stack 1585 args = stack.pop() 1586 func = stack[-1] 1587 stack[-1] = func(*args) 1588 dispatch[REDUCE[0]] = load_reduce 1589 1590 def load_pop(self): 1591 if self.stack: 1592 del self.stack[-1] 1593 else: 1594 self.pop_mark() 1595 dispatch[POP[0]] = load_pop 1596 1597 def load_pop_mark(self): 1598 self.pop_mark() 1599 dispatch[POP_MARK[0]] = load_pop_mark 1600 1601 def load_dup(self): 1602 self.append(self.stack[-1]) 1603 dispatch[DUP[0]] = load_dup 1604 1605 def load_get(self): 1606 i = int(self.readline()[:-1]) 1607 self.append(self.memo[i]) 1608 dispatch[GET[0]] = load_get 1609 1610 def load_binget(self): 1611 i = self.read(1)[0] 1612 self.append(self.memo[i]) 1613 
dispatch[BINGET[0]] = load_binget 1614 1615 def load_long_binget(self): 1616 i, = unpack('<I', self.read(4)) 1617 self.append(self.memo[i]) 1618 dispatch[LONG_BINGET[0]] = load_long_binget 1619 1620 def load_put(self): 1621 i = int(self.readline()[:-1]) 1622 if i < 0: 1623 raise ValueError("negative PUT argument") 1624 self.memo[i] = self.stack[-1] 1625 dispatch[PUT[0]] = load_put 1626 1627 def load_binput(self): 1628 i = self.read(1)[0] 1629 if i < 0: 1630 raise ValueError("negative BINPUT argument") 1631 self.memo[i] = self.stack[-1] 1632 dispatch[BINPUT[0]] = load_binput 1633 1634 def load_long_binput(self): 1635 i, = unpack('<I', self.read(4)) 1636 if i > maxsize: 1637 raise ValueError("negative LONG_BINPUT argument") 1638 self.memo[i] = self.stack[-1] 1639 dispatch[LONG_BINPUT[0]] = load_long_binput 1640 1641 def load_memoize(self): 1642 memo = self.memo 1643 memo[len(memo)] = self.stack[-1] 1644 dispatch[MEMOIZE[0]] = load_memoize 1645 1646 def load_append(self): 1647 stack = self.stack 1648 value = stack.pop() 1649 list = stack[-1] 1650 list.append(value) 1651 dispatch[APPEND[0]] = load_append 1652 1653 def load_appends(self): 1654 items = self.pop_mark() 1655 list_obj = self.stack[-1] 1656 try: 1657 extend = list_obj.extend 1658 except AttributeError: 1659 pass 1660 else: 1661 extend(items) 1662 return 1663 # Even if the PEP 307 requires extend() and append() methods, 1664 # fall back on append() if the object has no extend() method 1665 # for backward compatibility. 
1666 append = list_obj.append 1667 for item in items: 1668 append(item) 1669 dispatch[APPENDS[0]] = load_appends 1670 1671 def load_setitem(self): 1672 stack = self.stack 1673 value = stack.pop() 1674 key = stack.pop() 1675 dict = stack[-1] 1676 dict[key] = value 1677 dispatch[SETITEM[0]] = load_setitem 1678 1679 def load_setitems(self): 1680 items = self.pop_mark() 1681 dict = self.stack[-1] 1682 for i in range(0, len(items), 2): 1683 dict[items[i]] = items[i + 1] 1684 dispatch[SETITEMS[0]] = load_setitems 1685 1686 def load_additems(self): 1687 items = self.pop_mark() 1688 set_obj = self.stack[-1] 1689 if isinstance(set_obj, set): 1690 set_obj.update(items) 1691 else: 1692 add = set_obj.add 1693 for item in items: 1694 add(item) 1695 dispatch[ADDITEMS[0]] = load_additems 1696 1697 def load_build(self): 1698 stack = self.stack 1699 state = stack.pop() 1700 inst = stack[-1] 1701 setstate = getattr(inst, "__setstate__", None) 1702 if setstate is not None: 1703 setstate(state) 1704 return 1705 slotstate = None 1706 if isinstance(state, tuple) and len(state) == 2: 1707 state, slotstate = state 1708 if state: 1709 inst_dict = inst.__dict__ 1710 intern = sys.intern 1711 for k, v in state.items(): 1712 if type(k) is str: 1713 inst_dict[intern(k)] = v 1714 else: 1715 inst_dict[k] = v 1716 if slotstate: 1717 for k, v in slotstate.items(): 1718 setattr(inst, k, v) 1719 dispatch[BUILD[0]] = load_build 1720 1721 def load_mark(self): 1722 self.metastack.append(self.stack) 1723 self.stack = [] 1724 self.append = self.stack.append 1725 dispatch[MARK[0]] = load_mark 1726 1727 def load_stop(self): 1728 value = self.stack.pop() 1729 raise _Stop(value) 1730 dispatch[STOP[0]] = load_stop 1731 1732 1733# Shorthands 1734 1735def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None): 1736 _Pickler(file, protocol, fix_imports=fix_imports, 1737 buffer_callback=buffer_callback).dump(obj) 1738 1739def _dumps(obj, protocol=None, *, fix_imports=True, 
buffer_callback=None): 1740 f = io.BytesIO() 1741 _Pickler(f, protocol, fix_imports=fix_imports, 1742 buffer_callback=buffer_callback).dump(obj) 1743 res = f.getvalue() 1744 assert isinstance(res, bytes_types) 1745 return res 1746 1747def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict", 1748 buffers=None): 1749 return _Unpickler(file, fix_imports=fix_imports, buffers=buffers, 1750 encoding=encoding, errors=errors).load() 1751 1752def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict", 1753 buffers=None): 1754 if isinstance(s, str): 1755 raise TypeError("Can't load pickle from unicode string") 1756 file = io.BytesIO(s) 1757 return _Unpickler(file, fix_imports=fix_imports, buffers=buffers, 1758 encoding=encoding, errors=errors).load() 1759 1760# Use the faster _pickle if possible 1761try: 1762 from _pickle import ( 1763 PickleError, 1764 PicklingError, 1765 UnpicklingError, 1766 Pickler, 1767 Unpickler, 1768 dump, 1769 dumps, 1770 load, 1771 loads 1772 ) 1773except ImportError: 1774 Pickler, Unpickler = _Pickler, _Unpickler 1775 dump, dumps, load, loads = _dump, _dumps, _load, _loads 1776 1777# Doctest 1778def _test(): 1779 import doctest 1780 return doctest.testmod() 1781 1782if __name__ == "__main__": 1783 import argparse 1784 parser = argparse.ArgumentParser( 1785 description='display contents of the pickle files') 1786 parser.add_argument( 1787 'pickle_file', type=argparse.FileType('br'), 1788 nargs='*', help='the pickle file') 1789 parser.add_argument( 1790 '-t', '--test', action='store_true', 1791 help='run self-test suite') 1792 parser.add_argument( 1793 '-v', action='store_true', 1794 help='run verbosely; only affects self-test run') 1795 args = parser.parse_args() 1796 if args.test: 1797 _test() 1798 else: 1799 if not args.pickle_file: 1800 parser.print_help() 1801 else: 1802 import pprint 1803 for f in args.pickle_file: 1804 obj = load(f) 1805 pprint.pprint(obj) 1806