• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import collections
2import copyreg
3import dbm
4import io
5import functools
6import os
7import math
8import pickle
9import pickletools
10import shutil
11import struct
12import sys
13import threading
14import unittest
15import weakref
16from textwrap import dedent
17from http.cookies import SimpleCookie
18
19try:
20    import _testbuffer
21except ImportError:
22    _testbuffer = None
23
24try:
25    import numpy as np
26except ImportError:
27    np = None
28
29from test import support
30from test.support import (
31    TestFailed, TESTFN, run_with_locale, no_tracing,
32    _2G, _4G, bigmemtest, reap_threads, forget,
33    )
34
35from pickle import bytes_types
36
# Decorator: skip the test unless sys.maxsize is below 2**32.
requires_32b = unittest.skipUnless(
    sys.maxsize < 2**32,
    "test is only meaningful on 32-bit builds",
)

# Tests that try a number of pickle protocols should have a
#     for proto in protocols:
# kind of outer loop.
protocols = range(0, pickle.HIGHEST_PROTOCOL + 1)
44
45
46# Return True if opcode code appears in the pickle, else False.
def opcode_in_pickle(code, pickle):
    """Return True if opcode *code* appears in *pickle*, else False.

    *code* is the opcode as a bytes object (it is decoded as latin-1 to
    compare against the opcode names reported by pickletools.genops());
    *pickle* is the pickle data handed to pickletools.genops().
    """
    # Decode once instead of on every opcode: the value is loop-invariant.
    wanted = code.decode("latin-1")
    return any(op.code == wanted
               for op, _arg, _pos in pickletools.genops(pickle))
52
53# Return the number of times opcode code appears in pickle.
def count_opcode(code, pickle):
    """Return the number of times opcode *code* appears in *pickle*.

    *code* is the opcode as a bytes object (decoded as latin-1 for the
    comparison); *pickle* is the pickle data handed to
    pickletools.genops().
    """
    # Decode once instead of on every opcode: the value is loop-invariant.
    wanted = code.decode("latin-1")
    return sum(1 for op, _arg, _pos in pickletools.genops(pickle)
               if op.code == wanted)
60
61
class UnseekableIO(io.BytesIO):
    """BytesIO that reports itself as unseekable.

    peek() raises NotImplementedError; seek() and tell() raise
    io.UnsupportedOperation.
    """

    def seekable(self):
        return False

    def peek(self, *args):
        raise NotImplementedError

    def tell(self):
        raise io.UnsupportedOperation

    def seek(self, *args):
        raise io.UnsupportedOperation
74
75
76# We can't very well test the extension registry without putting known stuff
77# in it, but we have to be careful to restore its original state.  Code
78# should do this:
79#
80#     e = ExtensionSaver(extension_code)
81#     try:
82#         fiddle w/ the extension registry's stuff for extension_code
83#     finally:
84#         e.restore()
85
class ExtensionSaver:
    """Save, clear and later restore the copyreg registration of a code."""

    def __init__(self, code):
        """Remember the current registration for code (if any) and remove it."""
        self.code = code
        # pair is the registry entry for code, or None when code is
        # currently unregistered.
        self.pair = copyreg._inverted_registry.get(code)
        if self.pair is not None:
            copyreg.remove_extension(self.pair[0], self.pair[1], code)

    def restore(self):
        """Drop whatever is registered for code now, then re-add the saved pair."""
        code = self.code
        current = copyreg._inverted_registry.get(code)
        if current is not None:
            copyreg.remove_extension(current[0], current[1], code)
        saved = self.pair
        if saved is not None:
            copyreg.add_extension(saved[0], saved[1], code)
106
class C:
    """Fixture class: instances are equal when their __dict__s are equal."""

    def __eq__(self, other):
        mine, theirs = self.__dict__, other.__dict__
        return mine == theirs
110
class D(C):
    """C subclass whose constructor takes one argument and ignores it."""

    def __init__(self, arg):
        """Accept arg without storing it."""
114
class E(C):
    """C subclass that defines __getinitargs__ returning an empty tuple."""

    def __getinitargs__(self):
        return tuple()
118
class H:
    """Empty fixture class."""
121
122# Hashable mutable key
class K:
    """Fixture holding a single value and reducing to ``K(value)``."""

    def __init__(self, value):
        self.value = value

    def __reduce__(self):
        # Shouldn't support the recursion itself
        reduce_args = (self.value,)
        return K, reduce_args
130
import __main__

# Expose each fixture class as an attribute of __main__ and relabel its
# __module__ to match; the DATA* fixtures refer to C as '__main__ C'.
for _fixture in (C, D, E, H, K):
    setattr(__main__, _fixture.__name__, _fixture)
    _fixture.__module__ = "__main__"
del _fixture
142
class myint(int):
    """int subclass that also records str(x) of its constructor argument."""

    def __init__(self, x):
        text = str(x)
        self.str = text
146
class initarg(C):
    """C subclass storing two values and returning them from __getinitargs__."""

    def __init__(self, a, b):
        self.a, self.b = a, b

    def __getinitargs__(self):
        return (self.a, self.b)
155
class metaclass(type):
    """Trivial metaclass adding nothing to type."""
158
class use_metaclass(metaclass=metaclass):
    """Empty class created through ``metaclass``."""
161
class pickling_metaclass(type):
    """Metaclass whose classes compare and reduce through ``reduce_args``.

    Each class is expected to carry a ``reduce_args`` attribute (set by
    create_dynamic_class()).
    """

    def __eq__(self, other):
        # Classes of a different metaclass are never equal.
        if type(self) != type(other):
            return False
        return self.reduce_args == other.reduce_args

    def __reduce__(self):
        return create_dynamic_class, self.reduce_args
169
def create_dynamic_class(name, bases):
    """Create a pickling_metaclass class and record (name, bases) on it.

    The recorded tuple is stored as ``reduce_args``, which
    pickling_metaclass uses for equality and for __reduce__.
    """
    new_class = pickling_metaclass(name, bases, {})
    new_class.reduce_args = (name, bases)
    return new_class
174
175
class ZeroCopyBytes(bytes):
    """bytes subclass reduced through a PickleBuffer at protocol 5 and up."""

    readonly = True
    c_contiguous = True
    f_contiguous = True
    zero_copy_reconstruct = True

    def __reduce_ex__(self, protocol):
        rebuild = type(self)._reconstruct
        if protocol < 5:
            # Older protocols get a plain bytes copy of the data.
            return rebuild, (bytes(self),)
        return rebuild, (pickle.PickleBuffer(self),), None

    def __repr__(self):
        return f"{self.__class__.__name__}({bytes(self)!r})"

    __str__ = __repr__

    @classmethod
    def _reconstruct(cls, obj):
        """Return the object behind obj's buffer, converting it to cls if needed."""
        with memoryview(obj) as view:
            exporter = view.obj
            if type(exporter) is not cls:
                return cls(exporter)
            # Zero-copy
            return exporter
202
203
class ZeroCopyBytearray(bytearray):
    """bytearray subclass reduced through a PickleBuffer at protocol 5 and up."""

    readonly = False
    c_contiguous = True
    f_contiguous = True
    zero_copy_reconstruct = True

    def __reduce_ex__(self, protocol):
        rebuild = type(self)._reconstruct
        if protocol < 5:
            # Older protocols get a plain bytes copy of the data.
            return rebuild, (bytes(self),)
        return rebuild, (pickle.PickleBuffer(self),), None

    def __repr__(self):
        return f"{self.__class__.__name__}({bytes(self)!r})"

    __str__ = __repr__

    @classmethod
    def _reconstruct(cls, obj):
        """Return the object behind obj's buffer, converting it to cls if needed."""
        with memoryview(obj) as view:
            exporter = view.obj
            if type(exporter) is not cls:
                return cls(exporter)
            # Zero-copy
            return exporter
230
231
232if _testbuffer is not None:
233
234    class PicklableNDArray:
235        # A not-really-zero-copy picklable ndarray, as the ndarray()
236        # constructor doesn't allow for it
237
238        zero_copy_reconstruct = False
239
240        def __init__(self, *args, **kwargs):
241            self.array = _testbuffer.ndarray(*args, **kwargs)
242
243        def __getitem__(self, idx):
244            cls = type(self)
245            new = cls.__new__(cls)
246            new.array = self.array[idx]
247            return new
248
249        @property
250        def readonly(self):
251            return self.array.readonly
252
253        @property
254        def c_contiguous(self):
255            return self.array.c_contiguous
256
257        @property
258        def f_contiguous(self):
259            return self.array.f_contiguous
260
261        def __eq__(self, other):
262            if not isinstance(other, PicklableNDArray):
263                return NotImplemented
264            return (other.array.format == self.array.format and
265                    other.array.shape == self.array.shape and
266                    other.array.strides == self.array.strides and
267                    other.array.readonly == self.array.readonly and
268                    other.array.tobytes() == self.array.tobytes())
269
270        def __ne__(self, other):
271            if not isinstance(other, PicklableNDArray):
272                return NotImplemented
273            return not (self == other)
274
275        def __repr__(self):
276            return (f"{type(self)}(shape={self.array.shape},"
277                    f"strides={self.array.strides}, "
278                    f"bytes={self.array.tobytes()})")
279
280        def __reduce_ex__(self, protocol):
281            if not self.array.contiguous:
282                raise NotImplementedError("Reconstructing a non-contiguous "
283                                          "ndarray does not seem possible")
284            ndarray_kwargs = {"shape": self.array.shape,
285                              "strides": self.array.strides,
286                              "format": self.array.format,
287                              "flags": (0 if self.readonly
288                                        else _testbuffer.ND_WRITABLE)}
289            pb = pickle.PickleBuffer(self.array)
290            if protocol >= 5:
291                return (type(self)._reconstruct,
292                        (pb, ndarray_kwargs))
293            else:
294                # Need to serialize the bytes in physical order
295                with pb.raw() as m:
296                    return (type(self)._reconstruct,
297                            (m.tobytes(), ndarray_kwargs))
298
299        @classmethod
300        def _reconstruct(cls, obj, kwargs):
301            with memoryview(obj) as m:
302                # For some reason, ndarray() wants a list of integers...
303                # XXX This only works if format == 'B'
304                items = list(m.tobytes())
305            return cls(items, **kwargs)
306
307
308# DATA0 .. DATA4 are the pickles we expect under the various protocols, for
309# the object returned by create_data().
310
311DATA0 = (
312    b'(lp0\nL0L\naL1L\naF2.0\n'
313    b'ac__builtin__\ncomple'
314    b'x\np1\n(F3.0\nF0.0\ntp2\n'
315    b'Rp3\naL1L\naL-1L\naL255'
316    b'L\naL-255L\naL-256L\naL'
317    b'65535L\naL-65535L\naL-'
318    b'65536L\naL2147483647L'
319    b'\naL-2147483647L\naL-2'
320    b'147483648L\na(Vabc\np4'
321    b'\ng4\nccopy_reg\n_recon'
322    b'structor\np5\n(c__main'
323    b'__\nC\np6\nc__builtin__'
324    b'\nobject\np7\nNtp8\nRp9\n'
325    b'(dp10\nVfoo\np11\nL1L\ns'
326    b'Vbar\np12\nL2L\nsbg9\ntp'
327    b'13\nag13\naL5L\na.'
328)
329
330# Disassembly of DATA0
331DATA0_DIS = """\
332    0: (    MARK
333    1: l        LIST       (MARK at 0)
334    2: p    PUT        0
335    5: L    LONG       0
336    9: a    APPEND
337   10: L    LONG       1
338   14: a    APPEND
339   15: F    FLOAT      2.0
340   20: a    APPEND
341   21: c    GLOBAL     '__builtin__ complex'
342   42: p    PUT        1
343   45: (    MARK
344   46: F        FLOAT      3.0
345   51: F        FLOAT      0.0
346   56: t        TUPLE      (MARK at 45)
347   57: p    PUT        2
348   60: R    REDUCE
349   61: p    PUT        3
350   64: a    APPEND
351   65: L    LONG       1
352   69: a    APPEND
353   70: L    LONG       -1
354   75: a    APPEND
355   76: L    LONG       255
356   82: a    APPEND
357   83: L    LONG       -255
358   90: a    APPEND
359   91: L    LONG       -256
360   98: a    APPEND
361   99: L    LONG       65535
362  107: a    APPEND
363  108: L    LONG       -65535
364  117: a    APPEND
365  118: L    LONG       -65536
366  127: a    APPEND
367  128: L    LONG       2147483647
368  141: a    APPEND
369  142: L    LONG       -2147483647
370  156: a    APPEND
371  157: L    LONG       -2147483648
372  171: a    APPEND
373  172: (    MARK
374  173: V        UNICODE    'abc'
375  178: p        PUT        4
376  181: g        GET        4
377  184: c        GLOBAL     'copy_reg _reconstructor'
378  209: p        PUT        5
379  212: (        MARK
380  213: c            GLOBAL     '__main__ C'
381  225: p            PUT        6
382  228: c            GLOBAL     '__builtin__ object'
383  248: p            PUT        7
384  251: N            NONE
385  252: t            TUPLE      (MARK at 212)
386  253: p        PUT        8
387  256: R        REDUCE
388  257: p        PUT        9
389  260: (        MARK
390  261: d            DICT       (MARK at 260)
391  262: p        PUT        10
392  266: V        UNICODE    'foo'
393  271: p        PUT        11
394  275: L        LONG       1
395  279: s        SETITEM
396  280: V        UNICODE    'bar'
397  285: p        PUT        12
398  289: L        LONG       2
399  293: s        SETITEM
400  294: b        BUILD
401  295: g        GET        9
402  298: t        TUPLE      (MARK at 172)
403  299: p    PUT        13
404  303: a    APPEND
405  304: g    GET        13
406  308: a    APPEND
407  309: L    LONG       5
408  313: a    APPEND
409  314: .    STOP
410highest protocol among opcodes = 0
411"""
412
413DATA1 = (
414    b']q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c__'
415    b'builtin__\ncomplex\nq\x01'
416    b'(G@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00t'
417    b'q\x02Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xffJ'
418    b'\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff\xff'
419    b'\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00ab'
420    b'cq\x04h\x04ccopy_reg\n_reco'
421    b'nstructor\nq\x05(c__main'
422    b'__\nC\nq\x06c__builtin__\n'
423    b'object\nq\x07Ntq\x08Rq\t}q\n('
424    b'X\x03\x00\x00\x00fooq\x0bK\x01X\x03\x00\x00\x00bar'
425    b'q\x0cK\x02ubh\ttq\rh\rK\x05e.'
426)
427
428# Disassembly of DATA1
429DATA1_DIS = """\
430    0: ]    EMPTY_LIST
431    1: q    BINPUT     0
432    3: (    MARK
433    4: K        BININT1    0
434    6: K        BININT1    1
435    8: G        BINFLOAT   2.0
436   17: c        GLOBAL     '__builtin__ complex'
437   38: q        BINPUT     1
438   40: (        MARK
439   41: G            BINFLOAT   3.0
440   50: G            BINFLOAT   0.0
441   59: t            TUPLE      (MARK at 40)
442   60: q        BINPUT     2
443   62: R        REDUCE
444   63: q        BINPUT     3
445   65: K        BININT1    1
446   67: J        BININT     -1
447   72: K        BININT1    255
448   74: J        BININT     -255
449   79: J        BININT     -256
450   84: M        BININT2    65535
451   87: J        BININT     -65535
452   92: J        BININT     -65536
453   97: J        BININT     2147483647
454  102: J        BININT     -2147483647
455  107: J        BININT     -2147483648
456  112: (        MARK
457  113: X            BINUNICODE 'abc'
458  121: q            BINPUT     4
459  123: h            BINGET     4
460  125: c            GLOBAL     'copy_reg _reconstructor'
461  150: q            BINPUT     5
462  152: (            MARK
463  153: c                GLOBAL     '__main__ C'
464  165: q                BINPUT     6
465  167: c                GLOBAL     '__builtin__ object'
466  187: q                BINPUT     7
467  189: N                NONE
468  190: t                TUPLE      (MARK at 152)
469  191: q            BINPUT     8
470  193: R            REDUCE
471  194: q            BINPUT     9
472  196: }            EMPTY_DICT
473  197: q            BINPUT     10
474  199: (            MARK
475  200: X                BINUNICODE 'foo'
476  208: q                BINPUT     11
477  210: K                BININT1    1
478  212: X                BINUNICODE 'bar'
479  220: q                BINPUT     12
480  222: K                BININT1    2
481  224: u                SETITEMS   (MARK at 199)
482  225: b            BUILD
483  226: h            BINGET     9
484  228: t            TUPLE      (MARK at 112)
485  229: q        BINPUT     13
486  231: h        BINGET     13
487  233: K        BININT1    5
488  235: e        APPENDS    (MARK at 3)
489  236: .    STOP
490highest protocol among opcodes = 1
491"""
492
493DATA2 = (
494    b'\x80\x02]q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c'
495    b'__builtin__\ncomplex\n'
496    b'q\x01G@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00'
497    b'\x86q\x02Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xff'
498    b'J\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff'
499    b'\xff\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00a'
500    b'bcq\x04h\x04c__main__\nC\nq\x05'
501    b')\x81q\x06}q\x07(X\x03\x00\x00\x00fooq\x08K\x01'
502    b'X\x03\x00\x00\x00barq\tK\x02ubh\x06tq\nh'
503    b'\nK\x05e.'
504)
505
506# Disassembly of DATA2
507DATA2_DIS = """\
508    0: \x80 PROTO      2
509    2: ]    EMPTY_LIST
510    3: q    BINPUT     0
511    5: (    MARK
512    6: K        BININT1    0
513    8: K        BININT1    1
514   10: G        BINFLOAT   2.0
515   19: c        GLOBAL     '__builtin__ complex'
516   40: q        BINPUT     1
517   42: G        BINFLOAT   3.0
518   51: G        BINFLOAT   0.0
519   60: \x86     TUPLE2
520   61: q        BINPUT     2
521   63: R        REDUCE
522   64: q        BINPUT     3
523   66: K        BININT1    1
524   68: J        BININT     -1
525   73: K        BININT1    255
526   75: J        BININT     -255
527   80: J        BININT     -256
528   85: M        BININT2    65535
529   88: J        BININT     -65535
530   93: J        BININT     -65536
531   98: J        BININT     2147483647
532  103: J        BININT     -2147483647
533  108: J        BININT     -2147483648
534  113: (        MARK
535  114: X            BINUNICODE 'abc'
536  122: q            BINPUT     4
537  124: h            BINGET     4
538  126: c            GLOBAL     '__main__ C'
539  138: q            BINPUT     5
540  140: )            EMPTY_TUPLE
541  141: \x81         NEWOBJ
542  142: q            BINPUT     6
543  144: }            EMPTY_DICT
544  145: q            BINPUT     7
545  147: (            MARK
546  148: X                BINUNICODE 'foo'
547  156: q                BINPUT     8
548  158: K                BININT1    1
549  160: X                BINUNICODE 'bar'
550  168: q                BINPUT     9
551  170: K                BININT1    2
552  172: u                SETITEMS   (MARK at 147)
553  173: b            BUILD
554  174: h            BINGET     6
555  176: t            TUPLE      (MARK at 113)
556  177: q        BINPUT     10
557  179: h        BINGET     10
558  181: K        BININT1    5
559  183: e        APPENDS    (MARK at 5)
560  184: .    STOP
561highest protocol among opcodes = 2
562"""
563
564DATA3 = (
565    b'\x80\x03]q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c'
566    b'builtins\ncomplex\nq\x01G'
567    b'@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00\x86q\x02'
568    b'Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xffJ\x00\xff'
569    b'\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff\xff\xff\x7f'
570    b'J\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00abcq'
571    b'\x04h\x04c__main__\nC\nq\x05)\x81q'
572    b'\x06}q\x07(X\x03\x00\x00\x00barq\x08K\x02X\x03\x00'
573    b'\x00\x00fooq\tK\x01ubh\x06tq\nh\nK\x05'
574    b'e.'
575)
576
577# Disassembly of DATA3
578DATA3_DIS = """\
579    0: \x80 PROTO      3
580    2: ]    EMPTY_LIST
581    3: q    BINPUT     0
582    5: (    MARK
583    6: K        BININT1    0
584    8: K        BININT1    1
585   10: G        BINFLOAT   2.0
586   19: c        GLOBAL     'builtins complex'
587   37: q        BINPUT     1
588   39: G        BINFLOAT   3.0
589   48: G        BINFLOAT   0.0
590   57: \x86     TUPLE2
591   58: q        BINPUT     2
592   60: R        REDUCE
593   61: q        BINPUT     3
594   63: K        BININT1    1
595   65: J        BININT     -1
596   70: K        BININT1    255
597   72: J        BININT     -255
598   77: J        BININT     -256
599   82: M        BININT2    65535
600   85: J        BININT     -65535
601   90: J        BININT     -65536
602   95: J        BININT     2147483647
603  100: J        BININT     -2147483647
604  105: J        BININT     -2147483648
605  110: (        MARK
606  111: X            BINUNICODE 'abc'
607  119: q            BINPUT     4
608  121: h            BINGET     4
609  123: c            GLOBAL     '__main__ C'
610  135: q            BINPUT     5
611  137: )            EMPTY_TUPLE
612  138: \x81         NEWOBJ
613  139: q            BINPUT     6
614  141: }            EMPTY_DICT
615  142: q            BINPUT     7
616  144: (            MARK
617  145: X                BINUNICODE 'bar'
618  153: q                BINPUT     8
619  155: K                BININT1    2
620  157: X                BINUNICODE 'foo'
621  165: q                BINPUT     9
622  167: K                BININT1    1
623  169: u                SETITEMS   (MARK at 144)
624  170: b            BUILD
625  171: h            BINGET     6
626  173: t            TUPLE      (MARK at 110)
627  174: q        BINPUT     10
628  176: h        BINGET     10
629  178: K        BININT1    5
630  180: e        APPENDS    (MARK at 5)
631  181: .    STOP
632highest protocol among opcodes = 2
633"""
634
635DATA4 = (
636    b'\x80\x04\x95\xa8\x00\x00\x00\x00\x00\x00\x00]\x94(K\x00K\x01G@'
637    b'\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x94\x8c\x07'
638    b'complex\x94\x93\x94G@\x08\x00\x00\x00\x00\x00\x00G'
639    b'\x00\x00\x00\x00\x00\x00\x00\x00\x86\x94R\x94K\x01J\xff\xff\xff\xffK'
640    b'\xffJ\x01\xff\xff\xffJ\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ'
641    b'\x00\x00\xff\xffJ\xff\xff\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80('
642    b'\x8c\x03abc\x94h\x06\x8c\x08__main__\x94\x8c'
643    b'\x01C\x94\x93\x94)\x81\x94}\x94(\x8c\x03bar\x94K\x02\x8c'
644    b'\x03foo\x94K\x01ubh\nt\x94h\x0eK\x05e.'
645)
646
647# Disassembly of DATA4
648DATA4_DIS = """\
649    0: \x80 PROTO      4
650    2: \x95 FRAME      168
651   11: ]    EMPTY_LIST
652   12: \x94 MEMOIZE
653   13: (    MARK
654   14: K        BININT1    0
655   16: K        BININT1    1
656   18: G        BINFLOAT   2.0
657   27: \x8c     SHORT_BINUNICODE 'builtins'
658   37: \x94     MEMOIZE
659   38: \x8c     SHORT_BINUNICODE 'complex'
660   47: \x94     MEMOIZE
661   48: \x93     STACK_GLOBAL
662   49: \x94     MEMOIZE
663   50: G        BINFLOAT   3.0
664   59: G        BINFLOAT   0.0
665   68: \x86     TUPLE2
666   69: \x94     MEMOIZE
667   70: R        REDUCE
668   71: \x94     MEMOIZE
669   72: K        BININT1    1
670   74: J        BININT     -1
671   79: K        BININT1    255
672   81: J        BININT     -255
673   86: J        BININT     -256
674   91: M        BININT2    65535
675   94: J        BININT     -65535
676   99: J        BININT     -65536
677  104: J        BININT     2147483647
678  109: J        BININT     -2147483647
679  114: J        BININT     -2147483648
680  119: (        MARK
681  120: \x8c         SHORT_BINUNICODE 'abc'
682  125: \x94         MEMOIZE
683  126: h            BINGET     6
684  128: \x8c         SHORT_BINUNICODE '__main__'
685  138: \x94         MEMOIZE
686  139: \x8c         SHORT_BINUNICODE 'C'
687  142: \x94         MEMOIZE
688  143: \x93         STACK_GLOBAL
689  144: \x94         MEMOIZE
690  145: )            EMPTY_TUPLE
691  146: \x81         NEWOBJ
692  147: \x94         MEMOIZE
693  148: }            EMPTY_DICT
694  149: \x94         MEMOIZE
695  150: (            MARK
696  151: \x8c             SHORT_BINUNICODE 'bar'
697  156: \x94             MEMOIZE
698  157: K                BININT1    2
699  159: \x8c             SHORT_BINUNICODE 'foo'
700  164: \x94             MEMOIZE
701  165: K                BININT1    1
702  167: u                SETITEMS   (MARK at 150)
703  168: b            BUILD
704  169: h            BINGET     10
705  171: t            TUPLE      (MARK at 119)
706  172: \x94     MEMOIZE
707  173: h        BINGET     14
708  175: K        BININT1    5
709  177: e        APPENDS    (MARK at 13)
710  178: .    STOP
711highest protocol among opcodes = 4
712"""
713
714# set([1,2]) pickled from 2.x with protocol 2
715DATA_SET = b'\x80\x02c__builtin__\nset\nq\x00]q\x01(K\x01K\x02e\x85q\x02Rq\x03.'
716
717# xrange(5) pickled from 2.x with protocol 2
718DATA_XRANGE = b'\x80\x02c__builtin__\nxrange\nq\x00K\x00K\x05K\x01\x87q\x01Rq\x02.'
719
720# a SimpleCookie() object pickled from 2.x with protocol 2
721DATA_COOKIE = (b'\x80\x02cCookie\nSimpleCookie\nq\x00)\x81q\x01U\x03key'
722               b'q\x02cCookie\nMorsel\nq\x03)\x81q\x04(U\x07commentq\x05U'
723               b'\x00q\x06U\x06domainq\x07h\x06U\x06secureq\x08h\x06U\x07'
724               b'expiresq\th\x06U\x07max-ageq\nh\x06U\x07versionq\x0bh\x06U'
725               b'\x04pathq\x0ch\x06U\x08httponlyq\rh\x06u}q\x0e(U\x0b'
726               b'coded_valueq\x0fU\x05valueq\x10h\x10h\x10h\x02h\x02ubs}q\x11b.')
727
728# set([3]) pickled from 2.x with protocol 2
729DATA_SET2 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01K\x03a\x85q\x02Rq\x03.'
730
731python2_exceptions_without_args = (
732    ArithmeticError,
733    AssertionError,
734    AttributeError,
735    BaseException,
736    BufferError,
737    BytesWarning,
738    DeprecationWarning,
739    EOFError,
740    EnvironmentError,
741    Exception,
742    FloatingPointError,
743    FutureWarning,
744    GeneratorExit,
745    IOError,
746    ImportError,
747    ImportWarning,
748    IndentationError,
749    IndexError,
750    KeyError,
751    KeyboardInterrupt,
752    LookupError,
753    MemoryError,
754    NameError,
755    NotImplementedError,
756    OSError,
757    OverflowError,
758    PendingDeprecationWarning,
759    ReferenceError,
760    RuntimeError,
761    RuntimeWarning,
762    # StandardError is gone in Python 3, we map it to Exception
763    StopIteration,
764    SyntaxError,
765    SyntaxWarning,
766    SystemError,
767    SystemExit,
768    TabError,
769    TypeError,
770    UnboundLocalError,
771    UnicodeError,
772    UnicodeWarning,
773    UserWarning,
774    ValueError,
775    Warning,
776    ZeroDivisionError,
777)
778
779exception_pickle = b'\x80\x02cexceptions\n?\nq\x00)Rq\x01.'
780
781# UnicodeEncodeError object pickled from 2.x with protocol 2
782DATA_UEERR = (b'\x80\x02cexceptions\nUnicodeEncodeError\n'
783              b'q\x00(U\x05asciiq\x01X\x03\x00\x00\x00fooq\x02K\x00K\x01'
784              b'U\x03badq\x03tq\x04Rq\x05.')
785
786
def create_data():
    """Build the reference object whose expected pickles are DATA0..DATA4.

    Returns a list of numbers followed by the same tuple twice and a
    trailing 5; the tuple holds 'abc' twice and one C instance twice.
    """
    inst = C()
    inst.foo = 1
    inst.bar = 2
    # Integer test cases at cPickle.c's internal size cutoffs.
    uint1max, uint2max, int4max = 0xff, 0xffff, 0x7fffffff
    shared = ('abc', 'abc', inst, inst)
    return [
        0, 1, 2.0, 3.0+0j,
        1, -1,
        uint1max, -uint1max, -uint1max-1,
        uint2max, -uint2max, -uint2max-1,
        int4max, -int4max, -int4max-1,
        shared,
        shared,
        5,
    ]
806
807
808class AbstractUnpickleTests(unittest.TestCase):
809    # Subclass must define self.loads.
810
811    _testdata = create_data()
812
813    def assert_is_copy(self, obj, objcopy, msg=None):
814        """Utility method to verify if two objects are copies of each others.
815        """
816        if msg is None:
817            msg = "{!r} is not a copy of {!r}".format(obj, objcopy)
818        self.assertEqual(obj, objcopy, msg=msg)
819        self.assertIs(type(obj), type(objcopy), msg=msg)
820        if hasattr(obj, '__dict__'):
821            self.assertDictEqual(obj.__dict__, objcopy.__dict__, msg=msg)
822            self.assertIsNot(obj.__dict__, objcopy.__dict__, msg=msg)
823        if hasattr(obj, '__slots__'):
824            self.assertListEqual(obj.__slots__, objcopy.__slots__, msg=msg)
825            for slot in obj.__slots__:
826                self.assertEqual(
827                    hasattr(obj, slot), hasattr(objcopy, slot), msg=msg)
828                self.assertEqual(getattr(obj, slot, None),
829                                 getattr(objcopy, slot, None), msg=msg)
830
831    def check_unpickling_error(self, errors, data):
832        with self.subTest(data=data), \
833             self.assertRaises(errors):
834            try:
835                self.loads(data)
836            except BaseException as exc:
837                if support.verbose > 1:
838                    print('%-32r - %s: %s' %
839                          (data, exc.__class__.__name__, exc))
840                raise
841
842    def test_load_from_data0(self):
843        self.assert_is_copy(self._testdata, self.loads(DATA0))
844
845    def test_load_from_data1(self):
846        self.assert_is_copy(self._testdata, self.loads(DATA1))
847
848    def test_load_from_data2(self):
849        self.assert_is_copy(self._testdata, self.loads(DATA2))
850
851    def test_load_from_data3(self):
852        self.assert_is_copy(self._testdata, self.loads(DATA3))
853
854    def test_load_from_data4(self):
855        self.assert_is_copy(self._testdata, self.loads(DATA4))
856
857    def test_load_classic_instance(self):
858        # See issue5180.  Test loading 2.x pickles that
859        # contain an instance of old style class.
860        for X, args in [(C, ()), (D, ('x',)), (E, ())]:
861            xname = X.__name__.encode('ascii')
862            # Protocol 0 (text mode pickle):
863            """
864             0: (    MARK
865             1: i        INST       '__main__ X' (MARK at 0)
866            13: p    PUT        0
867            16: (    MARK
868            17: d        DICT       (MARK at 16)
869            18: p    PUT        1
870            21: b    BUILD
871            22: .    STOP
872            """
873            pickle0 = (b"(i__main__\n"
874                       b"X\n"
875                       b"p0\n"
876                       b"(dp1\nb.").replace(b'X', xname)
877            self.assert_is_copy(X(*args), self.loads(pickle0))
878
879            # Protocol 1 (binary mode pickle)
880            """
881             0: (    MARK
882             1: c        GLOBAL     '__main__ X'
883            13: q        BINPUT     0
884            15: o        OBJ        (MARK at 0)
885            16: q    BINPUT     1
886            18: }    EMPTY_DICT
887            19: q    BINPUT     2
888            21: b    BUILD
889            22: .    STOP
890            """
891            pickle1 = (b'(c__main__\n'
892                       b'X\n'
893                       b'q\x00oq\x01}q\x02b.').replace(b'X', xname)
894            self.assert_is_copy(X(*args), self.loads(pickle1))
895
896            # Protocol 2 (pickle2 = b'\x80\x02' + pickle1)
897            """
898             0: \x80 PROTO      2
899             2: (    MARK
900             3: c        GLOBAL     '__main__ X'
901            15: q        BINPUT     0
902            17: o        OBJ        (MARK at 2)
903            18: q    BINPUT     1
904            20: }    EMPTY_DICT
905            21: q    BINPUT     2
906            23: b    BUILD
907            24: .    STOP
908            """
909            pickle2 = (b'\x80\x02(c__main__\n'
910                       b'X\n'
911                       b'q\x00oq\x01}q\x02b.').replace(b'X', xname)
912            self.assert_is_copy(X(*args), self.loads(pickle2))
913
914    def test_maxint64(self):
915        maxint64 = (1 << 63) - 1
916        data = b'I' + str(maxint64).encode("ascii") + b'\n.'
917        got = self.loads(data)
918        self.assert_is_copy(maxint64, got)
919
920        # Try too with a bogus literal.
921        data = b'I' + str(maxint64).encode("ascii") + b'JUNK\n.'
922        self.check_unpickling_error(ValueError, data)
923
924    def test_unpickle_from_2x(self):
925        # Unpickle non-trivial data from Python 2.x.
926        loaded = self.loads(DATA_SET)
927        self.assertEqual(loaded, set([1, 2]))
928        loaded = self.loads(DATA_XRANGE)
929        self.assertEqual(type(loaded), type(range(0)))
930        self.assertEqual(list(loaded), list(range(5)))
931        loaded = self.loads(DATA_COOKIE)
932        self.assertEqual(type(loaded), SimpleCookie)
933        self.assertEqual(list(loaded.keys()), ["key"])
934        self.assertEqual(loaded["key"].value, "value")
935
936        # Exception objects without arguments pickled from 2.x with protocol 2
937        for exc in python2_exceptions_without_args:
938            data = exception_pickle.replace(b'?', exc.__name__.encode("ascii"))
939            loaded = self.loads(data)
940            self.assertIs(type(loaded), exc)
941
942        # StandardError is mapped to Exception, test that separately
943        loaded = self.loads(exception_pickle.replace(b'?', b'StandardError'))
944        self.assertIs(type(loaded), Exception)
945
946        loaded = self.loads(DATA_UEERR)
947        self.assertIs(type(loaded), UnicodeEncodeError)
948        self.assertEqual(loaded.object, "foo")
949        self.assertEqual(loaded.encoding, "ascii")
950        self.assertEqual(loaded.start, 0)
951        self.assertEqual(loaded.end, 1)
952        self.assertEqual(loaded.reason, "bad")
953
954    def test_load_python2_str_as_bytes(self):
955        # From Python 2: pickle.dumps('a\x00\xa0', protocol=0)
956        self.assertEqual(self.loads(b"S'a\\x00\\xa0'\n.",
957                                    encoding="bytes"), b'a\x00\xa0')
958        # From Python 2: pickle.dumps('a\x00\xa0', protocol=1)
959        self.assertEqual(self.loads(b'U\x03a\x00\xa0.',
960                                    encoding="bytes"), b'a\x00\xa0')
961        # From Python 2: pickle.dumps('a\x00\xa0', protocol=2)
962        self.assertEqual(self.loads(b'\x80\x02U\x03a\x00\xa0.',
963                                    encoding="bytes"), b'a\x00\xa0')
964
965    def test_load_python2_unicode_as_str(self):
966        # From Python 2: pickle.dumps(u'Ï€', protocol=0)
967        self.assertEqual(self.loads(b'V\\u03c0\n.',
968                                    encoding='bytes'), 'Ï€')
969        # From Python 2: pickle.dumps(u'Ï€', protocol=1)
970        self.assertEqual(self.loads(b'X\x02\x00\x00\x00\xcf\x80.',
971                                    encoding="bytes"), 'Ï€')
972        # From Python 2: pickle.dumps(u'Ï€', protocol=2)
973        self.assertEqual(self.loads(b'\x80\x02X\x02\x00\x00\x00\xcf\x80.',
974                                    encoding="bytes"), 'Ï€')
975
976    def test_load_long_python2_str_as_bytes(self):
977        # From Python 2: pickle.dumps('x' * 300, protocol=1)
978        self.assertEqual(self.loads(pickle.BINSTRING +
979                                    struct.pack("<I", 300) +
980                                    b'x' * 300 + pickle.STOP,
981                                    encoding='bytes'), b'x' * 300)
982
983    def test_constants(self):
984        self.assertIsNone(self.loads(b'N.'))
985        self.assertIs(self.loads(b'\x88.'), True)
986        self.assertIs(self.loads(b'\x89.'), False)
987        self.assertIs(self.loads(b'I01\n.'), True)
988        self.assertIs(self.loads(b'I00\n.'), False)
989
990    def test_empty_bytestring(self):
991        # issue 11286
992        empty = self.loads(b'\x80\x03U\x00q\x00.', encoding='koi8-r')
993        self.assertEqual(empty, '')
994
995    def test_short_binbytes(self):
996        dumped = b'\x80\x03C\x04\xe2\x82\xac\x00.'
997        self.assertEqual(self.loads(dumped), b'\xe2\x82\xac\x00')
998
999    def test_binbytes(self):
1000        dumped = b'\x80\x03B\x04\x00\x00\x00\xe2\x82\xac\x00.'
1001        self.assertEqual(self.loads(dumped), b'\xe2\x82\xac\x00')
1002
1003    @requires_32b
1004    def test_negative_32b_binbytes(self):
1005        # On 32-bit builds, a BINBYTES of 2**31 or more is refused
1006        dumped = b'\x80\x03B\xff\xff\xff\xffxyzq\x00.'
1007        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1008                                    dumped)
1009
1010    @requires_32b
1011    def test_negative_32b_binunicode(self):
1012        # On 32-bit builds, a BINUNICODE of 2**31 or more is refused
1013        dumped = b'\x80\x03X\xff\xff\xff\xffxyzq\x00.'
1014        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1015                                    dumped)
1016
1017    def test_short_binunicode(self):
1018        dumped = b'\x80\x04\x8c\x04\xe2\x82\xac\x00.'
1019        self.assertEqual(self.loads(dumped), '\u20ac\x00')
1020
1021    def test_misc_get(self):
1022        self.check_unpickling_error(KeyError, b'g0\np0')
1023        self.assert_is_copy([(100,), (100,)],
1024                            self.loads(b'((Kdtp0\nh\x00l.))'))
1025
1026    def test_binbytes8(self):
1027        dumped = b'\x80\x04\x8e\4\0\0\0\0\0\0\0\xe2\x82\xac\x00.'
1028        self.assertEqual(self.loads(dumped), b'\xe2\x82\xac\x00')
1029
1030    def test_binunicode8(self):
1031        dumped = b'\x80\x04\x8d\4\0\0\0\0\0\0\0\xe2\x82\xac\x00.'
1032        self.assertEqual(self.loads(dumped), '\u20ac\x00')
1033
1034    def test_bytearray8(self):
1035        dumped = b'\x80\x05\x96\x03\x00\x00\x00\x00\x00\x00\x00xxx.'
1036        self.assertEqual(self.loads(dumped), bytearray(b'xxx'))
1037
1038    @requires_32b
1039    def test_large_32b_binbytes8(self):
1040        dumped = b'\x80\x04\x8e\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
1041        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1042                                    dumped)
1043
1044    @requires_32b
1045    def test_large_32b_bytearray8(self):
1046        dumped = b'\x80\x05\x96\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
1047        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1048                                    dumped)
1049
1050    @requires_32b
1051    def test_large_32b_binunicode8(self):
1052        dumped = b'\x80\x04\x8d\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
1053        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1054                                    dumped)
1055
1056    def test_get(self):
1057        pickled = b'((lp100000\ng100000\nt.'
1058        unpickled = self.loads(pickled)
1059        self.assertEqual(unpickled, ([],)*2)
1060        self.assertIs(unpickled[0], unpickled[1])
1061
1062    def test_binget(self):
1063        pickled = b'(]q\xffh\xfft.'
1064        unpickled = self.loads(pickled)
1065        self.assertEqual(unpickled, ([],)*2)
1066        self.assertIs(unpickled[0], unpickled[1])
1067
1068    def test_long_binget(self):
1069        pickled = b'(]r\x00\x00\x01\x00j\x00\x00\x01\x00t.'
1070        unpickled = self.loads(pickled)
1071        self.assertEqual(unpickled, ([],)*2)
1072        self.assertIs(unpickled[0], unpickled[1])
1073
1074    def test_dup(self):
1075        pickled = b'((l2t.'
1076        unpickled = self.loads(pickled)
1077        self.assertEqual(unpickled, ([],)*2)
1078        self.assertIs(unpickled[0], unpickled[1])
1079
1080    def test_negative_put(self):
1081        # Issue #12847
1082        dumped = b'Va\np-1\n.'
1083        self.check_unpickling_error(ValueError, dumped)
1084
1085    @requires_32b
1086    def test_negative_32b_binput(self):
1087        # Issue #12847
1088        dumped = b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.'
1089        self.check_unpickling_error(ValueError, dumped)
1090
1091    def test_badly_escaped_string(self):
1092        self.check_unpickling_error(ValueError, b"S'\\'\n.")
1093
1094    def test_badly_quoted_string(self):
1095        # Issue #17710
1096        badpickles = [b"S'\n.",
1097                      b'S"\n.',
1098                      b'S\' \n.',
1099                      b'S" \n.',
1100                      b'S\'"\n.',
1101                      b'S"\'\n.',
1102                      b"S' ' \n.",
1103                      b'S" " \n.',
1104                      b"S ''\n.",
1105                      b'S ""\n.',
1106                      b'S \n.',
1107                      b'S\n.',
1108                      b'S.']
1109        for p in badpickles:
1110            self.check_unpickling_error(pickle.UnpicklingError, p)
1111
1112    def test_correctly_quoted_string(self):
1113        goodpickles = [(b"S''\n.", ''),
1114                       (b'S""\n.', ''),
1115                       (b'S"\\n"\n.', '\n'),
1116                       (b"S'\\n'\n.", '\n')]
1117        for p, expected in goodpickles:
1118            self.assertEqual(self.loads(p), expected)
1119
1120    def test_frame_readline(self):
1121        pickled = b'\x80\x04\x95\x05\x00\x00\x00\x00\x00\x00\x00I42\n.'
1122        #    0: \x80 PROTO      4
1123        #    2: \x95 FRAME      5
1124        #   11: I    INT        42
1125        #   15: .    STOP
1126        self.assertEqual(self.loads(pickled), 42)
1127
1128    def test_compat_unpickle(self):
1129        # xrange(1, 7)
1130        pickled = b'\x80\x02c__builtin__\nxrange\nK\x01K\x07K\x01\x87R.'
1131        unpickled = self.loads(pickled)
1132        self.assertIs(type(unpickled), range)
1133        self.assertEqual(unpickled, range(1, 7))
1134        self.assertEqual(list(unpickled), [1, 2, 3, 4, 5, 6])
1135        # reduce
1136        pickled = b'\x80\x02c__builtin__\nreduce\n.'
1137        self.assertIs(self.loads(pickled), functools.reduce)
1138        # whichdb.whichdb
1139        pickled = b'\x80\x02cwhichdb\nwhichdb\n.'
1140        self.assertIs(self.loads(pickled), dbm.whichdb)
1141        # Exception(), StandardError()
1142        for name in (b'Exception', b'StandardError'):
1143            pickled = (b'\x80\x02cexceptions\n' + name + b'\nU\x03ugh\x85R.')
1144            unpickled = self.loads(pickled)
1145            self.assertIs(type(unpickled), Exception)
1146            self.assertEqual(str(unpickled), 'ugh')
1147        # UserDict.UserDict({1: 2}), UserDict.IterableUserDict({1: 2})
1148        for name in (b'UserDict', b'IterableUserDict'):
1149            pickled = (b'\x80\x02(cUserDict\n' + name +
1150                       b'\no}U\x04data}K\x01K\x02ssb.')
1151            unpickled = self.loads(pickled)
1152            self.assertIs(type(unpickled), collections.UserDict)
1153            self.assertEqual(unpickled, collections.UserDict({1: 2}))
1154
1155    def test_bad_stack(self):
1156        badpickles = [
1157            b'.',                       # STOP
1158            b'0',                       # POP
1159            b'1',                       # POP_MARK
1160            b'2',                       # DUP
1161            b'(2',
1162            b'R',                       # REDUCE
1163            b')R',
1164            b'a',                       # APPEND
1165            b'Na',
1166            b'b',                       # BUILD
1167            b'Nb',
1168            b'd',                       # DICT
1169            b'e',                       # APPENDS
1170            b'(e',
1171            b'ibuiltins\nlist\n',       # INST
1172            b'l',                       # LIST
1173            b'o',                       # OBJ
1174            b'(o',
1175            b'p1\n',                    # PUT
1176            b'q\x00',                   # BINPUT
1177            b'r\x00\x00\x00\x00',       # LONG_BINPUT
1178            b's',                       # SETITEM
1179            b'Ns',
1180            b'NNs',
1181            b't',                       # TUPLE
1182            b'u',                       # SETITEMS
1183            b'(u',
1184            b'}(Nu',
1185            b'\x81',                    # NEWOBJ
1186            b')\x81',
1187            b'\x85',                    # TUPLE1
1188            b'\x86',                    # TUPLE2
1189            b'N\x86',
1190            b'\x87',                    # TUPLE3
1191            b'N\x87',
1192            b'NN\x87',
1193            b'\x90',                    # ADDITEMS
1194            b'(\x90',
1195            b'\x91',                    # FROZENSET
1196            b'\x92',                    # NEWOBJ_EX
1197            b')}\x92',
1198            b'\x93',                    # STACK_GLOBAL
1199            b'Vlist\n\x93',
1200            b'\x94',                    # MEMOIZE
1201        ]
1202        for p in badpickles:
1203            self.check_unpickling_error(self.bad_stack_errors, p)
1204
1205    def test_bad_mark(self):
1206        badpickles = [
1207            b'N(.',                     # STOP
1208            b'N(2',                     # DUP
1209            b'cbuiltins\nlist\n)(R',    # REDUCE
1210            b'cbuiltins\nlist\n()R',
1211            b']N(a',                    # APPEND
1212                                        # BUILD
1213            b'cbuiltins\nValueError\n)R}(b',
1214            b'cbuiltins\nValueError\n)R(}b',
1215            b'(Nd',                     # DICT
1216            b'N(p1\n',                  # PUT
1217            b'N(q\x00',                 # BINPUT
1218            b'N(r\x00\x00\x00\x00',     # LONG_BINPUT
1219            b'}NN(s',                   # SETITEM
1220            b'}N(Ns',
1221            b'}(NNs',
1222            b'}((u',                    # SETITEMS
1223            b'cbuiltins\nlist\n)(\x81', # NEWOBJ
1224            b'cbuiltins\nlist\n()\x81',
1225            b'N(\x85',                  # TUPLE1
1226            b'NN(\x86',                 # TUPLE2
1227            b'N(N\x86',
1228            b'NNN(\x87',                # TUPLE3
1229            b'NN(N\x87',
1230            b'N(NN\x87',
1231            b']((\x90',                 # ADDITEMS
1232                                        # NEWOBJ_EX
1233            b'cbuiltins\nlist\n)}(\x92',
1234            b'cbuiltins\nlist\n)(}\x92',
1235            b'cbuiltins\nlist\n()}\x92',
1236                                        # STACK_GLOBAL
1237            b'Vbuiltins\n(Vlist\n\x93',
1238            b'Vbuiltins\nVlist\n(\x93',
1239            b'N(\x94',                  # MEMOIZE
1240        ]
1241        for p in badpickles:
1242            self.check_unpickling_error(self.bad_stack_errors, p)
1243
1244    def test_truncated_data(self):
1245        self.check_unpickling_error(EOFError, b'')
1246        self.check_unpickling_error(EOFError, b'N')
1247        badpickles = [
1248            b'B',                       # BINBYTES
1249            b'B\x03\x00\x00',
1250            b'B\x03\x00\x00\x00',
1251            b'B\x03\x00\x00\x00ab',
1252            b'C',                       # SHORT_BINBYTES
1253            b'C\x03',
1254            b'C\x03ab',
1255            b'F',                       # FLOAT
1256            b'F0.0',
1257            b'F0.00',
1258            b'G',                       # BINFLOAT
1259            b'G\x00\x00\x00\x00\x00\x00\x00',
1260            b'I',                       # INT
1261            b'I0',
1262            b'J',                       # BININT
1263            b'J\x00\x00\x00',
1264            b'K',                       # BININT1
1265            b'L',                       # LONG
1266            b'L0',
1267            b'L10',
1268            b'L0L',
1269            b'L10L',
1270            b'M',                       # BININT2
1271            b'M\x00',
1272            # b'P',                       # PERSID
1273            # b'Pabc',
1274            b'S',                       # STRING
1275            b"S'abc'",
1276            b'T',                       # BINSTRING
1277            b'T\x03\x00\x00',
1278            b'T\x03\x00\x00\x00',
1279            b'T\x03\x00\x00\x00ab',
1280            b'U',                       # SHORT_BINSTRING
1281            b'U\x03',
1282            b'U\x03ab',
1283            b'V',                       # UNICODE
1284            b'Vabc',
1285            b'X',                       # BINUNICODE
1286            b'X\x03\x00\x00',
1287            b'X\x03\x00\x00\x00',
1288            b'X\x03\x00\x00\x00ab',
1289            b'(c',                      # GLOBAL
1290            b'(cbuiltins',
1291            b'(cbuiltins\n',
1292            b'(cbuiltins\nlist',
1293            b'Ng',                      # GET
1294            b'Ng0',
1295            b'(i',                      # INST
1296            b'(ibuiltins',
1297            b'(ibuiltins\n',
1298            b'(ibuiltins\nlist',
1299            b'Nh',                      # BINGET
1300            b'Nj',                      # LONG_BINGET
1301            b'Nj\x00\x00\x00',
1302            b'Np',                      # PUT
1303            b'Np0',
1304            b'Nq',                      # BINPUT
1305            b'Nr',                      # LONG_BINPUT
1306            b'Nr\x00\x00\x00',
1307            b'\x80',                    # PROTO
1308            b'\x82',                    # EXT1
1309            b'\x83',                    # EXT2
1310            b'\x84\x01',
1311            b'\x84',                    # EXT4
1312            b'\x84\x01\x00\x00',
1313            b'\x8a',                    # LONG1
1314            b'\x8b',                    # LONG4
1315            b'\x8b\x00\x00\x00',
1316            b'\x8c',                    # SHORT_BINUNICODE
1317            b'\x8c\x03',
1318            b'\x8c\x03ab',
1319            b'\x8d',                    # BINUNICODE8
1320            b'\x8d\x03\x00\x00\x00\x00\x00\x00',
1321            b'\x8d\x03\x00\x00\x00\x00\x00\x00\x00',
1322            b'\x8d\x03\x00\x00\x00\x00\x00\x00\x00ab',
1323            b'\x8e',                    # BINBYTES8
1324            b'\x8e\x03\x00\x00\x00\x00\x00\x00',
1325            b'\x8e\x03\x00\x00\x00\x00\x00\x00\x00',
1326            b'\x8e\x03\x00\x00\x00\x00\x00\x00\x00ab',
1327            b'\x96',                    # BYTEARRAY8
1328            b'\x96\x03\x00\x00\x00\x00\x00\x00',
1329            b'\x96\x03\x00\x00\x00\x00\x00\x00\x00',
1330            b'\x96\x03\x00\x00\x00\x00\x00\x00\x00ab',
1331            b'\x95',                    # FRAME
1332            b'\x95\x02\x00\x00\x00\x00\x00\x00',
1333            b'\x95\x02\x00\x00\x00\x00\x00\x00\x00',
1334            b'\x95\x02\x00\x00\x00\x00\x00\x00\x00N',
1335        ]
1336        for p in badpickles:
1337            self.check_unpickling_error(self.truncated_errors, p)
1338
    @reap_threads
    def test_unpickle_module_race(self):
        # https://bugs.python.org/issue34572
        # Two threads unpickle an instance of a class whose module has not
        # been imported yet; both must end up with a ToBeUnpickled instance.

        # Helper module holding a two-party barrier.
        locker_module = dedent("""
        import threading
        barrier = threading.Barrier(2)
        """)
        # Module whose import waits on that barrier *before* the class is
        # defined, so the import stays in progress until released.
        locking_import_module = dedent("""
        import locker
        locker.barrier.wait()
        class ToBeUnpickled(object):
            pass
        """)

        # Write both modules into a temporary directory placed first on
        # sys.path; every change is undone through addCleanup.
        os.mkdir(TESTFN)
        self.addCleanup(shutil.rmtree, TESTFN)
        sys.path.insert(0, TESTFN)
        self.addCleanup(sys.path.remove, TESTFN)
        with open(os.path.join(TESTFN, "locker.py"), "wb") as f:
            f.write(locker_module.encode('utf-8'))
        with open(os.path.join(TESTFN, "locking_import.py"), "wb") as f:
            f.write(locking_import_module.encode('utf-8'))
        self.addCleanup(forget, "locker")
        self.addCleanup(forget, "locking_import")

        # Imported here so this thread can reach locker.barrier below.
        import locker

        # Protocol 3 pickle referring to locking_import.ToBeUnpickled.
        pickle_bytes = (
            b'\x80\x03clocking_import\nToBeUnpickled\nq\x00)\x81q\x01.')

        # Then try to unpickle two of these simultaneously
        # One of them will cause the module import, and we want it to block
        # until the other one either:
        #   - fails (before the patch for this issue)
        #   - blocks on the import lock for the module, as it should
        results = []
        # Three parties: the two worker threads plus this thread.
        barrier = threading.Barrier(3)
        def t():
            # This ensures the threads have all started
            # presumably barrier release is faster than thread startup
            barrier.wait()
            # NOTE(review): calls pickle.loads directly, not self.loads.
            results.append(pickle.loads(pickle_bytes))

        t1 = threading.Thread(target=t)
        t2 = threading.Thread(target=t)
        t1.start()
        t2.start()

        barrier.wait()
        # could have delay here
        # Second party on locker.barrier: releases the thread that is
        # waiting inside the import of locking_import.
        locker.barrier.wait()

        t1.join()
        t2.join()

        from locking_import import ToBeUnpickled
        self.assertEqual(
            [type(x) for x in results],
            [ToBeUnpickled] * 2)
1398
1399
1400
1401class AbstractPickleTests(unittest.TestCase):
1402    # Subclass must define self.dumps, self.loads.
1403
1404    optimized = False
1405
1406    _testdata = AbstractUnpickleTests._testdata
1407
    def setUp(self):
        # No per-test fixture is created here.
        pass
1410
1411    assert_is_copy = AbstractUnpickleTests.assert_is_copy
1412
1413    def test_misc(self):
1414        # test various datatypes not tested by testdata
1415        for proto in protocols:
1416            x = myint(4)
1417            s = self.dumps(x, proto)
1418            y = self.loads(s)
1419            self.assert_is_copy(x, y)
1420
1421            x = (1, ())
1422            s = self.dumps(x, proto)
1423            y = self.loads(s)
1424            self.assert_is_copy(x, y)
1425
1426            x = initarg(1, x)
1427            s = self.dumps(x, proto)
1428            y = self.loads(s)
1429            self.assert_is_copy(x, y)
1430
1431        # XXX test __reduce__ protocol?
1432
1433    def test_roundtrip_equality(self):
1434        expected = self._testdata
1435        for proto in protocols:
1436            s = self.dumps(expected, proto)
1437            got = self.loads(s)
1438            self.assert_is_copy(expected, got)
1439
1440    # There are gratuitous differences between pickles produced by
1441    # pickle and cPickle, largely because cPickle starts PUT indices at
1442    # 1 and pickle starts them at 0.  See XXX comment in cPickle's put2() --
1443    # there's a comment with an exclamation point there whose meaning
1444    # is a mystery.  cPickle also suppresses PUT for objects with a refcount
1445    # of 1.
1446    def dont_test_disassembly(self):
1447        from io import StringIO
1448        from pickletools import dis
1449
1450        for proto, expected in (0, DATA0_DIS), (1, DATA1_DIS):
1451            s = self.dumps(self._testdata, proto)
1452            filelike = StringIO()
1453            dis(s, out=filelike)
1454            got = filelike.getvalue()
1455            self.assertEqual(expected, got)
1456
1457    def test_recursive_list(self):
1458        l = []
1459        l.append(l)
1460        for proto in protocols:
1461            s = self.dumps(l, proto)
1462            x = self.loads(s)
1463            self.assertIsInstance(x, list)
1464            self.assertEqual(len(x), 1)
1465            self.assertIs(x[0], x)
1466
1467    def test_recursive_tuple_and_list(self):
1468        t = ([],)
1469        t[0].append(t)
1470        for proto in protocols:
1471            s = self.dumps(t, proto)
1472            x = self.loads(s)
1473            self.assertIsInstance(x, tuple)
1474            self.assertEqual(len(x), 1)
1475            self.assertIsInstance(x[0], list)
1476            self.assertEqual(len(x[0]), 1)
1477            self.assertIs(x[0][0], x)
1478
1479    def test_recursive_dict(self):
1480        d = {}
1481        d[1] = d
1482        for proto in protocols:
1483            s = self.dumps(d, proto)
1484            x = self.loads(s)
1485            self.assertIsInstance(x, dict)
1486            self.assertEqual(list(x.keys()), [1])
1487            self.assertIs(x[1], x)
1488
1489    def test_recursive_dict_key(self):
1490        d = {}
1491        k = K(d)
1492        d[k] = 1
1493        for proto in protocols:
1494            s = self.dumps(d, proto)
1495            x = self.loads(s)
1496            self.assertIsInstance(x, dict)
1497            self.assertEqual(len(x.keys()), 1)
1498            self.assertIsInstance(list(x.keys())[0], K)
1499            self.assertIs(list(x.keys())[0].value, x)
1500
1501    def test_recursive_set(self):
1502        y = set()
1503        k = K(y)
1504        y.add(k)
1505        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
1506            s = self.dumps(y, proto)
1507            x = self.loads(s)
1508            self.assertIsInstance(x, set)
1509            self.assertEqual(len(x), 1)
1510            self.assertIsInstance(list(x)[0], K)
1511            self.assertIs(list(x)[0].value, x)
1512
1513    def test_recursive_list_subclass(self):
1514        y = MyList()
1515        y.append(y)
1516        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
1517            s = self.dumps(y, proto)
1518            x = self.loads(s)
1519            self.assertIsInstance(x, MyList)
1520            self.assertEqual(len(x), 1)
1521            self.assertIs(x[0], x)
1522
1523    def test_recursive_dict_subclass(self):
1524        d = MyDict()
1525        d[1] = d
1526        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
1527            s = self.dumps(d, proto)
1528            x = self.loads(s)
1529            self.assertIsInstance(x, MyDict)
1530            self.assertEqual(list(x.keys()), [1])
1531            self.assertIs(x[1], x)
1532
1533    def test_recursive_dict_subclass_key(self):
1534        d = MyDict()
1535        k = K(d)
1536        d[k] = 1
1537        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
1538            s = self.dumps(d, proto)
1539            x = self.loads(s)
1540            self.assertIsInstance(x, MyDict)
1541            self.assertEqual(len(list(x.keys())), 1)
1542            self.assertIsInstance(list(x.keys())[0], K)
1543            self.assertIs(list(x.keys())[0].value, x)
1544
1545    def test_recursive_inst(self):
1546        i = C()
1547        i.attr = i
1548        for proto in protocols:
1549            s = self.dumps(i, proto)
1550            x = self.loads(s)
1551            self.assertIsInstance(x, C)
1552            self.assertEqual(dir(x), dir(i))
1553            self.assertIs(x.attr, x)
1554
1555    def test_recursive_multi(self):
1556        l = []
1557        d = {1:l}
1558        i = C()
1559        i.attr = d
1560        l.append(i)
1561        for proto in protocols:
1562            s = self.dumps(l, proto)
1563            x = self.loads(s)
1564            self.assertIsInstance(x, list)
1565            self.assertEqual(len(x), 1)
1566            self.assertEqual(dir(x[0]), dir(i))
1567            self.assertEqual(list(x[0].attr.keys()), [1])
1568            self.assertTrue(x[0].attr[1] is x)
1569
1570    def check_recursive_collection_and_inst(self, factory):
1571        h = H()
1572        y = factory([h])
1573        h.attr = y
1574        for proto in protocols:
1575            s = self.dumps(y, proto)
1576            x = self.loads(s)
1577            self.assertIsInstance(x, type(y))
1578            self.assertEqual(len(x), 1)
1579            self.assertIsInstance(list(x)[0], H)
1580            self.assertIs(list(x)[0].attr, x)
1581
    def test_recursive_list_and_inst(self):
        # Runs the shared collection/instance cycle check with list.
        self.check_recursive_collection_and_inst(list)
1584
    def test_recursive_tuple_and_inst(self):
        # Runs the shared collection/instance cycle check with tuple.
        self.check_recursive_collection_and_inst(tuple)
1587
    def test_recursive_dict_and_inst(self):
        # Runs the shared cycle check with dict.fromkeys, so the H
        # instance becomes the dict's key.
        self.check_recursive_collection_and_inst(dict.fromkeys)
1590
    def test_recursive_set_and_inst(self):
        # Runs the shared collection/instance cycle check with set.
        self.check_recursive_collection_and_inst(set)
1593
    def test_recursive_frozenset_and_inst(self):
        # Runs the shared collection/instance cycle check with frozenset.
        self.check_recursive_collection_and_inst(frozenset)
1596
    def test_recursive_list_subclass_and_inst(self):
        # Runs the shared collection/instance cycle check with MyList.
        self.check_recursive_collection_and_inst(MyList)
1599
    def test_recursive_tuple_subclass_and_inst(self):
        # Runs the shared collection/instance cycle check with MyTuple.
        self.check_recursive_collection_and_inst(MyTuple)
1602
    def test_recursive_dict_subclass_and_inst(self):
        # Runs the shared cycle check with MyDict.fromkeys, so the H
        # instance becomes the mapping's key.
        self.check_recursive_collection_and_inst(MyDict.fromkeys)
1605
    def test_recursive_set_subclass_and_inst(self):
        # Runs the shared collection/instance cycle check with MySet.
        self.check_recursive_collection_and_inst(MySet)
1608
    def test_recursive_frozenset_subclass_and_inst(self):
        # Runs the shared collection/instance cycle check with MyFrozenSet.
        self.check_recursive_collection_and_inst(MyFrozenSet)
1611
1612    def test_unicode(self):
1613        endcases = ['', '<\\u>', '<\\\u1234>', '<\n>',
1614                    '<\\>', '<\\\U00012345>',
1615                    # surrogates
1616                    '<\udc80>']
1617        for proto in protocols:
1618            for u in endcases:
1619                p = self.dumps(u, proto)
1620                u2 = self.loads(p)
1621                self.assert_is_copy(u, u2)
1622
1623    def test_unicode_high_plane(self):
1624        t = '\U00012345'
1625        for proto in protocols:
1626            p = self.dumps(t, proto)
1627            t2 = self.loads(p)
1628            self.assert_is_copy(t, t2)
1629
1630    def test_bytes(self):
1631        for proto in protocols:
1632            for s in b'', b'xyz', b'xyz'*100:
1633                p = self.dumps(s, proto)
1634                self.assert_is_copy(s, self.loads(p))
1635            for s in [bytes([i]) for i in range(256)]:
1636                p = self.dumps(s, proto)
1637                self.assert_is_copy(s, self.loads(p))
1638            for s in [bytes([i, i]) for i in range(256)]:
1639                p = self.dumps(s, proto)
1640                self.assert_is_copy(s, self.loads(p))
1641
1642    def test_bytearray(self):
1643        for proto in protocols:
1644            for s in b'', b'xyz', b'xyz'*100:
1645                b = bytearray(s)
1646                p = self.dumps(b, proto)
1647                bb = self.loads(p)
1648                self.assertIsNot(bb, b)
1649                self.assert_is_copy(b, bb)
1650                if proto <= 3:
1651                    # bytearray is serialized using a global reference
1652                    self.assertIn(b'bytearray', p)
1653                    self.assertTrue(opcode_in_pickle(pickle.GLOBAL, p))
1654                elif proto == 4:
1655                    self.assertIn(b'bytearray', p)
1656                    self.assertTrue(opcode_in_pickle(pickle.STACK_GLOBAL, p))
1657                elif proto == 5:
1658                    self.assertNotIn(b'bytearray', p)
1659                    self.assertTrue(opcode_in_pickle(pickle.BYTEARRAY8, p))
1660
1661    def test_ints(self):
1662        for proto in protocols:
1663            n = sys.maxsize
1664            while n:
1665                for expected in (-n, n):
1666                    s = self.dumps(expected, proto)
1667                    n2 = self.loads(s)
1668                    self.assert_is_copy(expected, n2)
1669                n = n >> 1
1670
1671    def test_long(self):
1672        for proto in protocols:
1673            # 256 bytes is where LONG4 begins.
1674            for nbits in 1, 8, 8*254, 8*255, 8*256, 8*257:
1675                nbase = 1 << nbits
1676                for npos in nbase-1, nbase, nbase+1:
1677                    for n in npos, -npos:
1678                        pickle = self.dumps(n, proto)
1679                        got = self.loads(pickle)
1680                        self.assert_is_copy(n, got)
1681        # Try a monster.  This is quadratic-time in protos 0 & 1, so don't
1682        # bother with those.
1683        nbase = int("deadbeeffeedface", 16)
1684        nbase += nbase << 1000000
1685        for n in nbase, -nbase:
1686            p = self.dumps(n, 2)
1687            got = self.loads(p)
1688            # assert_is_copy is very expensive here as it precomputes
1689            # a failure message by computing the repr() of n and got,
1690            # we just do the check ourselves.
1691            self.assertIs(type(got), int)
1692            self.assertEqual(n, got)
1693
1694    def test_float(self):
1695        test_values = [0.0, 4.94e-324, 1e-310, 7e-308, 6.626e-34, 0.1, 0.5,
1696                       3.14, 263.44582062374053, 6.022e23, 1e30]
1697        test_values = test_values + [-x for x in test_values]
1698        for proto in protocols:
1699            for value in test_values:
1700                pickle = self.dumps(value, proto)
1701                got = self.loads(pickle)
1702                self.assert_is_copy(value, got)
1703
1704    @run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
1705    def test_float_format(self):
1706        # make sure that floats are formatted locale independent with proto 0
1707        self.assertEqual(self.dumps(1.2, 0)[0:3], b'F1.')
1708
1709    def test_reduce(self):
1710        for proto in protocols:
1711            inst = AAA()
1712            dumped = self.dumps(inst, proto)
1713            loaded = self.loads(dumped)
1714            self.assertEqual(loaded, REDUCE_A)
1715
1716    def test_getinitargs(self):
1717        for proto in protocols:
1718            inst = initarg(1, 2)
1719            dumped = self.dumps(inst, proto)
1720            loaded = self.loads(dumped)
1721            self.assert_is_copy(inst, loaded)
1722
1723    def test_metaclass(self):
1724        a = use_metaclass()
1725        for proto in protocols:
1726            s = self.dumps(a, proto)
1727            b = self.loads(s)
1728            self.assertEqual(a.__class__, b.__class__)
1729
1730    def test_dynamic_class(self):
1731        a = create_dynamic_class("my_dynamic_class", (object,))
1732        copyreg.pickle(pickling_metaclass, pickling_metaclass.__reduce__)
1733        for proto in protocols:
1734            s = self.dumps(a, proto)
1735            b = self.loads(s)
1736            self.assertEqual(a, b)
1737            self.assertIs(type(a), type(b))
1738
1739    def test_structseq(self):
1740        import time
1741        import os
1742
1743        t = time.localtime()
1744        for proto in protocols:
1745            s = self.dumps(t, proto)
1746            u = self.loads(s)
1747            self.assert_is_copy(t, u)
1748            t = os.stat(os.curdir)
1749            s = self.dumps(t, proto)
1750            u = self.loads(s)
1751            self.assert_is_copy(t, u)
1752            if hasattr(os, "statvfs"):
1753                t = os.statvfs(os.curdir)
1754                s = self.dumps(t, proto)
1755                u = self.loads(s)
1756                self.assert_is_copy(t, u)
1757
1758    def test_ellipsis(self):
1759        for proto in protocols:
1760            s = self.dumps(..., proto)
1761            u = self.loads(s)
1762            self.assertIs(..., u)
1763
1764    def test_notimplemented(self):
1765        for proto in protocols:
1766            s = self.dumps(NotImplemented, proto)
1767            u = self.loads(s)
1768            self.assertIs(NotImplemented, u)
1769
1770    def test_singleton_types(self):
1771        # Issue #6477: Test that types of built-in singletons can be pickled.
1772        singletons = [None, ..., NotImplemented]
1773        for singleton in singletons:
1774            for proto in protocols:
1775                s = self.dumps(type(singleton), proto)
1776                u = self.loads(s)
1777                self.assertIs(type(singleton), u)
1778
1779    # Tests for protocol 2
1780
1781    def test_proto(self):
1782        for proto in protocols:
1783            pickled = self.dumps(None, proto)
1784            if proto >= 2:
1785                proto_header = pickle.PROTO + bytes([proto])
1786                self.assertTrue(pickled.startswith(proto_header))
1787            else:
1788                self.assertEqual(count_opcode(pickle.PROTO, pickled), 0)
1789
1790        oob = protocols[-1] + 1     # a future protocol
1791        build_none = pickle.NONE + pickle.STOP
1792        badpickle = pickle.PROTO + bytes([oob]) + build_none
1793        try:
1794            self.loads(badpickle)
1795        except ValueError as err:
1796            self.assertIn("unsupported pickle protocol", str(err))
1797        else:
1798            self.fail("expected bad protocol number to raise ValueError")
1799
1800    def test_long1(self):
1801        x = 12345678910111213141516178920
1802        for proto in protocols:
1803            s = self.dumps(x, proto)
1804            y = self.loads(s)
1805            self.assert_is_copy(x, y)
1806            self.assertEqual(opcode_in_pickle(pickle.LONG1, s), proto >= 2)
1807
1808    def test_long4(self):
1809        x = 12345678910111213141516178920 << (256*8)
1810        for proto in protocols:
1811            s = self.dumps(x, proto)
1812            y = self.loads(s)
1813            self.assert_is_copy(x, y)
1814            self.assertEqual(opcode_in_pickle(pickle.LONG4, s), proto >= 2)
1815
1816    def test_short_tuples(self):
1817        # Map (proto, len(tuple)) to expected opcode.
1818        expected_opcode = {(0, 0): pickle.TUPLE,
1819                           (0, 1): pickle.TUPLE,
1820                           (0, 2): pickle.TUPLE,
1821                           (0, 3): pickle.TUPLE,
1822                           (0, 4): pickle.TUPLE,
1823
1824                           (1, 0): pickle.EMPTY_TUPLE,
1825                           (1, 1): pickle.TUPLE,
1826                           (1, 2): pickle.TUPLE,
1827                           (1, 3): pickle.TUPLE,
1828                           (1, 4): pickle.TUPLE,
1829
1830                           (2, 0): pickle.EMPTY_TUPLE,
1831                           (2, 1): pickle.TUPLE1,
1832                           (2, 2): pickle.TUPLE2,
1833                           (2, 3): pickle.TUPLE3,
1834                           (2, 4): pickle.TUPLE,
1835
1836                           (3, 0): pickle.EMPTY_TUPLE,
1837                           (3, 1): pickle.TUPLE1,
1838                           (3, 2): pickle.TUPLE2,
1839                           (3, 3): pickle.TUPLE3,
1840                           (3, 4): pickle.TUPLE,
1841                          }
1842        a = ()
1843        b = (1,)
1844        c = (1, 2)
1845        d = (1, 2, 3)
1846        e = (1, 2, 3, 4)
1847        for proto in protocols:
1848            for x in a, b, c, d, e:
1849                s = self.dumps(x, proto)
1850                y = self.loads(s)
1851                self.assert_is_copy(x, y)
1852                expected = expected_opcode[min(proto, 3), len(x)]
1853                self.assertTrue(opcode_in_pickle(expected, s))
1854
1855    def test_singletons(self):
1856        # Map (proto, singleton) to expected opcode.
1857        expected_opcode = {(0, None): pickle.NONE,
1858                           (1, None): pickle.NONE,
1859                           (2, None): pickle.NONE,
1860                           (3, None): pickle.NONE,
1861
1862                           (0, True): pickle.INT,
1863                           (1, True): pickle.INT,
1864                           (2, True): pickle.NEWTRUE,
1865                           (3, True): pickle.NEWTRUE,
1866
1867                           (0, False): pickle.INT,
1868                           (1, False): pickle.INT,
1869                           (2, False): pickle.NEWFALSE,
1870                           (3, False): pickle.NEWFALSE,
1871                          }
1872        for proto in protocols:
1873            for x in None, False, True:
1874                s = self.dumps(x, proto)
1875                y = self.loads(s)
1876                self.assertTrue(x is y, (proto, x, s, y))
1877                expected = expected_opcode[min(proto, 3), x]
1878                self.assertTrue(opcode_in_pickle(expected, s))
1879
1880    def test_newobj_tuple(self):
1881        x = MyTuple([1, 2, 3])
1882        x.foo = 42
1883        x.bar = "hello"
1884        for proto in protocols:
1885            s = self.dumps(x, proto)
1886            y = self.loads(s)
1887            self.assert_is_copy(x, y)
1888
1889    def test_newobj_list(self):
1890        x = MyList([1, 2, 3])
1891        x.foo = 42
1892        x.bar = "hello"
1893        for proto in protocols:
1894            s = self.dumps(x, proto)
1895            y = self.loads(s)
1896            self.assert_is_copy(x, y)
1897
1898    def test_newobj_generic(self):
1899        for proto in protocols:
1900            for C in myclasses:
1901                B = C.__base__
1902                x = C(C.sample)
1903                x.foo = 42
1904                s = self.dumps(x, proto)
1905                y = self.loads(s)
1906                detail = (proto, C, B, x, y, type(y))
1907                self.assert_is_copy(x, y) # XXX revisit
1908                self.assertEqual(B(x), B(y), detail)
1909                self.assertEqual(x.__dict__, y.__dict__, detail)
1910
1911    def test_newobj_proxies(self):
1912        # NEWOBJ should use the __class__ rather than the raw type
1913        classes = myclasses[:]
1914        # Cannot create weakproxies to these classes
1915        for c in (MyInt, MyTuple):
1916            classes.remove(c)
1917        for proto in protocols:
1918            for C in classes:
1919                B = C.__base__
1920                x = C(C.sample)
1921                x.foo = 42
1922                p = weakref.proxy(x)
1923                s = self.dumps(p, proto)
1924                y = self.loads(s)
1925                self.assertEqual(type(y), type(x))  # rather than type(p)
1926                detail = (proto, C, B, x, y, type(y))
1927                self.assertEqual(B(x), B(y), detail)
1928                self.assertEqual(x.__dict__, y.__dict__, detail)
1929
1930    def test_newobj_not_class(self):
1931        # Issue 24552
1932        global SimpleNewObj
1933        save = SimpleNewObj
1934        o = SimpleNewObj.__new__(SimpleNewObj)
1935        b = self.dumps(o, 4)
1936        try:
1937            SimpleNewObj = 42
1938            self.assertRaises((TypeError, pickle.UnpicklingError), self.loads, b)
1939        finally:
1940            SimpleNewObj = save
1941
1942    # Register a type with copyreg, with extension code extcode.  Pickle
1943    # an object of that type.  Check that the resulting pickle uses opcode
1944    # (EXT[124]) under proto 2, and not in proto 1.
1945
1946    def produce_global_ext(self, extcode, opcode):
1947        e = ExtensionSaver(extcode)
1948        try:
1949            copyreg.add_extension(__name__, "MyList", extcode)
1950            x = MyList([1, 2, 3])
1951            x.foo = 42
1952            x.bar = "hello"
1953
1954            # Dump using protocol 1 for comparison.
1955            s1 = self.dumps(x, 1)
1956            self.assertIn(__name__.encode("utf-8"), s1)
1957            self.assertIn(b"MyList", s1)
1958            self.assertFalse(opcode_in_pickle(opcode, s1))
1959
1960            y = self.loads(s1)
1961            self.assert_is_copy(x, y)
1962
1963            # Dump using protocol 2 for test.
1964            s2 = self.dumps(x, 2)
1965            self.assertNotIn(__name__.encode("utf-8"), s2)
1966            self.assertNotIn(b"MyList", s2)
1967            self.assertEqual(opcode_in_pickle(opcode, s2), True, repr(s2))
1968
1969            y = self.loads(s2)
1970            self.assert_is_copy(x, y)
1971        finally:
1972            e.restore()
1973
1974    def test_global_ext1(self):
1975        self.produce_global_ext(0x00000001, pickle.EXT1)  # smallest EXT1 code
1976        self.produce_global_ext(0x000000ff, pickle.EXT1)  # largest EXT1 code
1977
1978    def test_global_ext2(self):
1979        self.produce_global_ext(0x00000100, pickle.EXT2)  # smallest EXT2 code
1980        self.produce_global_ext(0x0000ffff, pickle.EXT2)  # largest EXT2 code
1981        self.produce_global_ext(0x0000abcd, pickle.EXT2)  # check endianness
1982
1983    def test_global_ext4(self):
1984        self.produce_global_ext(0x00010000, pickle.EXT4)  # smallest EXT4 code
1985        self.produce_global_ext(0x7fffffff, pickle.EXT4)  # largest EXT4 code
1986        self.produce_global_ext(0x12abcdef, pickle.EXT4)  # check endianness
1987
1988    def test_list_chunking(self):
1989        n = 10  # too small to chunk
1990        x = list(range(n))
1991        for proto in protocols:
1992            s = self.dumps(x, proto)
1993            y = self.loads(s)
1994            self.assert_is_copy(x, y)
1995            num_appends = count_opcode(pickle.APPENDS, s)
1996            self.assertEqual(num_appends, proto > 0)
1997
1998        n = 2500  # expect at least two chunks when proto > 0
1999        x = list(range(n))
2000        for proto in protocols:
2001            s = self.dumps(x, proto)
2002            y = self.loads(s)
2003            self.assert_is_copy(x, y)
2004            num_appends = count_opcode(pickle.APPENDS, s)
2005            if proto == 0:
2006                self.assertEqual(num_appends, 0)
2007            else:
2008                self.assertTrue(num_appends >= 2)
2009
2010    def test_dict_chunking(self):
2011        n = 10  # too small to chunk
2012        x = dict.fromkeys(range(n))
2013        for proto in protocols:
2014            s = self.dumps(x, proto)
2015            self.assertIsInstance(s, bytes_types)
2016            y = self.loads(s)
2017            self.assert_is_copy(x, y)
2018            num_setitems = count_opcode(pickle.SETITEMS, s)
2019            self.assertEqual(num_setitems, proto > 0)
2020
2021        n = 2500  # expect at least two chunks when proto > 0
2022        x = dict.fromkeys(range(n))
2023        for proto in protocols:
2024            s = self.dumps(x, proto)
2025            y = self.loads(s)
2026            self.assert_is_copy(x, y)
2027            num_setitems = count_opcode(pickle.SETITEMS, s)
2028            if proto == 0:
2029                self.assertEqual(num_setitems, 0)
2030            else:
2031                self.assertTrue(num_setitems >= 2)
2032
2033    def test_set_chunking(self):
2034        n = 10  # too small to chunk
2035        x = set(range(n))
2036        for proto in protocols:
2037            s = self.dumps(x, proto)
2038            y = self.loads(s)
2039            self.assert_is_copy(x, y)
2040            num_additems = count_opcode(pickle.ADDITEMS, s)
2041            if proto < 4:
2042                self.assertEqual(num_additems, 0)
2043            else:
2044                self.assertEqual(num_additems, 1)
2045
2046        n = 2500  # expect at least two chunks when proto >= 4
2047        x = set(range(n))
2048        for proto in protocols:
2049            s = self.dumps(x, proto)
2050            y = self.loads(s)
2051            self.assert_is_copy(x, y)
2052            num_additems = count_opcode(pickle.ADDITEMS, s)
2053            if proto < 4:
2054                self.assertEqual(num_additems, 0)
2055            else:
2056                self.assertGreaterEqual(num_additems, 2)
2057
2058    def test_simple_newobj(self):
2059        x = SimpleNewObj.__new__(SimpleNewObj, 0xface)  # avoid __init__
2060        x.abc = 666
2061        for proto in protocols:
2062            with self.subTest(proto=proto):
2063                s = self.dumps(x, proto)
2064                if proto < 1:
2065                    self.assertIn(b'\nI64206', s)  # INT
2066                else:
2067                    self.assertIn(b'M\xce\xfa', s)  # BININT2
2068                self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s),
2069                                 2 <= proto)
2070                self.assertFalse(opcode_in_pickle(pickle.NEWOBJ_EX, s))
2071                y = self.loads(s)   # will raise TypeError if __init__ called
2072                self.assert_is_copy(x, y)
2073
2074    def test_complex_newobj(self):
2075        x = ComplexNewObj.__new__(ComplexNewObj, 0xface)  # avoid __init__
2076        x.abc = 666
2077        for proto in protocols:
2078            with self.subTest(proto=proto):
2079                s = self.dumps(x, proto)
2080                if proto < 1:
2081                    self.assertIn(b'\nI64206', s)  # INT
2082                elif proto < 2:
2083                    self.assertIn(b'M\xce\xfa', s)  # BININT2
2084                elif proto < 4:
2085                    self.assertIn(b'X\x04\x00\x00\x00FACE', s)  # BINUNICODE
2086                else:
2087                    self.assertIn(b'\x8c\x04FACE', s)  # SHORT_BINUNICODE
2088                self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s),
2089                                 2 <= proto)
2090                self.assertFalse(opcode_in_pickle(pickle.NEWOBJ_EX, s))
2091                y = self.loads(s)   # will raise TypeError if __init__ called
2092                self.assert_is_copy(x, y)
2093
2094    def test_complex_newobj_ex(self):
2095        x = ComplexNewObjEx.__new__(ComplexNewObjEx, 0xface)  # avoid __init__
2096        x.abc = 666
2097        for proto in protocols:
2098            with self.subTest(proto=proto):
2099                s = self.dumps(x, proto)
2100                if proto < 1:
2101                    self.assertIn(b'\nI64206', s)  # INT
2102                elif proto < 2:
2103                    self.assertIn(b'M\xce\xfa', s)  # BININT2
2104                elif proto < 4:
2105                    self.assertIn(b'X\x04\x00\x00\x00FACE', s)  # BINUNICODE
2106                else:
2107                    self.assertIn(b'\x8c\x04FACE', s)  # SHORT_BINUNICODE
2108                self.assertFalse(opcode_in_pickle(pickle.NEWOBJ, s))
2109                self.assertEqual(opcode_in_pickle(pickle.NEWOBJ_EX, s),
2110                                 4 <= proto)
2111                y = self.loads(s)   # will raise TypeError if __init__ called
2112                self.assert_is_copy(x, y)
2113
2114    def test_newobj_list_slots(self):
2115        x = SlotList([1, 2, 3])
2116        x.foo = 42
2117        x.bar = "hello"
2118        s = self.dumps(x, 2)
2119        y = self.loads(s)
2120        self.assert_is_copy(x, y)
2121
2122    def test_reduce_overrides_default_reduce_ex(self):
2123        for proto in protocols:
2124            x = REX_one()
2125            self.assertEqual(x._reduce_called, 0)
2126            s = self.dumps(x, proto)
2127            self.assertEqual(x._reduce_called, 1)
2128            y = self.loads(s)
2129            self.assertEqual(y._reduce_called, 0)
2130
2131    def test_reduce_ex_called(self):
2132        for proto in protocols:
2133            x = REX_two()
2134            self.assertEqual(x._proto, None)
2135            s = self.dumps(x, proto)
2136            self.assertEqual(x._proto, proto)
2137            y = self.loads(s)
2138            self.assertEqual(y._proto, None)
2139
2140    def test_reduce_ex_overrides_reduce(self):
2141        for proto in protocols:
2142            x = REX_three()
2143            self.assertEqual(x._proto, None)
2144            s = self.dumps(x, proto)
2145            self.assertEqual(x._proto, proto)
2146            y = self.loads(s)
2147            self.assertEqual(y._proto, None)
2148
2149    def test_reduce_ex_calls_base(self):
2150        for proto in protocols:
2151            x = REX_four()
2152            self.assertEqual(x._proto, None)
2153            s = self.dumps(x, proto)
2154            self.assertEqual(x._proto, proto)
2155            y = self.loads(s)
2156            self.assertEqual(y._proto, proto)
2157
2158    def test_reduce_calls_base(self):
2159        for proto in protocols:
2160            x = REX_five()
2161            self.assertEqual(x._reduce_called, 0)
2162            s = self.dumps(x, proto)
2163            self.assertEqual(x._reduce_called, 1)
2164            y = self.loads(s)
2165            self.assertEqual(y._reduce_called, 1)
2166
2167    @no_tracing
2168    def test_bad_getattr(self):
2169        # Issue #3514: crash when there is an infinite loop in __getattr__
2170        x = BadGetattr()
2171        for proto in protocols:
2172            self.assertRaises(RuntimeError, self.dumps, x, proto)
2173
2174    def test_reduce_bad_iterator(self):
2175        # Issue4176: crash when 4th and 5th items of __reduce__()
2176        # are not iterators
2177        class C(object):
2178            def __reduce__(self):
2179                # 4th item is not an iterator
2180                return list, (), None, [], None
2181        class D(object):
2182            def __reduce__(self):
2183                # 5th item is not an iterator
2184                return dict, (), None, None, []
2185
2186        # Python implementation is less strict and also accepts iterables.
2187        for proto in protocols:
2188            try:
2189                self.dumps(C(), proto)
2190            except pickle.PicklingError:
2191                pass
2192            try:
2193                self.dumps(D(), proto)
2194            except pickle.PicklingError:
2195                pass
2196
2197    def test_many_puts_and_gets(self):
2198        # Test that internal data structures correctly deal with lots of
2199        # puts/gets.
2200        keys = ("aaa" + str(i) for i in range(100))
2201        large_dict = dict((k, [4, 5, 6]) for k in keys)
2202        obj = [dict(large_dict), dict(large_dict), dict(large_dict)]
2203
2204        for proto in protocols:
2205            with self.subTest(proto=proto):
2206                dumped = self.dumps(obj, proto)
2207                loaded = self.loads(dumped)
2208                self.assert_is_copy(obj, loaded)
2209
2210    def test_attribute_name_interning(self):
2211        # Test that attribute names of pickled objects are interned when
2212        # unpickling.
2213        for proto in protocols:
2214            x = C()
2215            x.foo = 42
2216            x.bar = "hello"
2217            s = self.dumps(x, proto)
2218            y = self.loads(s)
2219            x_keys = sorted(x.__dict__)
2220            y_keys = sorted(y.__dict__)
2221            for x_key, y_key in zip(x_keys, y_keys):
2222                self.assertIs(x_key, y_key)
2223
2224    def test_pickle_to_2x(self):
2225        # Pickle non-trivial data with protocol 2, expecting that it yields
2226        # the same result as Python 2.x did.
2227        # NOTE: this test is a bit too strong since we can produce different
2228        # bytecode that 2.x will still understand.
2229        dumped = self.dumps(range(5), 2)
2230        self.assertEqual(dumped, DATA_XRANGE)
2231        dumped = self.dumps(set([3]), 2)
2232        self.assertEqual(dumped, DATA_SET2)
2233
2234    def test_large_pickles(self):
2235        # Test the correctness of internal buffering routines when handling
2236        # large data.
2237        for proto in protocols:
2238            data = (1, min, b'xy' * (30 * 1024), len)
2239            dumped = self.dumps(data, proto)
2240            loaded = self.loads(dumped)
2241            self.assertEqual(len(loaded), len(data))
2242            self.assertEqual(loaded, data)
2243
2244    def test_int_pickling_efficiency(self):
2245        # Test compacity of int representation (see issue #12744)
2246        for proto in protocols:
2247            with self.subTest(proto=proto):
2248                pickles = [self.dumps(2**n, proto) for n in range(70)]
2249                sizes = list(map(len, pickles))
2250                # the size function is monotonic
2251                self.assertEqual(sorted(sizes), sizes)
2252                if proto >= 2:
2253                    for p in pickles:
2254                        self.assertFalse(opcode_in_pickle(pickle.LONG, p))
2255
2256    def _check_pickling_with_opcode(self, obj, opcode, proto):
2257        pickled = self.dumps(obj, proto)
2258        self.assertTrue(opcode_in_pickle(opcode, pickled))
2259        unpickled = self.loads(pickled)
2260        self.assertEqual(obj, unpickled)
2261
2262    def test_appends_on_non_lists(self):
2263        # Issue #17720
2264        obj = REX_six([1, 2, 3])
2265        for proto in protocols:
2266            if proto == 0:
2267                self._check_pickling_with_opcode(obj, pickle.APPEND, proto)
2268            else:
2269                self._check_pickling_with_opcode(obj, pickle.APPENDS, proto)
2270
2271    def test_setitems_on_non_dicts(self):
2272        obj = REX_seven({1: -1, 2: -2, 3: -3})
2273        for proto in protocols:
2274            if proto == 0:
2275                self._check_pickling_with_opcode(obj, pickle.SETITEM, proto)
2276            else:
2277                self._check_pickling_with_opcode(obj, pickle.SETITEMS, proto)
2278
2279    # Exercise framing (proto >= 4) for significant workloads
2280
    # Bounds used by check_frame_opcodes(): every FRAME argument must be at
    # least this large, and any run of unframed data must be shorter than it.
    FRAME_SIZE_MIN = 4
    # 64 KiB.  check_frame_opcodes() uses it as the size above which a
    # bytes/str object may appear outside a frame; the framing tests also
    # compare average frame and chunk sizes against it.
    FRAME_SIZE_TARGET = 64 * 1024
2283
    def check_frame_opcodes(self, pickled):
        """
        Check the arguments of FRAME opcodes in a protocol 4+ pickle.

        Note that binary objects that are larger than FRAME_SIZE_TARGET are not
        framed by default and are therefore considered a frame by themselves in
        the following consistency check.
        """
        # frame_end: offset just past the frame currently being walked, or
        # None while outside any frame.
        # frameless_start: offset where the current run of unframed opcodes
        # began, or None when no such run is open.
        frame_end = frameless_start = None
        frameless_opcodes = {'BINBYTES', 'BINUNICODE', 'BINBYTES8',
                             'BINUNICODE8', 'BYTEARRAY8'}
        for op, arg, pos in pickletools.genops(pickled):
            if frame_end is not None:
                # No opcode may start beyond the announced end of the frame;
                # landing exactly on the end closes the frame.
                self.assertLessEqual(pos, frame_end)
                if pos == frame_end:
                    frame_end = None

            if frame_end is not None:  # framed
                # Frames must not nest.
                self.assertNotEqual(op.name, 'FRAME')
                if op.name in frameless_opcodes:
                    # Only short bytes and str objects should be written
                    # in a frame
                    self.assertLessEqual(len(arg), self.FRAME_SIZE_TARGET)

            else:  # not framed
                if (op.name == 'FRAME' or
                    (op.name in frameless_opcodes and
                     len(arg) > self.FRAME_SIZE_TARGET)):
                    # Frame or large bytes or str object
                    if frameless_start is not None:
                        # Only short data should be written outside of a frame
                        self.assertLess(pos - frameless_start,
                                        self.FRAME_SIZE_MIN)
                        frameless_start = None
                elif frameless_start is None and op.name != 'PROTO':
                    # First unframed opcode of a new run (the PROTO header is
                    # not counted).
                    frameless_start = pos

            if op.name == 'FRAME':
                self.assertGreaterEqual(arg, self.FRAME_SIZE_MIN)
                # 9 = the FRAME opcode byte plus its 8-byte argument; the
                # frame body of 'arg' bytes follows.
                frame_end = pos + 9 + arg

        # End of pickle: an open frame must end exactly here, and a trailing
        # unframed run must still be short.
        pos = len(pickled)
        if frame_end is not None:
            self.assertEqual(frame_end, pos)
        elif frameless_start is not None:
            self.assertLess(pos - frameless_start, self.FRAME_SIZE_MIN)
2330
2331    @support.skip_if_pgo_task
2332    def test_framing_many_objects(self):
2333        obj = list(range(10**5))
2334        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2335            with self.subTest(proto=proto):
2336                pickled = self.dumps(obj, proto)
2337                unpickled = self.loads(pickled)
2338                self.assertEqual(obj, unpickled)
2339                bytes_per_frame = (len(pickled) /
2340                                   count_opcode(pickle.FRAME, pickled))
2341                self.assertGreater(bytes_per_frame,
2342                                   self.FRAME_SIZE_TARGET / 2)
2343                self.assertLessEqual(bytes_per_frame,
2344                                     self.FRAME_SIZE_TARGET * 1)
2345                self.check_frame_opcodes(pickled)
2346
2347    def test_framing_large_objects(self):
2348        N = 1024 * 1024
2349        small_items = [[i] for i in range(10)]
2350        obj = [b'x' * N, *small_items, b'y' * N, 'z' * N]
2351        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2352            for fast in [False, True]:
2353                with self.subTest(proto=proto, fast=fast):
2354                    if not fast:
2355                        # fast=False by default.
2356                        # This covers in-memory pickling with pickle.dumps().
2357                        pickled = self.dumps(obj, proto)
2358                    else:
2359                        # Pickler is required when fast=True.
2360                        if not hasattr(self, 'pickler'):
2361                            continue
2362                        buf = io.BytesIO()
2363                        pickler = self.pickler(buf, protocol=proto)
2364                        pickler.fast = fast
2365                        pickler.dump(obj)
2366                        pickled = buf.getvalue()
2367                    unpickled = self.loads(pickled)
2368                    # More informative error message in case of failure.
2369                    self.assertEqual([len(x) for x in obj],
2370                                     [len(x) for x in unpickled])
2371                    # Perform full equality check if the lengths match.
2372                    self.assertEqual(obj, unpickled)
2373                    n_frames = count_opcode(pickle.FRAME, pickled)
2374                    # A single frame for small objects between
2375                    # first two large objects.
2376                    self.assertEqual(n_frames, 1)
2377                    self.check_frame_opcodes(pickled)
2378
2379    def test_optional_frames(self):
2380        if pickle.HIGHEST_PROTOCOL < 4:
2381            return
2382
2383        def remove_frames(pickled, keep_frame=None):
2384            """Remove frame opcodes from the given pickle."""
2385            frame_starts = []
2386            # 1 byte for the opcode and 8 for the argument
2387            frame_opcode_size = 9
2388            for opcode, _, pos in pickletools.genops(pickled):
2389                if opcode.name == 'FRAME':
2390                    frame_starts.append(pos)
2391
2392            newpickle = bytearray()
2393            last_frame_end = 0
2394            for i, pos in enumerate(frame_starts):
2395                if keep_frame and keep_frame(i):
2396                    continue
2397                newpickle += pickled[last_frame_end:pos]
2398                last_frame_end = pos + frame_opcode_size
2399            newpickle += pickled[last_frame_end:]
2400            return newpickle
2401
2402        frame_size = self.FRAME_SIZE_TARGET
2403        num_frames = 20
2404        # Large byte objects (dict values) intermittent with small objects
2405        # (dict keys)
2406        for bytes_type in (bytes, bytearray):
2407            obj = {i: bytes_type([i]) * frame_size for i in range(num_frames)}
2408
2409            for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2410                pickled = self.dumps(obj, proto)
2411
2412                frameless_pickle = remove_frames(pickled)
2413                self.assertEqual(count_opcode(pickle.FRAME, frameless_pickle), 0)
2414                self.assertEqual(obj, self.loads(frameless_pickle))
2415
2416                some_frames_pickle = remove_frames(pickled, lambda i: i % 2)
2417                self.assertLess(count_opcode(pickle.FRAME, some_frames_pickle),
2418                                count_opcode(pickle.FRAME, pickled))
2419                self.assertEqual(obj, self.loads(some_frames_pickle))
2420
    @support.skip_if_pgo_task
    def test_framed_write_sizes_with_delayed_writer(self):
        # Pickle into a writer that only stores the chunks it is handed, and
        # check both the number and the sizes of the write() calls.  The
        # chunks are read after dump() has returned, hence "delayed".
        class ChunkAccumulator:
            """Accumulate pickler output in a list of raw chunks."""
            def __init__(self):
                self.chunks = []
            def write(self, chunk):
                self.chunks.append(chunk)
            def concatenate_chunks(self):
                return b"".join(self.chunks)

        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
            objects = [(str(i).encode('ascii'), i % 42, {'i': str(i)})
                       for i in range(int(1e4))]
            # Add a large unique ASCII string
            objects.append('0123456789abcdef' *
                           (self.FRAME_SIZE_TARGET // 16 + 1))

            # Protocol 4 packs groups of small objects into frames and issues
            # calls to write only once or twice per frame:
            # The C pickler issues one call to write per-frame (header and
            # contents) while Python pickler issues two calls to write: one for
            # the frame header and one for the frame binary contents.
            writer = ChunkAccumulator()
            self.pickler(writer, proto).dump(objects)

            # Actually read the binary content of the chunks after the end
            # of the call to dump: any memoryview passed to write should not
            # be released otherwise this delayed access would not be possible.
            pickled = writer.concatenate_chunks()
            reconstructed = self.loads(pickled)
            self.assertEqual(reconstructed, objects)
            self.assertGreater(len(writer.chunks), 1)

            # memoryviews should own the memory.
            # Drop the source objects and force a collection; the stored
            # chunks must still join to the same bytes.
            del objects
            support.gc_collect()
            self.assertEqual(writer.concatenate_chunks(), pickled)

            # Ceiling division of the pickle size by FRAME_SIZE_TARGET.
            n_frames = (len(pickled) - 1) // self.FRAME_SIZE_TARGET + 1
            # There should be at least one call to write per frame
            self.assertGreaterEqual(len(writer.chunks), n_frames)

            # but not too many either: there can be one for the proto,
            # one per-frame header, one per frame for the actual contents,
            # and two for the header.
            self.assertLessEqual(len(writer.chunks), 2 * n_frames + 3)

            # Bucket the chunk sizes: small (<= 9 bytes), large
            # (>= FRAME_SIZE_TARGET) and medium (everything in between).
            chunk_sizes = [len(c) for c in writer.chunks]
            large_sizes = [s for s in chunk_sizes
                           if s >= self.FRAME_SIZE_TARGET]
            medium_sizes = [s for s in chunk_sizes
                           if 9 < s < self.FRAME_SIZE_TARGET]
            small_sizes = [s for s in chunk_sizes if s <= 9]

            # Large chunks should not be too large:
            for chunk_size in large_sizes:
                self.assertLess(chunk_size, 2 * self.FRAME_SIZE_TARGET,
                                chunk_sizes)
            # There shouldn't be too many small chunks: the protocol header,
            # the frame headers and the large string headers are written
            # in small chunks.
            self.assertLessEqual(len(small_sizes),
                                 len(large_sizes) + len(medium_sizes) + 3,
                                 chunk_sizes)
2486
2487    def test_nested_names(self):
2488        global Nested
2489        class Nested:
2490            class A:
2491                class B:
2492                    class C:
2493                        pass
2494        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
2495            for obj in [Nested.A, Nested.A.B, Nested.A.B.C]:
2496                with self.subTest(proto=proto, obj=obj):
2497                    unpickled = self.loads(self.dumps(obj, proto))
2498                    self.assertIs(obj, unpickled)
2499
2500    def test_recursive_nested_names(self):
2501        global Recursive
2502        class Recursive:
2503            pass
2504        Recursive.mod = sys.modules[Recursive.__module__]
2505        Recursive.__qualname__ = 'Recursive.mod.Recursive'
2506        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
2507            with self.subTest(proto=proto):
2508                unpickled = self.loads(self.dumps(Recursive, proto))
2509                self.assertIs(unpickled, Recursive)
2510        del Recursive.mod # break reference loop
2511
2512    def test_py_methods(self):
2513        global PyMethodsTest
2514        class PyMethodsTest:
2515            @staticmethod
2516            def cheese():
2517                return "cheese"
2518            @classmethod
2519            def wine(cls):
2520                assert cls is PyMethodsTest
2521                return "wine"
2522            def biscuits(self):
2523                assert isinstance(self, PyMethodsTest)
2524                return "biscuits"
2525            class Nested:
2526                "Nested class"
2527                @staticmethod
2528                def ketchup():
2529                    return "ketchup"
2530                @classmethod
2531                def maple(cls):
2532                    assert cls is PyMethodsTest.Nested
2533                    return "maple"
2534                def pie(self):
2535                    assert isinstance(self, PyMethodsTest.Nested)
2536                    return "pie"
2537
2538        py_methods = (
2539            PyMethodsTest.cheese,
2540            PyMethodsTest.wine,
2541            PyMethodsTest().biscuits,
2542            PyMethodsTest.Nested.ketchup,
2543            PyMethodsTest.Nested.maple,
2544            PyMethodsTest.Nested().pie
2545        )
2546        py_unbound_methods = (
2547            (PyMethodsTest.biscuits, PyMethodsTest),
2548            (PyMethodsTest.Nested.pie, PyMethodsTest.Nested)
2549        )
2550        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
2551            for method in py_methods:
2552                with self.subTest(proto=proto, method=method):
2553                    unpickled = self.loads(self.dumps(method, proto))
2554                    self.assertEqual(method(), unpickled())
2555            for method, cls in py_unbound_methods:
2556                obj = cls()
2557                with self.subTest(proto=proto, method=method):
2558                    unpickled = self.loads(self.dumps(method, proto))
2559                    self.assertEqual(method(obj), unpickled(obj))
2560
2561    def test_c_methods(self):
2562        global Subclass
2563        class Subclass(tuple):
2564            class Nested(str):
2565                pass
2566
2567        c_methods = (
2568            # bound built-in method
2569            ("abcd".index, ("c",)),
2570            # unbound built-in method
2571            (str.index, ("abcd", "c")),
2572            # bound "slot" method
2573            ([1, 2, 3].__len__, ()),
2574            # unbound "slot" method
2575            (list.__len__, ([1, 2, 3],)),
2576            # bound "coexist" method
2577            ({1, 2}.__contains__, (2,)),
2578            # unbound "coexist" method
2579            (set.__contains__, ({1, 2}, 2)),
2580            # built-in class method
2581            (dict.fromkeys, (("a", 1), ("b", 2))),
2582            # built-in static method
2583            (bytearray.maketrans, (b"abc", b"xyz")),
2584            # subclass methods
2585            (Subclass([1,2,2]).count, (2,)),
2586            (Subclass.count, (Subclass([1,2,2]), 2)),
2587            (Subclass.Nested("sweet").count, ("e",)),
2588            (Subclass.Nested.count, (Subclass.Nested("sweet"), "e")),
2589        )
2590        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
2591            for method, args in c_methods:
2592                with self.subTest(proto=proto, method=method):
2593                    unpickled = self.loads(self.dumps(method, proto))
2594                    self.assertEqual(method(*args), unpickled(*args))
2595
2596    def test_compat_pickle(self):
2597        tests = [
2598            (range(1, 7), '__builtin__', 'xrange'),
2599            (map(int, '123'), 'itertools', 'imap'),
2600            (functools.reduce, '__builtin__', 'reduce'),
2601            (dbm.whichdb, 'whichdb', 'whichdb'),
2602            (Exception(), 'exceptions', 'Exception'),
2603            (collections.UserDict(), 'UserDict', 'IterableUserDict'),
2604            (collections.UserList(), 'UserList', 'UserList'),
2605            (collections.defaultdict(), 'collections', 'defaultdict'),
2606        ]
2607        for val, mod, name in tests:
2608            for proto in range(3):
2609                with self.subTest(type=type(val), proto=proto):
2610                    pickled = self.dumps(val, proto)
2611                    self.assertIn(('c%s\n%s' % (mod, name)).encode(), pickled)
2612                    self.assertIs(type(self.loads(pickled)), type(val))
2613
2614    def test_local_lookup_error(self):
2615        # Test that whichmodule() errors out cleanly when looking up
2616        # an assumed globally-reachable object fails.
2617        def f():
2618            pass
2619        # Since the function is local, lookup will fail
2620        for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
2621            with self.assertRaises((AttributeError, pickle.PicklingError)):
2622                pickletools.dis(self.dumps(f, proto))
2623        # Same without a __module__ attribute (exercises a different path
2624        # in _pickle.c).
2625        del f.__module__
2626        for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
2627            with self.assertRaises((AttributeError, pickle.PicklingError)):
2628                pickletools.dis(self.dumps(f, proto))
2629        # Yet a different path.
2630        f.__name__ = f.__qualname__
2631        for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
2632            with self.assertRaises((AttributeError, pickle.PicklingError)):
2633                pickletools.dis(self.dumps(f, proto))
2634
2635    #
2636    # PEP 574 tests below
2637    #
2638
2639    def buffer_like_objects(self):
2640        # Yield buffer-like objects with the bytestring "abcdef" in them
2641        bytestring = b"abcdefgh"
2642        yield ZeroCopyBytes(bytestring)
2643        yield ZeroCopyBytearray(bytestring)
2644        if _testbuffer is not None:
2645            items = list(bytestring)
2646            value = int.from_bytes(bytestring, byteorder='little')
2647            for flags in (0, _testbuffer.ND_WRITABLE):
2648                # 1-D, contiguous
2649                yield PicklableNDArray(items, format='B', shape=(8,),
2650                                       flags=flags)
2651                # 2-D, C-contiguous
2652                yield PicklableNDArray(items, format='B', shape=(4, 2),
2653                                       strides=(2, 1), flags=flags)
2654                # 2-D, Fortran-contiguous
2655                yield PicklableNDArray(items, format='B',
2656                                       shape=(4, 2), strides=(1, 4),
2657                                       flags=flags)
2658
2659    def test_in_band_buffers(self):
2660        # Test in-band buffers (PEP 574)
2661        for obj in self.buffer_like_objects():
2662            for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
2663                data = self.dumps(obj, proto)
2664                if obj.c_contiguous and proto >= 5:
2665                    # The raw memory bytes are serialized in physical order
2666                    self.assertIn(b"abcdefgh", data)
2667                self.assertEqual(count_opcode(pickle.NEXT_BUFFER, data), 0)
2668                if proto >= 5:
2669                    self.assertEqual(count_opcode(pickle.SHORT_BINBYTES, data),
2670                                     1 if obj.readonly else 0)
2671                    self.assertEqual(count_opcode(pickle.BYTEARRAY8, data),
2672                                     0 if obj.readonly else 1)
2673                    # Return a true value from buffer_callback should have
2674                    # the same effect
2675                    def buffer_callback(obj):
2676                        return True
2677                    data2 = self.dumps(obj, proto,
2678                                       buffer_callback=buffer_callback)
2679                    self.assertEqual(data2, data)
2680
2681                new = self.loads(data)
2682                # It's a copy
2683                self.assertIsNot(new, obj)
2684                self.assertIs(type(new), type(obj))
2685                self.assertEqual(new, obj)
2686
2687    # XXX Unfortunately cannot test non-contiguous array
2688    # (see comment in PicklableNDArray.__reduce_ex__)
2689
2690    def test_oob_buffers(self):
2691        # Test out-of-band buffers (PEP 574)
2692        for obj in self.buffer_like_objects():
2693            for proto in range(0, 5):
2694                # Need protocol >= 5 for buffer_callback
2695                with self.assertRaises(ValueError):
2696                    self.dumps(obj, proto,
2697                               buffer_callback=[].append)
2698            for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
2699                buffers = []
2700                buffer_callback = lambda pb: buffers.append(pb.raw())
2701                data = self.dumps(obj, proto,
2702                                  buffer_callback=buffer_callback)
2703                self.assertNotIn(b"abcdefgh", data)
2704                self.assertEqual(count_opcode(pickle.SHORT_BINBYTES, data), 0)
2705                self.assertEqual(count_opcode(pickle.BYTEARRAY8, data), 0)
2706                self.assertEqual(count_opcode(pickle.NEXT_BUFFER, data), 1)
2707                self.assertEqual(count_opcode(pickle.READONLY_BUFFER, data),
2708                                 1 if obj.readonly else 0)
2709
2710                if obj.c_contiguous:
2711                    self.assertEqual(bytes(buffers[0]), b"abcdefgh")
2712                # Need buffers argument to unpickle properly
2713                with self.assertRaises(pickle.UnpicklingError):
2714                    self.loads(data)
2715
2716                new = self.loads(data, buffers=buffers)
2717                if obj.zero_copy_reconstruct:
2718                    # Zero-copy achieved
2719                    self.assertIs(new, obj)
2720                else:
2721                    self.assertIs(type(new), type(obj))
2722                    self.assertEqual(new, obj)
2723                # Non-sequence buffers accepted too
2724                new = self.loads(data, buffers=iter(buffers))
2725                if obj.zero_copy_reconstruct:
2726                    # Zero-copy achieved
2727                    self.assertIs(new, obj)
2728                else:
2729                    self.assertIs(type(new), type(obj))
2730                    self.assertEqual(new, obj)
2731
2732    def test_oob_buffers_writable_to_readonly(self):
2733        # Test reconstructing readonly object from writable buffer
2734        obj = ZeroCopyBytes(b"foobar")
2735        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
2736            buffers = []
2737            buffer_callback = buffers.append
2738            data = self.dumps(obj, proto, buffer_callback=buffer_callback)
2739
2740            buffers = map(bytearray, buffers)
2741            new = self.loads(data, buffers=buffers)
2742            self.assertIs(type(new), type(obj))
2743            self.assertEqual(new, obj)
2744
2745    def test_picklebuffer_error(self):
2746        # PickleBuffer forbidden with protocol < 5
2747        pb = pickle.PickleBuffer(b"foobar")
2748        for proto in range(0, 5):
2749            with self.assertRaises(pickle.PickleError):
2750                self.dumps(pb, proto)
2751
2752    def test_buffer_callback_error(self):
2753        def buffer_callback(buffers):
2754            1/0
2755        pb = pickle.PickleBuffer(b"foobar")
2756        with self.assertRaises(ZeroDivisionError):
2757            self.dumps(pb, 5, buffer_callback=buffer_callback)
2758
2759    def test_buffers_error(self):
2760        pb = pickle.PickleBuffer(b"foobar")
2761        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
2762            data = self.dumps(pb, proto, buffer_callback=[].append)
2763            # Non iterable buffers
2764            with self.assertRaises(TypeError):
2765                self.loads(data, buffers=object())
2766            # Buffer iterable exhausts too early
2767            with self.assertRaises(pickle.UnpicklingError):
2768                self.loads(data, buffers=[])
2769
2770    def test_inband_accept_default_buffers_argument(self):
2771        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
2772            data_pickled = self.dumps(1, proto, buffer_callback=None)
2773            data = self.loads(data_pickled, buffers=None)
2774
2775    @unittest.skipIf(np is None, "Test needs Numpy")
2776    def test_buffers_numpy(self):
2777        def check_no_copy(x, y):
2778            np.testing.assert_equal(x, y)
2779            self.assertEqual(x.ctypes.data, y.ctypes.data)
2780
2781        def check_copy(x, y):
2782            np.testing.assert_equal(x, y)
2783            self.assertNotEqual(x.ctypes.data, y.ctypes.data)
2784
2785        def check_array(arr):
2786            # In-band
2787            for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
2788                data = self.dumps(arr, proto)
2789                new = self.loads(data)
2790                check_copy(arr, new)
2791            for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
2792                buffer_callback = lambda _: True
2793                data = self.dumps(arr, proto, buffer_callback=buffer_callback)
2794                new = self.loads(data)
2795                check_copy(arr, new)
2796            # Out-of-band
2797            for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
2798                buffers = []
2799                buffer_callback = buffers.append
2800                data = self.dumps(arr, proto, buffer_callback=buffer_callback)
2801                new = self.loads(data, buffers=buffers)
2802                if arr.flags.c_contiguous or arr.flags.f_contiguous:
2803                    check_no_copy(arr, new)
2804                else:
2805                    check_copy(arr, new)
2806
2807        # 1-D
2808        arr = np.arange(6)
2809        check_array(arr)
2810        # 1-D, non-contiguous
2811        check_array(arr[::2])
2812        # 2-D, C-contiguous
2813        arr = np.arange(12).reshape((3, 4))
2814        check_array(arr)
2815        # 2-D, F-contiguous
2816        check_array(arr.T)
2817        # 2-D, non-contiguous
2818        check_array(arr[::2])
2819
2820
class BigmemPickleTests(unittest.TestCase):
    # Tests that pickle very large objects (2 GiB / 4 GiB and up).
    # self.dumps is not defined in this class; presumably concrete
    # subclasses provide it.  Each test rebinds its large locals to None in
    # a finally clause, presumably so the memory can be reclaimed promptly.

    # Binary protocols can serialize longs of up to 2 GiB-1

    @bigmemtest(size=_2G, memuse=3.6, dry_run=False)
    def test_huge_long_32b(self, size):
        # An integer occupying more than `size` bytes must be rejected
        # with ValueError or OverflowError by every protocol >= 2.
        data = 1 << (8 * size)
        try:
            for proto in protocols:
                if proto < 2:
                    continue
                with self.subTest(proto=proto):
                    with self.assertRaises((ValueError, OverflowError)):
                        self.dumps(data, protocol=proto)
        finally:
            data = None

    # Protocol 3 can serialize up to 4 GiB-1 as a bytes object
    # (older protocols don't have a dedicated opcode for bytes and are
    # too inefficient)

    @bigmemtest(size=_2G, memuse=2.5, dry_run=False)
    def test_huge_bytes_32b(self, size):
        # For protocols >= 3, the payload must be immediately preceded by
        # a BINBYTES opcode followed by the 4-byte little-endian length.
        data = b"abcd" * (size // 4)
        try:
            for proto in protocols:
                if proto < 3:
                    continue
                with self.subTest(proto=proto):
                    try:
                        pickled = self.dumps(data, protocol=proto)
                        header = (pickle.BINBYTES +
                                  struct.pack("<I", len(data)))
                        data_start = pickled.index(data)
                        self.assertEqual(
                            header,
                            pickled[data_start-len(header):data_start])
                    finally:
                        pickled = None
        finally:
            data = None

    @bigmemtest(size=_4G, memuse=2.5, dry_run=False)
    def test_huge_bytes_64b(self, size):
        # Protocol 3 must refuse the object; later protocols must emit a
        # BINBYTES8 opcode followed by the 8-byte little-endian length
        # right before the payload.
        data = b"acbd" * (size // 4)
        try:
            for proto in protocols:
                if proto < 3:
                    continue
                with self.subTest(proto=proto):
                    if proto == 3:
                        # Protocol 3 does not support large bytes objects.
                        # Verify that we do not crash when processing one.
                        with self.assertRaises((ValueError, OverflowError)):
                            self.dumps(data, protocol=proto)
                        continue
                    try:
                        pickled = self.dumps(data, protocol=proto)
                        header = (pickle.BINBYTES8 +
                                  struct.pack("<Q", len(data)))
                        data_start = pickled.index(data)
                        self.assertEqual(
                            header,
                            pickled[data_start-len(header):data_start])
                    finally:
                        pickled = None
        finally:
            data = None

    # All protocols use 1-byte per printable ASCII character; we add another
    # byte because the encoded form has to be copied into the internal buffer.

    @bigmemtest(size=_2G, memuse=8, dry_run=False)
    def test_huge_str_32b(self, size):
        # For every protocol except 0, the text must be preceded by a
        # BINUNICODE opcode with a 4-byte little-endian length, and the
        # span from the first to the last b"abcd" must be len(data) bytes.
        data = "abcd" * (size // 4)
        try:
            for proto in protocols:
                if proto == 0:
                    continue
                with self.subTest(proto=proto):
                    try:
                        pickled = self.dumps(data, protocol=proto)
                        header = (pickle.BINUNICODE +
                                  struct.pack("<I", len(data)))
                        data_start = pickled.index(b'abcd')
                        self.assertEqual(
                            header,
                            pickled[data_start-len(header):data_start])
                        self.assertEqual((pickled.rindex(b"abcd") + len(b"abcd") -
                                          pickled.index(b"abcd")), len(data))
                    finally:
                        pickled = None
        finally:
            data = None

    # BINUNICODE (protocols 1, 2 and 3) cannot carry more than 2**32 - 1 bytes
    # of utf-8 encoded unicode. BINUNICODE8 (protocol 4) supports these huge
    # unicode strings however.

    @bigmemtest(size=_4G, memuse=8, dry_run=False)
    def test_huge_str_64b(self, size):
        # Protocols 1-3 must refuse the string; protocols >= 4 must emit a
        # BINUNICODE8 opcode with an 8-byte little-endian length right
        # before the text.  Protocol 0 is skipped.
        data = "abcd" * (size // 4)
        try:
            for proto in protocols:
                if proto == 0:
                    continue
                with self.subTest(proto=proto):
                    if proto < 4:
                        with self.assertRaises((ValueError, OverflowError)):
                            self.dumps(data, protocol=proto)
                        continue
                    try:
                        pickled = self.dumps(data, protocol=proto)
                        header = (pickle.BINUNICODE8 +
                                  struct.pack("<Q", len(data)))
                        data_start = pickled.index(b'abcd')
                        self.assertEqual(
                            header,
                            pickled[data_start-len(header):data_start])
                        self.assertEqual((pickled.rindex(b"abcd") + len(b"abcd") -
                                          pickled.index(b"abcd")), len(data))
                    finally:
                        pickled = None
        finally:
            data = None
2946
2947
2948# Test classes for reduce_ex
2949
class REX_one(object):
    """Defines only __reduce__; __reduce_ex__ is inherited from object."""

    # Class-level default; __reduce__ shadows it on the instance.
    _reduce_called = 0

    def __reduce__(self):
        # Leave a trace on the instance so callers can see this ran.
        self._reduce_called = 1
        return (REX_one, ())
2956
class REX_two(object):
    """Defines only __reduce_ex__; __reduce__ is inherited from object."""

    # Class-level default; __reduce_ex__ shadows it on the instance.
    _proto = None

    def __reduce_ex__(self, proto):
        # Remember which protocol this was called with.
        self._proto = proto
        return (REX_two, ())
2963
class REX_three(object):
    """Defines both __reduce_ex__ and __reduce__; __reduce__ raises
    TestFailed if it is ever called.
    """

    # Class-level default; __reduce_ex__ shadows it on the instance.
    _proto = None

    def __reduce_ex__(self, proto):
        # Remember which protocol this was called with.  Note that the
        # reduce value reconstructs a REX_two, not a REX_three.
        self._proto = proto
        return (REX_two, ())

    def __reduce__(self):
        raise TestFailed("This __reduce__ shouldn't be called")
2971
class REX_four(object):
    """Calling base class method should succeed"""

    # Class-level default; __reduce_ex__ shadows it on the instance.
    _proto = None

    def __reduce_ex__(self, proto):
        # Record the protocol, then delegate explicitly to object.
        self._proto = proto
        reduced = object.__reduce_ex__(self, proto)
        return reduced
2978
class REX_five(object):
    """This one used to fail with infinite recursion"""

    # Class-level default; __reduce__ shadows it on the instance.
    _reduce_called = 0

    def __reduce__(self):
        # Record the call, then delegate explicitly to object.
        self._reduce_called = 1
        reduced = object.__reduce__(self)
        return reduced
2985
class REX_six(object):
    """Exercise the 4th element (list item iterator) of the reduce value.

    Instances wrap a list and expose append(); __reduce__ supplies an
    iterator over the wrapped items.
    """

    def __init__(self, items=None):
        if items is None:
            items = []
        self.items = items

    def __eq__(self, other):
        # Equal only to objects of exactly the same type holding equal items.
        if type(self) is not type(other):
            return False
        return self.items == other.items

    def append(self, item):
        self.items.append(item)

    def __reduce__(self):
        # (callable, args, state, list item iterator, dict item iterator)
        item_iterator = iter(self.items)
        return type(self), (), None, item_iterator, None
2998
class REX_seven(object):
    """Exercise the 5th element (dict item iterator) of the reduce value.

    Instances wrap a dict and support item assignment; __reduce__ supplies
    an iterator over the wrapped (key, value) pairs.
    """

    def __init__(self, table=None):
        if table is None:
            table = {}
        self.table = table

    def __eq__(self, other):
        # Equal only to objects of exactly the same type holding equal tables.
        if type(self) is not type(other):
            return False
        return self.table == other.table

    def __setitem__(self, key, value):
        self.table[key] = value

    def __reduce__(self):
        # (callable, args, state, list item iterator, dict item iterator)
        pair_iterator = iter(self.table.items())
        return type(self), (), None, None, pair_iterator
3011
3012
3013# Test classes for newobj
3014
class MyInt(int):
    """int subclass; `sample` is a value suitable for constructing one."""

    sample = 1
3017
class MyFloat(float):
    """float subclass; `sample` is a value suitable for constructing one."""

    sample = 1.0
3020
class MyComplex(complex):
    """complex subclass; `sample` is a value suitable for constructing one."""

    sample = complex(1.0, 0.0)
3023
class MyStr(str):
    """str subclass; `sample` is an ASCII-only value for constructing one."""

    sample = "hello"
3026
class MyUnicode(str):
    """str subclass; `sample` contains a non-ASCII character."""

    sample = "hello \u1234"
3029
class MyTuple(tuple):
    """tuple subclass; `sample` is a value suitable for constructing one."""

    sample = (1, 2, 3)
3032
class MyList(list):
    """list subclass; `sample` is a value suitable for constructing one."""

    sample = [1, 2, 3]
3035
class MyDict(dict):
    """dict subclass; `sample` is a value suitable for constructing one."""

    sample = {"a": 1, "b": 2}
3038
class MySet(set):
    """set subclass; `sample` is a value suitable for constructing one."""

    sample = {"a", "b"}
3041
class MyFrozenSet(frozenset):
    """frozenset subclass; `sample` is a value for constructing one."""

    sample = frozenset(("a", "b"))
3044
# Every built-in-type subclass defined above, grouped by kind.
myclasses = [
    MyInt, MyFloat, MyComplex,
    MyStr, MyUnicode,
    MyTuple, MyList, MyDict, MySet, MyFrozenSet,
]
3049
3050
class SlotList(MyList):
    """MyList subclass that additionally declares a `foo` slot."""

    __slots__ = ["foo"]
3053
class SimpleNewObj(int):
    """int subclass whose __init__ always raises.

    Instances can therefore only be created without running __init__.
    Two instances compare equal when both their integer value and their
    instance __dict__ match.
    """

    def __init__(self, *args, **kwargs):
        # raise an error, to make sure this isn't called
        raise TypeError("SimpleNewObj.__init__() didn't expect to get called")

    def __eq__(self, other):
        if int(self) != int(other):
            return False
        return self.__dict__ == other.__dict__
3060
class ComplexNewObj(SimpleNewObj):
    """SimpleNewObj that supplies positional arguments for __new__."""

    def __getnewargs__(self):
        # The value as uppercase hexadecimal text, plus the base to parse it.
        hex_text = '%X' % self
        return (hex_text, 16)
3064
class ComplexNewObjEx(SimpleNewObj):
    """SimpleNewObj that supplies positional and keyword arguments for
    __new__.
    """

    def __getnewargs_ex__(self):
        # The value as uppercase hexadecimal text goes in positionally;
        # the base is passed by keyword.
        positional = ('%X' % self,)
        keywords = {'base': 16}
        return positional, keywords
3068
class BadGetattr:
    """Object whose __getattr__ looks up another missing attribute.

    `foo` is never defined, so the lookup inside __getattr__ re-enters
    __getattr__ without end.
    """
    def __getattr__(self, key):
        # `self.foo` is itself a missing attribute, so this recurses.
        self.foo
3072
3073
class AbstractPickleModuleTests(unittest.TestCase):
    # Tests for the module-level API.  The dump, dumps, load, loads,
    # Pickler and Unpickler attributes used below are not defined in this
    # class; presumably concrete subclasses provide them.

    def test_dump_closed_file(self):
        # Dumping to a closed file must raise ValueError.
        f = open(TESTFN, "wb")
        try:
            f.close()
            self.assertRaises(ValueError, self.dump, 123, f)
        finally:
            support.unlink(TESTFN)

    def test_load_closed_file(self):
        # Loading from a closed file must raise ValueError.
        # Create the file first so that it can be reopened for reading.
        with open(TESTFN, "wb"):
            pass
        try:
            f = open(TESTFN, "rb")
            f.close()
            self.assertRaises(ValueError, self.load, f)
        finally:
            support.unlink(TESTFN)

    def test_load_from_and_dump_to_file(self):
        # A dump() followed by a load() on the same stream round-trips.
        stream = io.BytesIO()
        data = [123, {}, 124]
        self.dump(data, stream)
        stream.seek(0)
        unpickled = self.load(stream)
        self.assertEqual(unpickled, data)

    def test_highest_protocol(self):
        # Of course this needs to be changed when HIGHEST_PROTOCOL changes.
        self.assertEqual(pickle.HIGHEST_PROTOCOL, 5)

    def test_callapi(self):
        # Only checks that each call form is accepted.
        f = io.BytesIO()
        # With and without keyword arguments
        self.dump(123, f, -1)
        self.dump(123, file=f, protocol=-1)
        self.dumps(123, -1)
        self.dumps(123, protocol=-1)
        self.Pickler(f, -1)
        self.Pickler(f, protocol=-1)

    def test_dump_text_file(self):
        # Dumping to a file opened in text mode must raise TypeError
        # under every protocol.
        f = open(TESTFN, "w")
        try:
            for proto in protocols:
                self.assertRaises(TypeError, self.dump, 123, f, proto)
        finally:
            f.close()
            support.unlink(TESTFN)

    def test_incomplete_input(self):
        # Loading this truncated pickle must fail with one of the listed
        # exceptions.
        s = io.BytesIO(b"X''.")
        self.assertRaises((EOFError, struct.error, pickle.UnpicklingError), self.load, s)

    def test_bad_init(self):
        # Test issue3664 (pickle can segfault from a badly initialized Pickler).
        # Override initialization without calling __init__() of the superclass.
        class BadPickler(self.Pickler):
            def __init__(self): pass

        class BadUnpickler(self.Unpickler):
            def __init__(self): pass

        self.assertRaises(pickle.PicklingError, BadPickler().dump, 0)
        self.assertRaises(pickle.UnpicklingError, BadUnpickler().load)

    def check_dumps_loads_oob_buffers(self, dumps, loads):
        # No need to do the full gamut of tests here, just enough to
        # check that dumps() and loads() redirect their arguments
        # to the underlying Pickler and Unpickler, respectively.
        obj = ZeroCopyBytes(b"foo")

        for proto in range(0, 5):
            # Need protocol >= 5 for buffer_callback
            with self.assertRaises(ValueError):
                dumps(obj, protocol=proto,
                      buffer_callback=[].append)
        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
            buffers = []
            buffer_callback = buffers.append
            data = dumps(obj, protocol=proto,
                         buffer_callback=buffer_callback)
            self.assertNotIn(b"foo", data)
            self.assertEqual(bytes(buffers[0]), b"foo")
            # Need buffers argument to unpickle properly
            with self.assertRaises(pickle.UnpicklingError):
                loads(data)
            new = loads(data, buffers=buffers)
            self.assertIs(new, obj)

    def test_dumps_loads_oob_buffers(self):
        # Test out-of-band buffers (PEP 574) with top-level dumps() and loads()
        self.check_dumps_loads_oob_buffers(self.dumps, self.loads)

    def test_dump_load_oob_buffers(self):
        # Test out-of-band buffers (PEP 574) with top-level dump() and load()
        # by adapting them to the dumps()/loads() calling convention.
        def dumps(obj, **kwargs):
            f = io.BytesIO()
            self.dump(obj, f, **kwargs)
            return f.getvalue()

        def loads(data, **kwargs):
            f = io.BytesIO(data)
            return self.load(f, **kwargs)

        self.check_dumps_loads_oob_buffers(dumps, loads)
3179
3180
class AbstractPersistentPicklerTests(unittest.TestCase):

    # This class defines persistent_id() and persistent_load()
    # functions that should be used by the pickler.  All even integers
    # are pickled using persistent ids, and the string "test_false_value"
    # is given the empty (false) persistent id "".  Each hook counts its
    # calls so test_persistence() can check how often it was used.

    def persistent_id(self, object):
        if isinstance(object, int) and object % 2 == 0:
            self.id_count += 1
            return str(object)
        if object == "test_false_value":
            self.false_count += 1
            return ""
        # Everything else is pickled normally.
        return None

    def persistent_load(self, oid):
        if oid:
            self.load_count += 1
            object = int(oid)
            assert object % 2 == 0
            return object
        # The empty persistent id stands for the marker string.
        self.load_false_count += 1
        return "test_false_value"

    def test_persistence(self):
        # 0..9 contains five even integers, and the marker string
        # accounts for the single false-valued persistent id.
        L = [*range(10), "test_false_value"]
        for proto in protocols:
            self.id_count = self.false_count = 0
            self.load_false_count = self.load_count = 0
            roundtripped = self.loads(self.dumps(L, proto))
            self.assertEqual(roundtripped, L)
            self.assertEqual(self.id_count, 5)
            self.assertEqual(self.false_count, 1)
            self.assertEqual(self.load_count, 5)
            self.assertEqual(self.load_false_count, 1)
3219
3220
class AbstractIdentityPersistentPicklerTests(unittest.TestCase):

    # Both persistence hooks are the identity function: each object is used
    # as its own persistent id and is handed back unchanged on load.

    def persistent_id(self, obj):
        return obj

    def persistent_load(self, pid):
        return pid

    def _check_return_correct_type(self, obj, proto):
        # Round-trip obj and require the result to keep its type and value.
        pickled = self.dumps(obj, proto)
        unpickled = self.loads(pickled)
        self.assertIsInstance(unpickled, type(obj))
        self.assertEqual(unpickled, obj)

    def test_return_correct_type(self):
        general_samples = [b"abc\n", "abc\n", -1, -1.1 * 0.1, str]
        for proto in protocols:
            # Protocol 0 supports only ASCII strings.
            samples = ["abc"] if proto == 0 else general_samples
            for obj in samples:
                self._check_return_correct_type(obj, proto)

    def test_protocol0_is_ascii_only(self):
        non_ascii_str = "\N{EMPTY SET}"
        with self.assertRaises(pickle.PicklingError):
            self.dumps(non_ascii_str, 0)
        # Hand-built pickle: a PERSID record carrying the UTF-8 encoded
        # non-ASCII id, followed by the terminating '.'.
        encoded_id = non_ascii_str.encode('utf-8')
        pickled = pickle.PERSID + encoded_id + b'\n.'
        with self.assertRaises(pickle.UnpicklingError):
            self.loads(pickled)
3248
3249
class AbstractPicklerUnpicklerObjectTests(unittest.TestCase):

    # Tests that reuse Pickler / Unpickler objects and manipulate their
    # memos.  pickler_class and unpickler_class default to None here and
    # must be replaced before the tests can run; setUp() enforces that.

    pickler_class = None
    unpickler_class = None

    def setUp(self):
        # unittest assertions (rather than bare ``assert``) keep this check
        # alive when Python runs with -O.
        self.assertTrue(self.pickler_class, "pickler_class is not set")
        self.assertTrue(self.unpickler_class, "unpickler_class is not set")

    def test_clear_pickler_memo(self):
        # To test whether clear_memo() has any effect, we pickle an object,
        # then pickle it again without clearing the memo; the two serialized
        # forms should be different. If we clear_memo() and then pickle the
        # object again, the third serialized form should be identical to the
        # first one we obtained.
        data = ["abcdefg", "abcdefg", 44]
        for proto in protocols:
            # subTest makes a failure report name the offending protocol.
            with self.subTest(proto=proto):
                f = io.BytesIO()
                pickler = self.pickler_class(f, proto)

                pickler.dump(data)
                first_pickled = f.getvalue()

                # Reset BytesIO object.
                f.seek(0)
                f.truncate()

                pickler.dump(data)
                second_pickled = f.getvalue()

                # Reset the Pickler and BytesIO objects.
                pickler.clear_memo()
                f.seek(0)
                f.truncate()

                pickler.dump(data)
                third_pickled = f.getvalue()

                self.assertNotEqual(first_pickled, second_pickled)
                self.assertEqual(first_pickled, third_pickled)

    def test_priming_pickler_memo(self):
        # Verify that we can set the Pickler's memo attribute.
        data = ["abcdefg", "abcdefg", 44]
        f = io.BytesIO()
        pickler = self.pickler_class(f)

        pickler.dump(data)
        first_pickled = f.getvalue()

        # A second pickler that starts out with the first pickler's memo.
        f = io.BytesIO()
        primed = self.pickler_class(f)
        primed.memo = pickler.memo

        primed.dump(data)
        primed_pickled = f.getvalue()

        self.assertNotEqual(first_pickled, primed_pickled)

    def test_priming_unpickler_memo(self):
        # Verify that we can set the Unpickler's memo attribute.
        data = ["abcdefg", "abcdefg", 44]
        f = io.BytesIO()
        pickler = self.pickler_class(f)

        pickler.dump(data)
        first_pickled = f.getvalue()

        # A second pickle of the same data, produced with a primed memo.
        f = io.BytesIO()
        primed = self.pickler_class(f)
        primed.memo = pickler.memo

        primed.dump(data)
        primed_pickled = f.getvalue()

        unpickler = self.unpickler_class(io.BytesIO(first_pickled))
        unpickled_data1 = unpickler.load()

        self.assertEqual(unpickled_data1, data)

        # Load the primed pickle with an unpickler that starts out with
        # the first unpickler's memo.
        primed = self.unpickler_class(io.BytesIO(primed_pickled))
        primed.memo = unpickler.memo
        unpickled_data2 = primed.load()

        primed.memo.clear()

        self.assertEqual(unpickled_data2, data)
        # assertIs reports both objects on failure, unlike
        # assertTrue(a is b).
        self.assertIs(unpickled_data2, unpickled_data1)

    def test_reusing_unpickler_objects(self):
        # One Unpickler must be able to load a second, different pickle
        # after its underlying stream has been rewritten in place.
        data1 = ["abcdefg", "abcdefg", 44]
        f = io.BytesIO()
        pickler = self.pickler_class(f)
        pickler.dump(data1)
        pickled1 = f.getvalue()

        data2 = ["abcdefg", 44, 44]
        f = io.BytesIO()
        pickler = self.pickler_class(f)
        pickler.dump(data2)
        pickled2 = f.getvalue()

        f = io.BytesIO()
        f.write(pickled1)
        f.seek(0)
        unpickler = self.unpickler_class(f)
        self.assertEqual(unpickler.load(), data1)

        # Replace the stream contents while keeping the same unpickler.
        f.seek(0)
        f.truncate()
        f.write(pickled2)
        f.seek(0)
        self.assertEqual(unpickler.load(), data2)

    def _check_multiple_unpicklings(self, ioclass):
        # Load N concatenated copies of one pickle from a single stream of
        # type ioclass, then expect EOFError on the next load.
        for proto in protocols:
            with self.subTest(proto=proto):
                data1 = [(x, str(x)) for x in range(2000)] + [b"abcde", len]
                f = ioclass()
                pickler = self.pickler_class(f, protocol=proto)
                pickler.dump(data1)
                pickled = f.getvalue()

                N = 5
                f = ioclass(pickled * N)
                unpickler = self.unpickler_class(f)
                for i in range(N):
                    if f.seekable():
                        pos = f.tell()
                    self.assertEqual(unpickler.load(), data1)
                    # On seekable streams each load() must leave the file
                    # position exactly at the end of the pickle it read.
                    if f.seekable():
                        self.assertEqual(f.tell(), pos + len(pickled))
                self.assertRaises(EOFError, unpickler.load)

    def test_multiple_unpicklings_seekable(self):
        self._check_multiple_unpicklings(io.BytesIO)

    def test_multiple_unpicklings_unseekable(self):
        self._check_multiple_unpicklings(UnseekableIO)

    def test_unpickling_buffering_readline(self):
        # Issue #12687: the unpickler's buffering logic could fail with
        # text mode opcodes.
        data = list(range(10))
        for proto in protocols:
            for buf_size in range(1, 11):
                # Identify the failing combination in the test report.
                with self.subTest(proto=proto, buf_size=buf_size):
                    f = io.BufferedRandom(io.BytesIO(), buffer_size=buf_size)
                    pickler = self.pickler_class(f, protocol=proto)
                    pickler.dump(data)
                    f.seek(0)
                    unpickler = self.unpickler_class(f)
                    self.assertEqual(unpickler.load(), data)
3402
3403
3404# Tests for dispatch_table attribute
3405
# Value that AAA instances reduce to; the dispatch_table tests compare
# unpickled results against it.
REDUCE_A = 'reduce_A'

class AAA:
    """Object whose reduction rebuilds the string REDUCE_A, not an AAA."""

    def __reduce__(self):
        # (callable, args) pair: the reconstruction is str(REDUCE_A).
        rebuild, rebuild_args = str, (REDUCE_A,)
        return rebuild, rebuild_args
3411
class BBB:
    """Class with instance state and a __setstate__ that ignores it."""

    def __init__(self):
        # The instance attribute gives the object some state, which
        # enables the state-saving routines at pickling time.
        self.a = "some attribute"

    def __setstate__(self, state):
        # The incoming state is deliberately discarded; the attribute is
        # overwritten with a marker showing that this method ran.
        self.a = "BBB.__setstate__"
3420
3421
def setstate_bbb(obj, state):
    """Custom state setter for BBB objects.

    A callable like this one may be written by someone other than the
    author of the BBB class.  When it is passed as the state_setter item of
    a custom reducer, it allows custom state-setting behavior for BBB
    objects.  Think of it as the analogue of list_setitems or dict_setitems,
    but for foreign classes/functions.

    The given state is ignored: obj.a is always set to a fixed marker.
    """
    setattr(obj, "a", "custom state_setter")
3432
3433
3434
class AbstractCustomPicklerClass:
    """Pickler implementing a reducing hook using reducer_override."""

    def reducer_override(self, obj):
        """Pick a reduction for obj based solely on its ``__name__``.

        Objects named 'f' and 'MyClass' get a custom reduction, 'g' gets an
        invalid one, 'h' makes the hook raise ValueError, and anything else
        (including objects without a ``__name__``) yields NotImplemented.
        """
        name = getattr(obj, "__name__", None)

        if name == 'f':
            # asking the pickler to save f as 5
            return int, (5, )

        if name == 'MyClass':
            return str, ('some str',)

        if name == 'g':
            # in this case, the callback returns an invalid result (not a
            # 2-5 tuple or a string), the pickler should raise a proper
            # error.
            return False

        if name == 'h':
            # Simulate a case when the reducer fails. The error should
            # be propagated to the original ``dump`` call.
            raise ValueError('The reducer just failed')

        # No override for this object.
        return NotImplemented
3458
class AbstractHookTests(unittest.TestCase):
    def test_pickler_hook(self):
        # Check that a custom, user-defined pickler subclass can override
        # the default reducing routines of any type through its
        # reducer_override method.

        # The hook under test recognizes objects by __name__, so the names
        # f, g, h and MyClass below are significant.
        def f():
            pass

        def g():
            pass

        def h():
            pass

        class MyClass:
            pass

        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            with self.subTest(proto=proto):
                stream = io.BytesIO()
                pickler = self.pickler_class(stream, proto)

                pickler.dump([f, MyClass, math.log])
                unpickled = pickle.loads(stream.getvalue())
                new_f, some_str, math_log = unpickled

                self.assertEqual(new_f, 5)
                self.assertEqual(some_str, 'some str')
                # math.log does not have its usual reducer overridden, so
                # the custom reduction callback should silently direct the
                # pickler to the default pickling by attribute, by
                # returning NotImplemented
                self.assertIs(math_log, math.log)

                # An invalid reduction value must be rejected.
                self.assertRaises(pickle.PicklingError, pickler.dump, g)

                # An exception raised inside the hook must reach the
                # dump() caller.
                with self.assertRaisesRegex(ValueError,
                                            'The reducer just failed'):
                    pickler.dump(h)
3499
3500
class AbstractDispatchTableTests(unittest.TestCase):

    def test_default_dispatch_table(self):
        # No dispatch_table attribute by default
        pickler = self.pickler_class(io.BytesIO(), 0)
        self.assertRaises(AttributeError, getattr, pickler, 'dispatch_table')
        self.assertFalse(hasattr(pickler, 'dispatch_table'))

    def test_class_dispatch_table(self):
        # A dispatch_table attribute can be specified class-wide
        table = self.get_dispatch_table()

        class MyPickler(self.pickler_class):
            dispatch_table = table

        def dumps(obj, protocol=None):
            out = io.BytesIO()
            pickler = MyPickler(out, protocol)
            self.assertEqual(pickler.dispatch_table, table)
            pickler.dump(obj)
            return out.getvalue()

        self._test_dispatch_table(dumps, table)

    def test_instance_dispatch_table(self):
        # A dispatch_table attribute can also be specified instance-wide
        table = self.get_dispatch_table()

        def dumps(obj, protocol=None):
            out = io.BytesIO()
            pickler = self.pickler_class(out, protocol)
            pickler.dispatch_table = table
            self.assertEqual(pickler.dispatch_table, table)
            pickler.dump(obj)
            return out.getvalue()

        self._test_dispatch_table(dumps, table)

    def _test_dispatch_table(self, dumps, dispatch_table):
        # Shared scenario: dumps() pickles through dispatch_table, which
        # is mutated step by step below; plain pickle.dumps() must stay
        # unaffected throughout.
        def custom_roundtrip(obj):
            return pickle.loads(dumps(obj, 0))

        def default_roundtrip(obj):
            return pickle.loads(pickle.dumps(obj, 0))

        # pickling complex numbers using protocol 0 relies on copyreg
        # so check pickling a complex number still works
        z = 1 + 2j
        self.assertEqual(custom_roundtrip(z), z)
        self.assertEqual(default_roundtrip(z), z)

        # modify pickling of complex
        REDUCE_1 = 'reduce_1'

        def reduce_1(obj):
            return str, (REDUCE_1,)

        dispatch_table[complex] = reduce_1
        self.assertEqual(custom_roundtrip(z), REDUCE_1)
        self.assertEqual(default_roundtrip(z), z)

        # check picklability of AAA and BBB
        a = AAA()
        b = BBB()
        self.assertEqual(custom_roundtrip(a), REDUCE_A)
        self.assertIsInstance(custom_roundtrip(b), BBB)
        self.assertEqual(default_roundtrip(a), REDUCE_A)
        self.assertIsInstance(default_roundtrip(b), BBB)

        # modify pickling of BBB
        dispatch_table[BBB] = reduce_1
        self.assertEqual(custom_roundtrip(a), REDUCE_A)
        self.assertEqual(custom_roundtrip(b), REDUCE_1)
        self.assertEqual(default_roundtrip(a), REDUCE_A)
        self.assertIsInstance(default_roundtrip(b), BBB)

        # revert pickling of BBB and modify pickling of AAA
        REDUCE_2 = 'reduce_2'

        def reduce_2(obj):
            return str, (REDUCE_2,)

        dispatch_table[AAA] = reduce_2
        del dispatch_table[BBB]
        self.assertEqual(custom_roundtrip(a), REDUCE_2)
        self.assertIsInstance(custom_roundtrip(b), BBB)
        self.assertEqual(default_roundtrip(a), REDUCE_A)
        self.assertIsInstance(default_roundtrip(b), BBB)

        # End-to-end testing of save_reduce with the state_setter keyword
        # argument. This is a dispatch_table test as the primary goal of
        # state_setter is to tweak objects reduction behavior.
        # In particular, state_setter is useful when the default
        # __setstate__ behavior is not flexible enough.

        # No custom reducer for b has been registered for now, so
        # BBB.__setstate__ should be used at unpickling time
        restored = default_roundtrip(b)
        self.assertEqual(restored.a, "BBB.__setstate__")

        def reduce_bbb(obj):
            # Six-item reduction; the last item is the state setter.
            return BBB, (), obj.__dict__, None, None, setstate_bbb

        dispatch_table[BBB] = reduce_bbb

        # The custom reducer reduce_bbb includes a state setter, that should
        # have priority over BBB.__setstate__
        restored = custom_roundtrip(b)
        self.assertEqual(restored.a, "custom state_setter")
3606
3607
if __name__ == "__main__":
    # Print, for every protocol, a DATA<n> literal and its disassembly in
    # a form that can be pasted back to rewrite the DATA{0,1,2} constants.
    from pickletools import dis
    sample = create_data()
    for proto in range(pickle.HIGHEST_PROTOCOL+1):
        pickled = pickle.dumps(sample, proto)

        # The pickle itself, as a parenthesized run of 20-byte literals.
        print("DATA{0} = (".format(proto))
        for start in range(0, len(pickled), 20):
            chunk = bytes(pickled[start:start+20])
            print("    {0!r}".format(chunk))
        print(")")
        print()

        # Its disassembly, wrapped in a triple-quoted string.
        print("# Disassembly of DATA{0}".format(proto))
        print("DATA{0}_DIS = \"\"\"\\".format(proto))
        dis(pickled)
        print("\"\"\"")
        print()
3625