• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* pickle accelerator C extension: _pickle module.
2  *
3  * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
4  * and as an extension module (Py_BUILD_CORE_MODULE define) on other
5  * platforms. */
6 
7 #if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8 #  error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
9 #endif
10 
11 #include "Python.h"
12 #include "structmember.h"         // PyMemberDef
13 
14 PyDoc_STRVAR(pickle_module_doc,
15 "Optimized C implementation for the Python pickle module.");
16 
17 /*[clinic input]
18 module _pickle
19 class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
20 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
21 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
22 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
23 [clinic start generated code]*/
24 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
25 
/* Pickle protocol version limits.
   - DEFAULT_PROTOCOL: raise it only once the oldest still supported version
     of Python already includes that protocol.
   - HIGHEST_PROTOCOL: raise it whenever new opcodes are added to the pickle
     protocol. */
enum {
    DEFAULT_PROTOCOL = 4,
    HIGHEST_PROTOCOL = 5
};
33 
/* Pickle opcodes, one character constant per opcode.
   This list must stay in step with pickle.py; pickletools.py carries the
   extensive documentation for each opcode. */
enum opcode {
    /* Opcodes listed before the protocol 2 additions. */
    MARK = '(',
    STOP = '.',
    POP = '0',
    POP_MARK = '1',
    DUP = '2',
    FLOAT = 'F',
    INT = 'I',
    BININT = 'J',
    BININT1 = 'K',
    LONG = 'L',
    BININT2 = 'M',
    NONE = 'N',
    PERSID = 'P',
    BINPERSID = 'Q',
    REDUCE = 'R',
    STRING = 'S',
    BINSTRING = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE = 'V',
    BINUNICODE = 'X',
    APPEND = 'a',
    BUILD = 'b',
    GLOBAL = 'c',
    DICT = 'd',
    EMPTY_DICT = '}',
    APPENDS = 'e',
    GET = 'g',
    BINGET = 'h',
    INST = 'i',
    LONG_BINGET = 'j',
    LIST = 'l',
    EMPTY_LIST = ']',
    OBJ = 'o',
    PUT = 'p',
    BINPUT = 'q',
    LONG_BINPUT = 'r',
    SETITEM = 's',
    TUPLE = 't',
    EMPTY_TUPLE = ')',
    SETITEMS = 'u',
    BINFLOAT = 'G',

    /* Added by protocol 2. */
    PROTO = '\x80',
    NEWOBJ = '\x81',
    EXT1 = '\x82',
    EXT2 = '\x83',
    EXT4 = '\x84',
    TUPLE1 = '\x85',
    TUPLE2 = '\x86',
    TUPLE3 = '\x87',
    NEWTRUE = '\x88',
    NEWFALSE = '\x89',
    LONG1 = '\x8a',
    LONG4 = '\x8b',

    /* Added by protocol 3 (Python 3.x). */
    BINBYTES = 'B',
    SHORT_BINBYTES = 'C',

    /* Added by protocol 4. */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8 = '\x8d',
    BINBYTES8 = '\x8e',
    EMPTY_SET = '\x8f',
    ADDITEMS = '\x90',
    FROZENSET = '\x91',
    NEWOBJ_EX = '\x92',
    STACK_GLOBAL = '\x93',
    MEMOIZE = '\x94',
    FRAME = '\x95',

    /* Added by protocol 5. */
    BYTEARRAY8 = '\x96',
    NEXT_BUFFER = '\x97',
    READONLY_BUFFER = '\x98'
};
114 
/* Tuning constants for the pickler and unpickler. */
enum {
    /* Number of elements batch_list/dict() emits before issuing
       APPENDS/SETITEMS.  Keep it in sync with pickle.Pickler._BATCHSIZE;
       nothing breaks if the two diverge, but it is unclear that diverging
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth after which a Pickler running in "fast mode" begins to
       check for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size used when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16,

    /* Frame size parameters. */
    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9
};
136 
137 /*************************************************************************/
138 
139 /* State of the pickle module, per PEP 3121. */
140 typedef struct {
141     /* Exception classes for pickle. */
142     PyObject *PickleError;
143     PyObject *PicklingError;
144     PyObject *UnpicklingError;
145 
146     /* copyreg.dispatch_table, {type_object: pickling_function} */
147     PyObject *dispatch_table;
148 
149     /* For the extension opcodes EXT1, EXT2 and EXT4. */
150 
151     /* copyreg._extension_registry, {(module_name, function_name): code} */
152     PyObject *extension_registry;
153     /* copyreg._extension_cache, {code: object} */
154     PyObject *extension_cache;
155     /* copyreg._inverted_registry, {code: (module_name, function_name)} */
156     PyObject *inverted_registry;
157 
158     /* Import mappings for compatibility with Python 2.x */
159 
160     /* _compat_pickle.NAME_MAPPING,
161        {(oldmodule, oldname): (newmodule, newname)} */
162     PyObject *name_mapping_2to3;
163     /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
164     PyObject *import_mapping_2to3;
165     /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
166     PyObject *name_mapping_3to2;
167     PyObject *import_mapping_3to2;
168 
169     /* codecs.encode, used for saving bytes in older protocols */
170     PyObject *codecs_encode;
171     /* builtins.getattr, used for saving nested names with protocol < 4 */
172     PyObject *getattr;
173     /* functools.partial, used for implementing __newobj_ex__ with protocols
174        2 and 3 */
175     PyObject *partial;
176 } PickleState;
177 
178 /* Forward declaration of the _pickle module definition. */
179 static struct PyModuleDef _picklemodule;
180 
181 /* Given a module object, get its per-module state. */
182 static PickleState *
_Pickle_GetState(PyObject * module)183 _Pickle_GetState(PyObject *module)
184 {
185     return (PickleState *)PyModule_GetState(module);
186 }
187 
188 /* Find the module instance imported in the currently running sub-interpreter
189    and get its state. */
190 static PickleState *
_Pickle_GetGlobalState(void)191 _Pickle_GetGlobalState(void)
192 {
193     return _Pickle_GetState(PyState_FindModule(&_picklemodule));
194 }
195 
196 /* Clear the given pickle module state. */
197 static void
_Pickle_ClearState(PickleState * st)198 _Pickle_ClearState(PickleState *st)
199 {
200     Py_CLEAR(st->PickleError);
201     Py_CLEAR(st->PicklingError);
202     Py_CLEAR(st->UnpicklingError);
203     Py_CLEAR(st->dispatch_table);
204     Py_CLEAR(st->extension_registry);
205     Py_CLEAR(st->extension_cache);
206     Py_CLEAR(st->inverted_registry);
207     Py_CLEAR(st->name_mapping_2to3);
208     Py_CLEAR(st->import_mapping_2to3);
209     Py_CLEAR(st->name_mapping_3to2);
210     Py_CLEAR(st->import_mapping_3to2);
211     Py_CLEAR(st->codecs_encode);
212     Py_CLEAR(st->getattr);
213     Py_CLEAR(st->partial);
214 }
215 
216 /* Initialize the given pickle module state. */
217 static int
_Pickle_InitState(PickleState * st)218 _Pickle_InitState(PickleState *st)
219 {
220     PyObject *copyreg = NULL;
221     PyObject *compat_pickle = NULL;
222     PyObject *codecs = NULL;
223     PyObject *functools = NULL;
224     _Py_IDENTIFIER(getattr);
225 
226     st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
227     if (st->getattr == NULL)
228         goto error;
229 
230     copyreg = PyImport_ImportModule("copyreg");
231     if (!copyreg)
232         goto error;
233     st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
234     if (!st->dispatch_table)
235         goto error;
236     if (!PyDict_CheckExact(st->dispatch_table)) {
237         PyErr_Format(PyExc_RuntimeError,
238                      "copyreg.dispatch_table should be a dict, not %.200s",
239                      Py_TYPE(st->dispatch_table)->tp_name);
240         goto error;
241     }
242     st->extension_registry = \
243         PyObject_GetAttrString(copyreg, "_extension_registry");
244     if (!st->extension_registry)
245         goto error;
246     if (!PyDict_CheckExact(st->extension_registry)) {
247         PyErr_Format(PyExc_RuntimeError,
248                      "copyreg._extension_registry should be a dict, "
249                      "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
250         goto error;
251     }
252     st->inverted_registry = \
253         PyObject_GetAttrString(copyreg, "_inverted_registry");
254     if (!st->inverted_registry)
255         goto error;
256     if (!PyDict_CheckExact(st->inverted_registry)) {
257         PyErr_Format(PyExc_RuntimeError,
258                      "copyreg._inverted_registry should be a dict, "
259                      "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
260         goto error;
261     }
262     st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
263     if (!st->extension_cache)
264         goto error;
265     if (!PyDict_CheckExact(st->extension_cache)) {
266         PyErr_Format(PyExc_RuntimeError,
267                      "copyreg._extension_cache should be a dict, "
268                      "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
269         goto error;
270     }
271     Py_CLEAR(copyreg);
272 
273     /* Load the 2.x -> 3.x stdlib module mapping tables */
274     compat_pickle = PyImport_ImportModule("_compat_pickle");
275     if (!compat_pickle)
276         goto error;
277     st->name_mapping_2to3 = \
278         PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
279     if (!st->name_mapping_2to3)
280         goto error;
281     if (!PyDict_CheckExact(st->name_mapping_2to3)) {
282         PyErr_Format(PyExc_RuntimeError,
283                      "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
284                      Py_TYPE(st->name_mapping_2to3)->tp_name);
285         goto error;
286     }
287     st->import_mapping_2to3 = \
288         PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
289     if (!st->import_mapping_2to3)
290         goto error;
291     if (!PyDict_CheckExact(st->import_mapping_2to3)) {
292         PyErr_Format(PyExc_RuntimeError,
293                      "_compat_pickle.IMPORT_MAPPING should be a dict, "
294                      "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
295         goto error;
296     }
297     /* ... and the 3.x -> 2.x mapping tables */
298     st->name_mapping_3to2 = \
299         PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
300     if (!st->name_mapping_3to2)
301         goto error;
302     if (!PyDict_CheckExact(st->name_mapping_3to2)) {
303         PyErr_Format(PyExc_RuntimeError,
304                      "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
305                      "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
306         goto error;
307     }
308     st->import_mapping_3to2 = \
309         PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
310     if (!st->import_mapping_3to2)
311         goto error;
312     if (!PyDict_CheckExact(st->import_mapping_3to2)) {
313         PyErr_Format(PyExc_RuntimeError,
314                      "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
315                      "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
316         goto error;
317     }
318     Py_CLEAR(compat_pickle);
319 
320     codecs = PyImport_ImportModule("codecs");
321     if (codecs == NULL)
322         goto error;
323     st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
324     if (st->codecs_encode == NULL) {
325         goto error;
326     }
327     if (!PyCallable_Check(st->codecs_encode)) {
328         PyErr_Format(PyExc_RuntimeError,
329                      "codecs.encode should be a callable, not %.200s",
330                      Py_TYPE(st->codecs_encode)->tp_name);
331         goto error;
332     }
333     Py_CLEAR(codecs);
334 
335     functools = PyImport_ImportModule("functools");
336     if (!functools)
337         goto error;
338     st->partial = PyObject_GetAttrString(functools, "partial");
339     if (!st->partial)
340         goto error;
341     Py_CLEAR(functools);
342 
343     return 0;
344 
345   error:
346     Py_CLEAR(copyreg);
347     Py_CLEAR(compat_pickle);
348     Py_CLEAR(codecs);
349     Py_CLEAR(functools);
350     _Pickle_ClearState(st);
351     return -1;
352 }
353 
354 /* Helper for calling a function with a single argument quickly.
355 
356    This function steals the reference of the given argument. */
357 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)358 _Pickle_FastCall(PyObject *func, PyObject *obj)
359 {
360     PyObject *result;
361 
362     result = PyObject_CallOneArg(func, obj);
363     Py_DECREF(obj);
364     return result;
365 }
366 
367 /*************************************************************************/
368 
369 /* Retrieve and deconstruct a method for avoiding a reference cycle
370    (pickler -> bound method of pickler -> pickler) */
371 static int
init_method_ref(PyObject * self,_Py_Identifier * name,PyObject ** method_func,PyObject ** method_self)372 init_method_ref(PyObject *self, _Py_Identifier *name,
373                 PyObject **method_func, PyObject **method_self)
374 {
375     PyObject *func, *func2;
376     int ret;
377 
378     /* *method_func and *method_self should be consistent.  All refcount decrements
379        should be occurred after setting *method_self and *method_func. */
380     ret = _PyObject_LookupAttrId(self, name, &func);
381     if (func == NULL) {
382         *method_self = NULL;
383         Py_CLEAR(*method_func);
384         return ret;
385     }
386 
387     if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
388         /* Deconstruct a bound Python method */
389         func2 = PyMethod_GET_FUNCTION(func);
390         Py_INCREF(func2);
391         *method_self = self; /* borrowed */
392         Py_XSETREF(*method_func, func2);
393         Py_DECREF(func);
394         return 0;
395     }
396     else {
397         *method_self = NULL;
398         Py_XSETREF(*method_func, func);
399         return 0;
400     }
401 }
402 
403 /* Bind a method if it was deconstructed */
404 static PyObject *
reconstruct_method(PyObject * func,PyObject * self)405 reconstruct_method(PyObject *func, PyObject *self)
406 {
407     if (self) {
408         return PyMethod_New(func, self);
409     }
410     else {
411         Py_INCREF(func);
412         return func;
413     }
414 }
415 
416 static PyObject *
call_method(PyObject * func,PyObject * self,PyObject * obj)417 call_method(PyObject *func, PyObject *self, PyObject *obj)
418 {
419     if (self) {
420         return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
421     }
422     else {
423         return PyObject_CallOneArg(func, obj);
424     }
425 }
426 
427 /*************************************************************************/
428 
429 /* Internal data type used as the unpickling stack. */
430 typedef struct {
431     PyObject_VAR_HEAD
432     PyObject **data;
433     int mark_set;          /* is MARK set? */
434     Py_ssize_t fence;      /* position of top MARK or 0 */
435     Py_ssize_t allocated;  /* number of slots in data allocated */
436 } Pdata;
437 
438 static void
Pdata_dealloc(Pdata * self)439 Pdata_dealloc(Pdata *self)
440 {
441     Py_ssize_t i = Py_SIZE(self);
442     while (--i >= 0) {
443         Py_DECREF(self->data[i]);
444     }
445     PyMem_FREE(self->data);
446     PyObject_Del(self);
447 }
448 
449 static PyTypeObject Pdata_Type = {
450     PyVarObject_HEAD_INIT(NULL, 0)
451     "_pickle.Pdata",              /*tp_name*/
452     sizeof(Pdata),                /*tp_basicsize*/
453     sizeof(PyObject *),           /*tp_itemsize*/
454     (destructor)Pdata_dealloc,    /*tp_dealloc*/
455 };
456 
457 static PyObject *
Pdata_New(void)458 Pdata_New(void)
459 {
460     Pdata *self;
461 
462     if (!(self = PyObject_New(Pdata, &Pdata_Type)))
463         return NULL;
464     Py_SET_SIZE(self, 0);
465     self->mark_set = 0;
466     self->fence = 0;
467     self->allocated = 8;
468     self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
469     if (self->data)
470         return (PyObject *)self;
471     Py_DECREF(self);
472     return PyErr_NoMemory();
473 }
474 
475 
476 /* Retain only the initial clearto items.  If clearto >= the current
477  * number of items, this is a (non-erroneous) NOP.
478  */
479 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)480 Pdata_clear(Pdata *self, Py_ssize_t clearto)
481 {
482     Py_ssize_t i = Py_SIZE(self);
483 
484     assert(clearto >= self->fence);
485     if (clearto >= i)
486         return 0;
487 
488     while (--i >= clearto) {
489         Py_CLEAR(self->data[i]);
490     }
491     Py_SET_SIZE(self, clearto);
492     return 0;
493 }
494 
495 static int
Pdata_grow(Pdata * self)496 Pdata_grow(Pdata *self)
497 {
498     PyObject **data = self->data;
499     size_t allocated = (size_t)self->allocated;
500     size_t new_allocated;
501 
502     new_allocated = (allocated >> 3) + 6;
503     /* check for integer overflow */
504     if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
505         goto nomemory;
506     new_allocated += allocated;
507     PyMem_RESIZE(data, PyObject *, new_allocated);
508     if (data == NULL)
509         goto nomemory;
510 
511     self->data = data;
512     self->allocated = (Py_ssize_t)new_allocated;
513     return 0;
514 
515   nomemory:
516     PyErr_NoMemory();
517     return -1;
518 }
519 
520 static int
Pdata_stack_underflow(Pdata * self)521 Pdata_stack_underflow(Pdata *self)
522 {
523     PickleState *st = _Pickle_GetGlobalState();
524     PyErr_SetString(st->UnpicklingError,
525                     self->mark_set ?
526                     "unexpected MARK found" :
527                     "unpickling stack underflow");
528     return -1;
529 }
530 
531 /* D is a Pdata*.  Pop the topmost element and store it into V, which
532  * must be an lvalue holding PyObject*.  On stack underflow, UnpicklingError
533  * is raised and V is set to NULL.
534  */
535 static PyObject *
Pdata_pop(Pdata * self)536 Pdata_pop(Pdata *self)
537 {
538     if (Py_SIZE(self) <= self->fence) {
539         Pdata_stack_underflow(self);
540         return NULL;
541     }
542     Py_SET_SIZE(self, Py_SIZE(self) - 1);
543     return self->data[Py_SIZE(self)];
544 }
545 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
546 
547 static int
Pdata_push(Pdata * self,PyObject * obj)548 Pdata_push(Pdata *self, PyObject *obj)
549 {
550     if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
551         return -1;
552     }
553     self->data[Py_SIZE(self)] = obj;
554     Py_SET_SIZE(self, Py_SIZE(self) + 1);
555     return 0;
556 }
557 
/* Push an object on stack, transferring its ownership to the stack.
   D is a Pdata*, O the object, ER the value the enclosing function returns
   when the push fails. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   D is a Pdata*, O the object, ER the value the enclosing function returns
   when the push fails.  If the push fails, the reference taken here is
   released again before returning, so nothing leaks on the error path. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        } } while(0)
566 
567 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)568 Pdata_poptuple(Pdata *self, Py_ssize_t start)
569 {
570     PyObject *tuple;
571     Py_ssize_t len, i, j;
572 
573     if (start < self->fence) {
574         Pdata_stack_underflow(self);
575         return NULL;
576     }
577     len = Py_SIZE(self) - start;
578     tuple = PyTuple_New(len);
579     if (tuple == NULL)
580         return NULL;
581     for (i = start, j = 0; j < len; i++, j++)
582         PyTuple_SET_ITEM(tuple, j, self->data[i]);
583 
584     Py_SET_SIZE(self, start);
585     return tuple;
586 }
587 
588 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)589 Pdata_poplist(Pdata *self, Py_ssize_t start)
590 {
591     PyObject *list;
592     Py_ssize_t len, i, j;
593 
594     len = Py_SIZE(self) - start;
595     list = PyList_New(len);
596     if (list == NULL)
597         return NULL;
598     for (i = start, j = 0; j < len; i++, j++)
599         PyList_SET_ITEM(list, j, self->data[i]);
600 
601     Py_SET_SIZE(self, start);
602     return list;
603 }
604 
605 typedef struct {
606     PyObject *me_key;
607     Py_ssize_t me_value;
608 } PyMemoEntry;
609 
610 typedef struct {
611     size_t mt_mask;
612     size_t mt_used;
613     size_t mt_allocated;
614     PyMemoEntry *mt_table;
615 } PyMemoTable;
616 
617 typedef struct PicklerObject {
618     PyObject_HEAD
619     PyMemoTable *memo;          /* Memo table, keep track of the seen
620                                    objects to support self-referential objects
621                                    pickling. */
622     PyObject *pers_func;        /* persistent_id() method, can be NULL */
623     PyObject *pers_func_self;   /* borrowed reference to self if pers_func
624                                    is an unbound method, NULL otherwise */
625     PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
626     PyObject *reducer_override; /* hook for invoking user-defined callbacks
627                                    instead of save_global when pickling
628                                    functions and classes*/
629 
630     PyObject *write;            /* write() method of the output stream. */
631     PyObject *output_buffer;    /* Write into a local bytearray buffer before
632                                    flushing to the stream. */
633     Py_ssize_t output_len;      /* Length of output_buffer. */
634     Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
635     int proto;                  /* Pickle protocol number, >= 0 */
636     int bin;                    /* Boolean, true if proto > 0 */
637     int framing;                /* True when framing is enabled, proto >= 4 */
638     Py_ssize_t frame_start;     /* Position in output_buffer where the
639                                    current frame begins. -1 if there
640                                    is no frame currently open. */
641 
642     Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
643     int fast;                   /* Enable fast mode if set to a true value.
644                                    The fast mode disable the usage of memo,
645                                    therefore speeding the pickling process by
646                                    not generating superfluous PUT opcodes. It
647                                    should not be used if with self-referential
648                                    objects. */
649     int fast_nesting;
650     int fix_imports;            /* Indicate whether Pickler should fix
651                                    the name of globals for Python 2.x. */
652     PyObject *fast_memo;
653     PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
654 } PicklerObject;
655 
656 typedef struct UnpicklerObject {
657     PyObject_HEAD
658     Pdata *stack;               /* Pickle data stack, store unpickled objects. */
659 
660     /* The unpickler memo is just an array of PyObject *s. Using a dict
661        is unnecessary, since the keys are contiguous ints. */
662     PyObject **memo;
663     size_t memo_size;       /* Capacity of the memo array */
664     size_t memo_len;        /* Number of objects in the memo */
665 
666     PyObject *pers_func;        /* persistent_load() method, can be NULL. */
667     PyObject *pers_func_self;   /* borrowed reference to self if pers_func
668                                    is an unbound method, NULL otherwise */
669 
670     Py_buffer buffer;
671     char *input_buffer;
672     char *input_line;
673     Py_ssize_t input_len;
674     Py_ssize_t next_read_idx;
675     Py_ssize_t prefetched_idx;  /* index of first prefetched byte */
676 
677     PyObject *read;             /* read() method of the input stream. */
678     PyObject *readinto;         /* readinto() method of the input stream. */
679     PyObject *readline;         /* readline() method of the input stream. */
680     PyObject *peek;             /* peek() method of the input stream, or NULL */
681     PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */
682 
683     char *encoding;             /* Name of the encoding to be used for
684                                    decoding strings pickled using Python
685                                    2.x. The default value is "ASCII" */
686     char *errors;               /* Name of errors handling scheme to used when
687                                    decoding strings. The default value is
688                                    "strict". */
689     Py_ssize_t *marks;          /* Mark stack, used for unpickling container
690                                    objects. */
691     Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
692     Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
693     int proto;                  /* Protocol of the pickle loaded. */
694     int fix_imports;            /* Indicate whether Unpickler should fix
695                                    the name of globals pickled by Python 2.x. */
696 } UnpicklerObject;
697 
698 typedef struct {
699     PyObject_HEAD
700     PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
701 }  PicklerMemoProxyObject;
702 
703 typedef struct {
704     PyObject_HEAD
705     UnpicklerObject *unpickler;
706 } UnpicklerMemoProxyObject;
707 
708 /* Forward declarations */
709 static int save(PicklerObject *, PyObject *, int);
710 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
711 static PyTypeObject Pickler_Type;
712 static PyTypeObject Unpickler_Type;
713 
714 #include "clinic/_pickle.c.h"
715 
716 /*************************************************************************
717  A custom hashtable mapping void* to Python ints. This is used by the pickler
718  for memoization. Using a custom hashtable rather than PyDict allows us to skip
719  a bunch of unnecessary object creation. This makes a huge performance
720  difference. */
721 
722 #define MT_MINSIZE 8
723 #define PERTURB_SHIFT 5
724 
725 
726 static PyMemoTable *
PyMemoTable_New(void)727 PyMemoTable_New(void)
728 {
729     PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
730     if (memo == NULL) {
731         PyErr_NoMemory();
732         return NULL;
733     }
734 
735     memo->mt_used = 0;
736     memo->mt_allocated = MT_MINSIZE;
737     memo->mt_mask = MT_MINSIZE - 1;
738     memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
739     if (memo->mt_table == NULL) {
740         PyMem_FREE(memo);
741         PyErr_NoMemory();
742         return NULL;
743     }
744     memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
745 
746     return memo;
747 }
748 
749 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)750 PyMemoTable_Copy(PyMemoTable *self)
751 {
752     PyMemoTable *new = PyMemoTable_New();
753     if (new == NULL)
754         return NULL;
755 
756     new->mt_used = self->mt_used;
757     new->mt_allocated = self->mt_allocated;
758     new->mt_mask = self->mt_mask;
759     /* The table we get from _New() is probably smaller than we wanted.
760        Free it and allocate one that's the right size. */
761     PyMem_FREE(new->mt_table);
762     new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
763     if (new->mt_table == NULL) {
764         PyMem_FREE(new);
765         PyErr_NoMemory();
766         return NULL;
767     }
768     for (size_t i = 0; i < self->mt_allocated; i++) {
769         Py_XINCREF(self->mt_table[i].me_key);
770     }
771     memcpy(new->mt_table, self->mt_table,
772            sizeof(PyMemoEntry) * self->mt_allocated);
773 
774     return new;
775 }
776 
777 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)778 PyMemoTable_Size(PyMemoTable *self)
779 {
780     return self->mt_used;
781 }
782 
783 static int
PyMemoTable_Clear(PyMemoTable * self)784 PyMemoTable_Clear(PyMemoTable *self)
785 {
786     Py_ssize_t i = self->mt_allocated;
787 
788     while (--i >= 0) {
789         Py_XDECREF(self->mt_table[i].me_key);
790     }
791     self->mt_used = 0;
792     memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
793     return 0;
794 }
795 
796 static void
PyMemoTable_Del(PyMemoTable * self)797 PyMemoTable_Del(PyMemoTable *self)
798 {
799     if (self == NULL)
800         return;
801     PyMemoTable_Clear(self);
802 
803     PyMem_FREE(self->mt_table);
804     PyMem_FREE(self);
805 }
806 
807 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
808    can be considerably simpler than dictobject.c's lookdict(). */
809 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)810 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
811 {
812     size_t i;
813     size_t perturb;
814     size_t mask = self->mt_mask;
815     PyMemoEntry *table = self->mt_table;
816     PyMemoEntry *entry;
817     Py_hash_t hash = (Py_hash_t)key >> 3;
818 
819     i = hash & mask;
820     entry = &table[i];
821     if (entry->me_key == NULL || entry->me_key == key)
822         return entry;
823 
824     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
825         i = (i << 2) + i + perturb + 1;
826         entry = &table[i & mask];
827         if (entry->me_key == NULL || entry->me_key == key)
828             return entry;
829     }
830     Py_UNREACHABLE();
831 }
832 
833 /* Returns -1 on failure, 0 on success. */
834 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)835 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
836 {
837     PyMemoEntry *oldtable = NULL;
838     PyMemoEntry *oldentry, *newentry;
839     size_t new_size = MT_MINSIZE;
840     size_t to_process;
841 
842     assert(min_size > 0);
843 
844     if (min_size > PY_SSIZE_T_MAX) {
845         PyErr_NoMemory();
846         return -1;
847     }
848 
849     /* Find the smallest valid table size >= min_size. */
850     while (new_size < min_size) {
851         new_size <<= 1;
852     }
853     /* new_size needs to be a power of two. */
854     assert((new_size & (new_size - 1)) == 0);
855 
856     /* Allocate new table. */
857     oldtable = self->mt_table;
858     self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
859     if (self->mt_table == NULL) {
860         self->mt_table = oldtable;
861         PyErr_NoMemory();
862         return -1;
863     }
864     self->mt_allocated = new_size;
865     self->mt_mask = new_size - 1;
866     memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
867 
868     /* Copy entries from the old table. */
869     to_process = self->mt_used;
870     for (oldentry = oldtable; to_process > 0; oldentry++) {
871         if (oldentry->me_key != NULL) {
872             to_process--;
873             /* newentry is a pointer to a chunk of the new
874                mt_table, so we're setting the key:value pair
875                in-place. */
876             newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
877             newentry->me_key = oldentry->me_key;
878             newentry->me_value = oldentry->me_value;
879         }
880     }
881 
882     /* Deallocate the old table. */
883     PyMem_FREE(oldtable);
884     return 0;
885 }
886 
887 /* Returns NULL on failure, a pointer to the value otherwise. */
888 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)889 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
890 {
891     PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
892     if (entry->me_key == NULL)
893         return NULL;
894     return &entry->me_value;
895 }
896 
897 /* Returns -1 on failure, 0 on success. */
898 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)899 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
900 {
901     PyMemoEntry *entry;
902 
903     assert(key != NULL);
904 
905     entry = _PyMemoTable_Lookup(self, key);
906     if (entry->me_key != NULL) {
907         entry->me_value = value;
908         return 0;
909     }
910     Py_INCREF(key);
911     entry->me_key = key;
912     entry->me_value = value;
913     self->mt_used++;
914 
915     /* If we added a key, we can safely resize. Otherwise just return!
916      * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
917      *
918      * Quadrupling the size improves average table sparseness
919      * (reducing collisions) at the cost of some memory. It also halves
920      * the number of expensive resize operations in a growing memo table.
921      *
922      * Very large memo tables (over 50K items) use doubling instead.
923      * This may help applications with severe memory constraints.
924      */
925     if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
926         return 0;
927     }
928     // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
929     size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
930     return _PyMemoTable_ResizeTable(self, desired_size);
931 }
932 
933 #undef MT_MINSIZE
934 #undef PERTURB_SHIFT
935 
936 /*************************************************************************/
937 
938 
939 static int
_Pickler_ClearBuffer(PicklerObject * self)940 _Pickler_ClearBuffer(PicklerObject *self)
941 {
942     Py_XSETREF(self->output_buffer,
943               PyBytes_FromStringAndSize(NULL, self->max_output_len));
944     if (self->output_buffer == NULL)
945         return -1;
946     self->output_len = 0;
947     self->frame_start = -1;
948     return 0;
949 }
950 
/* Store `value` into out[0..7] as an 8-byte little-endian integer.

   Bytes beyond sizeof(size_t) (on platforms where size_t is narrower than
   64 bits) are written as zero. */
static void
_write_size64(char *out, size_t value)
{
    size_t pos;

    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    for (pos = 0; pos < 8; pos++) {
        if (pos < sizeof(size_t)) {
            /* Emit the current low byte, then move on to the next one. */
            out[pos] = (unsigned char)(value & 0xff);
            value >>= 8;
        }
        else {
            out[pos] = 0;
        }
    }
}
965 
966 static int
_Pickler_CommitFrame(PicklerObject * self)967 _Pickler_CommitFrame(PicklerObject *self)
968 {
969     size_t frame_len;
970     char *qdata;
971 
972     if (!self->framing || self->frame_start == -1)
973         return 0;
974     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
975     qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
976     if (frame_len >= FRAME_SIZE_MIN) {
977         qdata[0] = FRAME;
978         _write_size64(qdata + 1, frame_len);
979     }
980     else {
981         memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
982         self->output_len -= FRAME_HEADER_SIZE;
983     }
984     self->frame_start = -1;
985     return 0;
986 }
987 
988 static PyObject *
_Pickler_GetString(PicklerObject * self)989 _Pickler_GetString(PicklerObject *self)
990 {
991     PyObject *output_buffer = self->output_buffer;
992 
993     assert(self->output_buffer != NULL);
994 
995     if (_Pickler_CommitFrame(self))
996         return NULL;
997 
998     self->output_buffer = NULL;
999     /* Resize down to exact size */
1000     if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
1001         return NULL;
1002     return output_buffer;
1003 }
1004 
1005 static int
_Pickler_FlushToFile(PicklerObject * self)1006 _Pickler_FlushToFile(PicklerObject *self)
1007 {
1008     PyObject *output, *result;
1009 
1010     assert(self->write != NULL);
1011 
1012     /* This will commit the frame first */
1013     output = _Pickler_GetString(self);
1014     if (output == NULL)
1015         return -1;
1016 
1017     result = _Pickle_FastCall(self->write, output);
1018     Py_XDECREF(result);
1019     return (result == NULL) ? -1 : 0;
1020 }
1021 
1022 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1023 _Pickler_OpcodeBoundary(PicklerObject *self)
1024 {
1025     Py_ssize_t frame_len;
1026 
1027     if (!self->framing || self->frame_start == -1) {
1028         return 0;
1029     }
1030     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1031     if (frame_len >= FRAME_SIZE_TARGET) {
1032         if(_Pickler_CommitFrame(self)) {
1033             return -1;
1034         }
1035         /* Flush the content of the committed frame to the underlying
1036          * file and reuse the pickler buffer for the next frame so as
1037          * to limit memory usage when dumping large complex objects to
1038          * a file.
1039          *
1040          * self->write is NULL when called via dumps.
1041          */
1042         if (self->write != NULL) {
1043             if (_Pickler_FlushToFile(self) < 0) {
1044                 return -1;
1045             }
1046             if (_Pickler_ClearBuffer(self) < 0) {
1047                 return -1;
1048             }
1049         }
1050     }
1051     return 0;
1052 }
1053 
1054 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1055 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1056 {
1057     Py_ssize_t i, n, required;
1058     char *buffer;
1059     int need_new_frame;
1060 
1061     assert(s != NULL);
1062     need_new_frame = (self->framing && self->frame_start == -1);
1063 
1064     if (need_new_frame)
1065         n = data_len + FRAME_HEADER_SIZE;
1066     else
1067         n = data_len;
1068 
1069     required = self->output_len + n;
1070     if (required > self->max_output_len) {
1071         /* Make place in buffer for the pickle chunk */
1072         if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1073             PyErr_NoMemory();
1074             return -1;
1075         }
1076         self->max_output_len = (self->output_len + n) / 2 * 3;
1077         if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1078             return -1;
1079     }
1080     buffer = PyBytes_AS_STRING(self->output_buffer);
1081     if (need_new_frame) {
1082         /* Setup new frame */
1083         Py_ssize_t frame_start = self->output_len;
1084         self->frame_start = frame_start;
1085         for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1086             /* Write an invalid value, for debugging */
1087             buffer[frame_start + i] = 0xFE;
1088         }
1089         self->output_len += FRAME_HEADER_SIZE;
1090     }
1091     if (data_len < 8) {
1092         /* This is faster than memcpy when the string is short. */
1093         for (i = 0; i < data_len; i++) {
1094             buffer[self->output_len + i] = s[i];
1095         }
1096     }
1097     else {
1098         memcpy(buffer + self->output_len, s, data_len);
1099     }
1100     self->output_len += data_len;
1101     return data_len;
1102 }
1103 
1104 static PicklerObject *
_Pickler_New(void)1105 _Pickler_New(void)
1106 {
1107     PicklerObject *self;
1108 
1109     self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1110     if (self == NULL)
1111         return NULL;
1112 
1113     self->pers_func = NULL;
1114     self->dispatch_table = NULL;
1115     self->buffer_callback = NULL;
1116     self->write = NULL;
1117     self->proto = 0;
1118     self->bin = 0;
1119     self->framing = 0;
1120     self->frame_start = -1;
1121     self->fast = 0;
1122     self->fast_nesting = 0;
1123     self->fix_imports = 0;
1124     self->fast_memo = NULL;
1125     self->max_output_len = WRITE_BUF_SIZE;
1126     self->output_len = 0;
1127     self->reducer_override = NULL;
1128 
1129     self->memo = PyMemoTable_New();
1130     self->output_buffer = PyBytes_FromStringAndSize(NULL,
1131                                                     self->max_output_len);
1132 
1133     if (self->memo == NULL || self->output_buffer == NULL) {
1134         Py_DECREF(self);
1135         return NULL;
1136     }
1137 
1138     PyObject_GC_Track(self);
1139     return self;
1140 }
1141 
1142 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1143 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1144 {
1145     long proto;
1146 
1147     if (protocol == Py_None) {
1148         proto = DEFAULT_PROTOCOL;
1149     }
1150     else {
1151         proto = PyLong_AsLong(protocol);
1152         if (proto < 0) {
1153             if (proto == -1 && PyErr_Occurred())
1154                 return -1;
1155             proto = HIGHEST_PROTOCOL;
1156         }
1157         else if (proto > HIGHEST_PROTOCOL) {
1158             PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1159                          HIGHEST_PROTOCOL);
1160             return -1;
1161         }
1162     }
1163     self->proto = (int)proto;
1164     self->bin = proto > 0;
1165     self->fix_imports = fix_imports && proto < 3;
1166     return 0;
1167 }
1168 
1169 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1170    be called once on a freshly created Pickler. */
1171 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1172 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1173 {
1174     _Py_IDENTIFIER(write);
1175     assert(file != NULL);
1176     if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1177         return -1;
1178     }
1179     if (self->write == NULL) {
1180         PyErr_SetString(PyExc_TypeError,
1181                         "file must have a 'write' attribute");
1182         return -1;
1183     }
1184 
1185     return 0;
1186 }
1187 
1188 static int
_Pickler_SetBufferCallback(PicklerObject * self,PyObject * buffer_callback)1189 _Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1190 {
1191     if (buffer_callback == Py_None) {
1192         buffer_callback = NULL;
1193     }
1194     if (buffer_callback != NULL && self->proto < 5) {
1195         PyErr_SetString(PyExc_ValueError,
1196                         "buffer_callback needs protocol >= 5");
1197         return -1;
1198     }
1199 
1200     Py_XINCREF(buffer_callback);
1201     self->buffer_callback = buffer_callback;
1202     return 0;
1203 }
1204 
1205 /* Returns the size of the input on success, -1 on failure. This takes its
1206    own reference to `input`. */
1207 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1208 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1209 {
1210     if (self->buffer.buf != NULL)
1211         PyBuffer_Release(&self->buffer);
1212     if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1213         return -1;
1214     self->input_buffer = self->buffer.buf;
1215     self->input_len = self->buffer.len;
1216     self->next_read_idx = 0;
1217     self->prefetched_idx = self->input_len;
1218     return self->input_len;
1219 }
1220 
1221 static int
bad_readline(void)1222 bad_readline(void)
1223 {
1224     PickleState *st = _Pickle_GetGlobalState();
1225     PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1226     return -1;
1227 }
1228 
1229 /* Skip any consumed data that was only prefetched using peek() */
1230 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1231 _Unpickler_SkipConsumed(UnpicklerObject *self)
1232 {
1233     Py_ssize_t consumed;
1234     PyObject *r;
1235 
1236     consumed = self->next_read_idx - self->prefetched_idx;
1237     if (consumed <= 0)
1238         return 0;
1239 
1240     assert(self->peek);  /* otherwise we did something wrong */
1241     /* This makes a useless copy... */
1242     r = PyObject_CallFunction(self->read, "n", consumed);
1243     if (r == NULL)
1244         return -1;
1245     Py_DECREF(r);
1246 
1247     self->prefetched_idx = self->next_read_idx;
1248     return 0;
1249 }
1250 
1251 static const Py_ssize_t READ_WHOLE_LINE = -1;
1252 
1253 /* If reading from a file, we need to only pull the bytes we need, since there
1254    may be multiple pickle objects arranged contiguously in the same input
1255    buffer.
1256 
1257    If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1258    bytes from the input stream/buffer.
1259 
1260    Update the unpickler's input buffer with the newly-read data. Returns -1 on
1261    failure; on success, returns the number of bytes read from the file.
1262 
1263    On success, self->input_len will be 0; this is intentional so that when
1264    unpickling from a file, the "we've run out of data" code paths will trigger,
1265    causing the Unpickler to go back to the file for more data. Use the returned
1266    size to tell you how much data you can process. */
1267 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1268 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1269 {
1270     PyObject *data;
1271     Py_ssize_t read_size;
1272 
1273     assert(self->read != NULL);
1274 
1275     if (_Unpickler_SkipConsumed(self) < 0)
1276         return -1;
1277 
1278     if (n == READ_WHOLE_LINE) {
1279         data = PyObject_CallNoArgs(self->readline);
1280     }
1281     else {
1282         PyObject *len;
1283         /* Prefetch some data without advancing the file pointer, if possible */
1284         if (self->peek && n < PREFETCH) {
1285             len = PyLong_FromSsize_t(PREFETCH);
1286             if (len == NULL)
1287                 return -1;
1288             data = _Pickle_FastCall(self->peek, len);
1289             if (data == NULL) {
1290                 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1291                     return -1;
1292                 /* peek() is probably not supported by the given file object */
1293                 PyErr_Clear();
1294                 Py_CLEAR(self->peek);
1295             }
1296             else {
1297                 read_size = _Unpickler_SetStringInput(self, data);
1298                 Py_DECREF(data);
1299                 self->prefetched_idx = 0;
1300                 if (n <= read_size)
1301                     return n;
1302             }
1303         }
1304         len = PyLong_FromSsize_t(n);
1305         if (len == NULL)
1306             return -1;
1307         data = _Pickle_FastCall(self->read, len);
1308     }
1309     if (data == NULL)
1310         return -1;
1311 
1312     read_size = _Unpickler_SetStringInput(self, data);
1313     Py_DECREF(data);
1314     return read_size;
1315 }
1316 
1317 /* Don't call it directly: use _Unpickler_Read() */
1318 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1319 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1320 {
1321     Py_ssize_t num_read;
1322 
1323     *s = NULL;
1324     if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1325         PickleState *st = _Pickle_GetGlobalState();
1326         PyErr_SetString(st->UnpicklingError,
1327                         "read would overflow (invalid bytecode)");
1328         return -1;
1329     }
1330 
1331     /* This case is handled by the _Unpickler_Read() macro for efficiency */
1332     assert(self->next_read_idx + n > self->input_len);
1333 
1334     if (!self->read)
1335         return bad_readline();
1336 
1337     /* Extend the buffer to satisfy desired size */
1338     num_read = _Unpickler_ReadFromFile(self, n);
1339     if (num_read < 0)
1340         return -1;
1341     if (num_read < n)
1342         return bad_readline();
1343     *s = self->input_buffer;
1344     self->next_read_idx = n;
1345     return n;
1346 }
1347 
1348 /* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1349  *
1350  * This should only be used for non-small data reads where potentially
1351  * avoiding a copy is beneficial.  This method does not try to prefetch
1352  * more data into the input buffer.
1353  *
1354  * _Unpickler_Read() is recommended in most cases.
1355  */
1356 static Py_ssize_t
_Unpickler_ReadInto(UnpicklerObject * self,char * buf,Py_ssize_t n)1357 _Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1358 {
1359     assert(n != READ_WHOLE_LINE);
1360 
1361     /* Read from available buffer data, if any */
1362     Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1363     if (in_buffer > 0) {
1364         Py_ssize_t to_read = Py_MIN(in_buffer, n);
1365         memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1366         self->next_read_idx += to_read;
1367         buf += to_read;
1368         n -= to_read;
1369         if (n == 0) {
1370             /* Entire read was satisfied from buffer */
1371             return n;
1372         }
1373     }
1374 
1375     /* Read from file */
1376     if (!self->read) {
1377         /* We're unpickling memory, this means the input is truncated */
1378         return bad_readline();
1379     }
1380     if (_Unpickler_SkipConsumed(self) < 0) {
1381         return -1;
1382     }
1383 
1384     if (!self->readinto) {
1385         /* readinto() not supported on file-like object, fall back to read()
1386          * and copy into destination buffer (bpo-39681) */
1387         PyObject* len = PyLong_FromSsize_t(n);
1388         if (len == NULL) {
1389             return -1;
1390         }
1391         PyObject* data = _Pickle_FastCall(self->read, len);
1392         if (data == NULL) {
1393             return -1;
1394         }
1395         if (!PyBytes_Check(data)) {
1396             PyErr_Format(PyExc_ValueError,
1397                          "read() returned non-bytes object (%R)",
1398                          Py_TYPE(data));
1399             Py_DECREF(data);
1400             return -1;
1401         }
1402         Py_ssize_t read_size = PyBytes_GET_SIZE(data);
1403         if (read_size < n) {
1404             Py_DECREF(data);
1405             return bad_readline();
1406         }
1407         memcpy(buf, PyBytes_AS_STRING(data), n);
1408         Py_DECREF(data);
1409         return n;
1410     }
1411 
1412     /* Call readinto() into user buffer */
1413     PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1414     if (buf_obj == NULL) {
1415         return -1;
1416     }
1417     PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1418     if (read_size_obj == NULL) {
1419         return -1;
1420     }
1421     Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1422     Py_DECREF(read_size_obj);
1423 
1424     if (read_size < 0) {
1425         if (!PyErr_Occurred()) {
1426             PyErr_SetString(PyExc_ValueError,
1427                             "readinto() returned negative size");
1428         }
1429         return -1;
1430     }
1431     if (read_size < n) {
1432         return bad_readline();
1433     }
1434     return n;
1435 }
1436 
/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.

   This should be used for all data reads, rather than accessing the unpickler's
   input buffer directly. This method deals correctly with reading from input
   streams, which the input buffer doesn't deal with.

   Fast path: when the input buffer still holds at least `n` unread bytes,
   `*s` is pointed at them and next_read_idx is advanced by `n`. Otherwise
   the request is forwarded to _Unpickler_ReadImpl(), which refills the
   buffer from the file.

   This is a macro: `self` and `n` are evaluated more than once, so pass
   expressions without side effects.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, s, n) \
    (((self)->input_len - (self)->next_read_idx >= (n))        \
     ? (*(s) = &(self)->input_buffer[(self)->next_read_idx],   \
        (self)->next_read_idx += (n),                          \
        (n))                                                   \
     : _Unpickler_ReadImpl(self, (s), (n)))
1456 
1457 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1458 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1459                     char **result)
1460 {
1461     char *input_line = PyMem_Realloc(self->input_line, len + 1);
1462     if (input_line == NULL) {
1463         PyErr_NoMemory();
1464         return -1;
1465     }
1466 
1467     memcpy(input_line, line, len);
1468     input_line[len] = '\0';
1469     self->input_line = input_line;
1470     *result = self->input_line;
1471     return len;
1472 }
1473 
1474 /* Read a line from the input stream/buffer. If we run off the end of the input
1475    before hitting \n, raise an error.
1476 
1477    Returns the number of chars read, or -1 on failure. */
1478 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1479 _Unpickler_Readline(UnpicklerObject *self, char **result)
1480 {
1481     Py_ssize_t i, num_read;
1482 
1483     for (i = self->next_read_idx; i < self->input_len; i++) {
1484         if (self->input_buffer[i] == '\n') {
1485             char *line_start = self->input_buffer + self->next_read_idx;
1486             num_read = i - self->next_read_idx + 1;
1487             self->next_read_idx = i + 1;
1488             return _Unpickler_CopyLine(self, line_start, num_read, result);
1489         }
1490     }
1491     if (!self->read)
1492         return bad_readline();
1493 
1494     num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1495     if (num_read < 0)
1496         return -1;
1497     if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1498         return bad_readline();
1499     self->next_read_idx = num_read;
1500     return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1501 }
1502 
1503 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1504    will be modified in place. */
1505 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1506 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1507 {
1508     size_t i;
1509 
1510     assert(new_size > self->memo_size);
1511 
1512     PyObject **memo_new = self->memo;
1513     PyMem_RESIZE(memo_new, PyObject *, new_size);
1514     if (memo_new == NULL) {
1515         PyErr_NoMemory();
1516         return -1;
1517     }
1518     self->memo = memo_new;
1519     for (i = self->memo_size; i < new_size; i++)
1520         self->memo[i] = NULL;
1521     self->memo_size = new_size;
1522     return 0;
1523 }
1524 
1525 /* Returns NULL if idx is out of bounds. */
1526 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1527 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1528 {
1529     if (idx >= self->memo_size)
1530         return NULL;
1531 
1532     return self->memo[idx];
1533 }
1534 
1535 /* Returns -1 (with an exception set) on failure, 0 on success.
1536    This takes its own reference to `value`. */
1537 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1538 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1539 {
1540     PyObject *old_item;
1541 
1542     if (idx >= self->memo_size) {
1543         if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1544             return -1;
1545         assert(idx < self->memo_size);
1546     }
1547     Py_INCREF(value);
1548     old_item = self->memo[idx];
1549     self->memo[idx] = value;
1550     if (old_item != NULL) {
1551         Py_DECREF(old_item);
1552     }
1553     else {
1554         self->memo_len++;
1555     }
1556     return 0;
1557 }
1558 
1559 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1560 _Unpickler_NewMemo(Py_ssize_t new_size)
1561 {
1562     PyObject **memo = PyMem_NEW(PyObject *, new_size);
1563     if (memo == NULL) {
1564         PyErr_NoMemory();
1565         return NULL;
1566     }
1567     memset(memo, 0, new_size * sizeof(PyObject *));
1568     return memo;
1569 }
1570 
1571 /* Free the unpickler's memo, taking care to decref any items left in it. */
1572 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1573 _Unpickler_MemoCleanup(UnpicklerObject *self)
1574 {
1575     Py_ssize_t i;
1576     PyObject **memo = self->memo;
1577 
1578     if (self->memo == NULL)
1579         return;
1580     self->memo = NULL;
1581     i = self->memo_size;
1582     while (--i >= 0) {
1583         Py_XDECREF(memo[i]);
1584     }
1585     PyMem_FREE(memo);
1586 }
1587 
1588 static UnpicklerObject *
_Unpickler_New(void)1589 _Unpickler_New(void)
1590 {
1591     UnpicklerObject *self;
1592 
1593     self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1594     if (self == NULL)
1595         return NULL;
1596 
1597     self->pers_func = NULL;
1598     self->input_buffer = NULL;
1599     self->input_line = NULL;
1600     self->input_len = 0;
1601     self->next_read_idx = 0;
1602     self->prefetched_idx = 0;
1603     self->read = NULL;
1604     self->readinto = NULL;
1605     self->readline = NULL;
1606     self->peek = NULL;
1607     self->buffers = NULL;
1608     self->encoding = NULL;
1609     self->errors = NULL;
1610     self->marks = NULL;
1611     self->num_marks = 0;
1612     self->marks_size = 0;
1613     self->proto = 0;
1614     self->fix_imports = 0;
1615     memset(&self->buffer, 0, sizeof(Py_buffer));
1616     self->memo_size = 32;
1617     self->memo_len = 0;
1618     self->memo = _Unpickler_NewMemo(self->memo_size);
1619     self->stack = (Pdata *)Pdata_New();
1620 
1621     if (self->memo == NULL || self->stack == NULL) {
1622         Py_DECREF(self);
1623         return NULL;
1624     }
1625 
1626     PyObject_GC_Track(self);
1627     return self;
1628 }
1629 
1630 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1631    be called once on a freshly created Unpickler. */
1632 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1633 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1634 {
1635     _Py_IDENTIFIER(peek);
1636     _Py_IDENTIFIER(read);
1637     _Py_IDENTIFIER(readinto);
1638     _Py_IDENTIFIER(readline);
1639 
1640     /* Optional file methods */
1641     if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1642         return -1;
1643     }
1644     if (_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto) < 0) {
1645         return -1;
1646     }
1647     (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1648     (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
1649     if (!self->readline || !self->read) {
1650         if (!PyErr_Occurred()) {
1651             PyErr_SetString(PyExc_TypeError,
1652                             "file must have 'read' and 'readline' attributes");
1653         }
1654         Py_CLEAR(self->read);
1655         Py_CLEAR(self->readinto);
1656         Py_CLEAR(self->readline);
1657         Py_CLEAR(self->peek);
1658         return -1;
1659     }
1660     return 0;
1661 }
1662 
1663 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1664    be called once on a freshly created Unpickler. */
1665 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1666 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1667                             const char *encoding,
1668                             const char *errors)
1669 {
1670     if (encoding == NULL)
1671         encoding = "ASCII";
1672     if (errors == NULL)
1673         errors = "strict";
1674 
1675     self->encoding = _PyMem_Strdup(encoding);
1676     self->errors = _PyMem_Strdup(errors);
1677     if (self->encoding == NULL || self->errors == NULL) {
1678         PyErr_NoMemory();
1679         return -1;
1680     }
1681     return 0;
1682 }
1683 
1684 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1685    be called once on a freshly created Unpickler. */
1686 static int
_Unpickler_SetBuffers(UnpicklerObject * self,PyObject * buffers)1687 _Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1688 {
1689     if (buffers == NULL || buffers == Py_None) {
1690         self->buffers = NULL;
1691     }
1692     else {
1693         self->buffers = PyObject_GetIter(buffers);
1694         if (self->buffers == NULL) {
1695             return -1;
1696         }
1697     }
1698     return 0;
1699 }
1700 
1701 /* Generate a GET opcode for an object stored in the memo. */
1702 static int
memo_get(PicklerObject * self,PyObject * key)1703 memo_get(PicklerObject *self, PyObject *key)
1704 {
1705     Py_ssize_t *value;
1706     char pdata[30];
1707     Py_ssize_t len;
1708 
1709     value = PyMemoTable_Get(self->memo, key);
1710     if (value == NULL)  {
1711         PyErr_SetObject(PyExc_KeyError, key);
1712         return -1;
1713     }
1714 
1715     if (!self->bin) {
1716         pdata[0] = GET;
1717         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1718                       "%" PY_FORMAT_SIZE_T "d\n", *value);
1719         len = strlen(pdata);
1720     }
1721     else {
1722         if (*value < 256) {
1723             pdata[0] = BINGET;
1724             pdata[1] = (unsigned char)(*value & 0xff);
1725             len = 2;
1726         }
1727         else if ((size_t)*value <= 0xffffffffUL) {
1728             pdata[0] = LONG_BINGET;
1729             pdata[1] = (unsigned char)(*value & 0xff);
1730             pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1731             pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1732             pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1733             len = 5;
1734         }
1735         else { /* unlikely */
1736             PickleState *st = _Pickle_GetGlobalState();
1737             PyErr_SetString(st->PicklingError,
1738                             "memo id too large for LONG_BINGET");
1739             return -1;
1740         }
1741     }
1742 
1743     if (_Pickler_Write(self, pdata, len) < 0)
1744         return -1;
1745 
1746     return 0;
1747 }
1748 
1749 /* Store an object in the memo, assign it a new unique ID based on the number
1750    of objects currently stored in the memo and generate a PUT opcode. */
1751 static int
memo_put(PicklerObject * self,PyObject * obj)1752 memo_put(PicklerObject *self, PyObject *obj)
1753 {
1754     char pdata[30];
1755     Py_ssize_t len;
1756     Py_ssize_t idx;
1757 
1758     const char memoize_op = MEMOIZE;
1759 
1760     if (self->fast)
1761         return 0;
1762 
1763     idx = PyMemoTable_Size(self->memo);
1764     if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1765         return -1;
1766 
1767     if (self->proto >= 4) {
1768         if (_Pickler_Write(self, &memoize_op, 1) < 0)
1769             return -1;
1770         return 0;
1771     }
1772     else if (!self->bin) {
1773         pdata[0] = PUT;
1774         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1775                       "%" PY_FORMAT_SIZE_T "d\n", idx);
1776         len = strlen(pdata);
1777     }
1778     else {
1779         if (idx < 256) {
1780             pdata[0] = BINPUT;
1781             pdata[1] = (unsigned char)idx;
1782             len = 2;
1783         }
1784         else if ((size_t)idx <= 0xffffffffUL) {
1785             pdata[0] = LONG_BINPUT;
1786             pdata[1] = (unsigned char)(idx & 0xff);
1787             pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1788             pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1789             pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1790             len = 5;
1791         }
1792         else { /* unlikely */
1793             PickleState *st = _Pickle_GetGlobalState();
1794             PyErr_SetString(st->PicklingError,
1795                             "memo id too large for LONG_BINPUT");
1796             return -1;
1797         }
1798     }
1799     if (_Pickler_Write(self, pdata, len) < 0)
1800         return -1;
1801 
1802     return 0;
1803 }
1804 
1805 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1806 get_dotted_path(PyObject *obj, PyObject *name)
1807 {
1808     _Py_static_string(PyId_dot, ".");
1809     PyObject *dotted_path;
1810     Py_ssize_t i, n;
1811 
1812     dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1813     if (dotted_path == NULL)
1814         return NULL;
1815     n = PyList_GET_SIZE(dotted_path);
1816     assert(n >= 1);
1817     for (i = 0; i < n; i++) {
1818         PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1819         if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1820             if (obj == NULL)
1821                 PyErr_Format(PyExc_AttributeError,
1822                              "Can't pickle local object %R", name);
1823             else
1824                 PyErr_Format(PyExc_AttributeError,
1825                              "Can't pickle local attribute %R on %R", name, obj);
1826             Py_DECREF(dotted_path);
1827             return NULL;
1828         }
1829     }
1830     return dotted_path;
1831 }
1832 
1833 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1834 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1835 {
1836     Py_ssize_t i, n;
1837     PyObject *parent = NULL;
1838 
1839     assert(PyList_CheckExact(names));
1840     Py_INCREF(obj);
1841     n = PyList_GET_SIZE(names);
1842     for (i = 0; i < n; i++) {
1843         PyObject *name = PyList_GET_ITEM(names, i);
1844         Py_XDECREF(parent);
1845         parent = obj;
1846         (void)_PyObject_LookupAttr(parent, name, &obj);
1847         if (obj == NULL) {
1848             Py_DECREF(parent);
1849             return NULL;
1850         }
1851     }
1852     if (pparent != NULL)
1853         *pparent = parent;
1854     else
1855         Py_XDECREF(parent);
1856     return obj;
1857 }
1858 
1859 
1860 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1861 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1862 {
1863     PyObject *dotted_path, *attr;
1864 
1865     if (allow_qualname) {
1866         dotted_path = get_dotted_path(obj, name);
1867         if (dotted_path == NULL)
1868             return NULL;
1869         attr = get_deep_attribute(obj, dotted_path, NULL);
1870         Py_DECREF(dotted_path);
1871     }
1872     else {
1873         (void)_PyObject_LookupAttr(obj, name, &attr);
1874     }
1875     if (attr == NULL && !PyErr_Occurred()) {
1876         PyErr_Format(PyExc_AttributeError,
1877                      "Can't get attribute %R on %R", name, obj);
1878     }
1879     return attr;
1880 }
1881 
1882 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1883 _checkmodule(PyObject *module_name, PyObject *module,
1884              PyObject *global, PyObject *dotted_path)
1885 {
1886     if (module == Py_None) {
1887         return -1;
1888     }
1889     if (PyUnicode_Check(module_name) &&
1890             _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1891         return -1;
1892     }
1893 
1894     PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1895     if (candidate == NULL) {
1896         return -1;
1897     }
1898     if (candidate != global) {
1899         Py_DECREF(candidate);
1900         return -1;
1901     }
1902     Py_DECREF(candidate);
1903     return 0;
1904 }
1905 
1906 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1907 whichmodule(PyObject *global, PyObject *dotted_path)
1908 {
1909     PyObject *module_name;
1910     PyObject *module = NULL;
1911     Py_ssize_t i;
1912     PyObject *modules;
1913     _Py_IDENTIFIER(__module__);
1914     _Py_IDENTIFIER(modules);
1915     _Py_IDENTIFIER(__main__);
1916 
1917     if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1918         return NULL;
1919     }
1920     if (module_name) {
1921         /* In some rare cases (e.g., bound methods of extension types),
1922            __module__ can be None. If it is so, then search sys.modules for
1923            the module of global. */
1924         if (module_name != Py_None)
1925             return module_name;
1926         Py_CLEAR(module_name);
1927     }
1928     assert(module_name == NULL);
1929 
1930     /* Fallback on walking sys.modules */
1931     modules = _PySys_GetObjectId(&PyId_modules);
1932     if (modules == NULL) {
1933         PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1934         return NULL;
1935     }
1936     if (PyDict_CheckExact(modules)) {
1937         i = 0;
1938         while (PyDict_Next(modules, &i, &module_name, &module)) {
1939             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1940                 Py_INCREF(module_name);
1941                 return module_name;
1942             }
1943             if (PyErr_Occurred()) {
1944                 return NULL;
1945             }
1946         }
1947     }
1948     else {
1949         PyObject *iterator = PyObject_GetIter(modules);
1950         if (iterator == NULL) {
1951             return NULL;
1952         }
1953         while ((module_name = PyIter_Next(iterator))) {
1954             module = PyObject_GetItem(modules, module_name);
1955             if (module == NULL) {
1956                 Py_DECREF(module_name);
1957                 Py_DECREF(iterator);
1958                 return NULL;
1959             }
1960             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1961                 Py_DECREF(module);
1962                 Py_DECREF(iterator);
1963                 return module_name;
1964             }
1965             Py_DECREF(module);
1966             Py_DECREF(module_name);
1967             if (PyErr_Occurred()) {
1968                 Py_DECREF(iterator);
1969                 return NULL;
1970             }
1971         }
1972         Py_DECREF(iterator);
1973     }
1974 
1975     /* If no module is found, use __main__. */
1976     module_name = _PyUnicode_FromId(&PyId___main__);
1977     Py_XINCREF(module_name);
1978     return module_name;
1979 }
1980 
/* fast_save_enter() and fast_save_leave() are guards against recursive
   objects when Pickler is used with the "fast mode" (i.e., with object
   memoization disabled). If the nesting of a list or dict object exceeds
   FAST_NESTING_LIMIT, these guards will start keeping an internal
   reference to the seen list or dict objects and check whether these objects
   are recursive. These are not strictly necessary, since save() has a
   hard-coded recursion limit, but they give a nicer error message than the
   typical RuntimeError. */
1989 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1990 fast_save_enter(PicklerObject *self, PyObject *obj)
1991 {
1992     /* if fast_nesting < 0, we're doing an error exit. */
1993     if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1994         PyObject *key = NULL;
1995         if (self->fast_memo == NULL) {
1996             self->fast_memo = PyDict_New();
1997             if (self->fast_memo == NULL) {
1998                 self->fast_nesting = -1;
1999                 return 0;
2000             }
2001         }
2002         key = PyLong_FromVoidPtr(obj);
2003         if (key == NULL) {
2004             self->fast_nesting = -1;
2005             return 0;
2006         }
2007         if (PyDict_GetItemWithError(self->fast_memo, key)) {
2008             Py_DECREF(key);
2009             PyErr_Format(PyExc_ValueError,
2010                          "fast mode: can't pickle cyclic objects "
2011                          "including object type %.200s at %p",
2012                          Py_TYPE(obj)->tp_name, obj);
2013             self->fast_nesting = -1;
2014             return 0;
2015         }
2016         if (PyErr_Occurred()) {
2017             Py_DECREF(key);
2018             self->fast_nesting = -1;
2019             return 0;
2020         }
2021         if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
2022             Py_DECREF(key);
2023             self->fast_nesting = -1;
2024             return 0;
2025         }
2026         Py_DECREF(key);
2027     }
2028     return 1;
2029 }
2030 
2031 static int
fast_save_leave(PicklerObject * self,PyObject * obj)2032 fast_save_leave(PicklerObject *self, PyObject *obj)
2033 {
2034     if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2035         PyObject *key = PyLong_FromVoidPtr(obj);
2036         if (key == NULL)
2037             return 0;
2038         if (PyDict_DelItem(self->fast_memo, key) < 0) {
2039             Py_DECREF(key);
2040             return 0;
2041         }
2042         Py_DECREF(key);
2043     }
2044     return 1;
2045 }
2046 
2047 static int
save_none(PicklerObject * self,PyObject * obj)2048 save_none(PicklerObject *self, PyObject *obj)
2049 {
2050     const char none_op = NONE;
2051     if (_Pickler_Write(self, &none_op, 1) < 0)
2052         return -1;
2053 
2054     return 0;
2055 }
2056 
2057 static int
save_bool(PicklerObject * self,PyObject * obj)2058 save_bool(PicklerObject *self, PyObject *obj)
2059 {
2060     if (self->proto >= 2) {
2061         const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
2062         if (_Pickler_Write(self, &bool_op, 1) < 0)
2063             return -1;
2064     }
2065     else {
2066         /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2067          * so that unpicklers written before bools were introduced unpickle them
2068          * as ints, but unpicklers after can recognize that bools were intended.
2069          * Note that protocol 2 added direct ways to pickle bools.
2070          */
2071         const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2072         if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2073             return -1;
2074     }
2075     return 0;
2076 }
2077 
2078 static int
save_long(PicklerObject * self,PyObject * obj)2079 save_long(PicklerObject *self, PyObject *obj)
2080 {
2081     PyObject *repr = NULL;
2082     Py_ssize_t size;
2083     long val;
2084     int overflow;
2085     int status = 0;
2086 
2087     val= PyLong_AsLongAndOverflow(obj, &overflow);
2088     if (!overflow && (sizeof(long) <= 4 ||
2089             (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2090     {
2091         /* result fits in a signed 4-byte integer.
2092 
2093            Note: we can't use -0x80000000L in the above condition because some
2094            compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2095            before applying the unary minus when sizeof(long) <= 4. The
2096            resulting value stays unsigned which is commonly not what we want,
2097            so MSVC happily warns us about it.  However, that result would have
2098            been fine because we guard for sizeof(long) <= 4 which turns the
2099            condition true in that particular case. */
2100         char pdata[32];
2101         Py_ssize_t len = 0;
2102 
2103         if (self->bin) {
2104             pdata[1] = (unsigned char)(val & 0xff);
2105             pdata[2] = (unsigned char)((val >> 8) & 0xff);
2106             pdata[3] = (unsigned char)((val >> 16) & 0xff);
2107             pdata[4] = (unsigned char)((val >> 24) & 0xff);
2108 
2109             if ((pdata[4] != 0) || (pdata[3] != 0)) {
2110                 pdata[0] = BININT;
2111                 len = 5;
2112             }
2113             else if (pdata[2] != 0) {
2114                 pdata[0] = BININT2;
2115                 len = 3;
2116             }
2117             else {
2118                 pdata[0] = BININT1;
2119                 len = 2;
2120             }
2121         }
2122         else {
2123             sprintf(pdata, "%c%ld\n", INT,  val);
2124             len = strlen(pdata);
2125         }
2126         if (_Pickler_Write(self, pdata, len) < 0)
2127             return -1;
2128 
2129         return 0;
2130     }
2131     assert(!PyErr_Occurred());
2132 
2133     if (self->proto >= 2) {
2134         /* Linear-time pickling. */
2135         size_t nbits;
2136         size_t nbytes;
2137         unsigned char *pdata;
2138         char header[5];
2139         int i;
2140         int sign = _PyLong_Sign(obj);
2141 
2142         if (sign == 0) {
2143             header[0] = LONG1;
2144             header[1] = 0;      /* It's 0 -- an empty bytestring. */
2145             if (_Pickler_Write(self, header, 2) < 0)
2146                 goto error;
2147             return 0;
2148         }
2149         nbits = _PyLong_NumBits(obj);
2150         if (nbits == (size_t)-1 && PyErr_Occurred())
2151             goto error;
2152         /* How many bytes do we need?  There are nbits >> 3 full
2153          * bytes of data, and nbits & 7 leftover bits.  If there
2154          * are any leftover bits, then we clearly need another
2155          * byte.  What's not so obvious is that we *probably*
2156          * need another byte even if there aren't any leftovers:
2157          * the most-significant bit of the most-significant byte
2158          * acts like a sign bit, and it's usually got a sense
2159          * opposite of the one we need.  The exception is ints
2160          * of the form -(2**(8*j-1)) for j > 0.  Such an int is
2161          * its own 256's-complement, so has the right sign bit
2162          * even without the extra byte.  That's a pain to check
2163          * for in advance, though, so we always grab an extra
2164          * byte at the start, and cut it back later if possible.
2165          */
2166         nbytes = (nbits >> 3) + 1;
2167         if (nbytes > 0x7fffffffL) {
2168             PyErr_SetString(PyExc_OverflowError,
2169                             "int too large to pickle");
2170             goto error;
2171         }
2172         repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2173         if (repr == NULL)
2174             goto error;
2175         pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2176         i = _PyLong_AsByteArray((PyLongObject *)obj,
2177                                 pdata, nbytes,
2178                                 1 /* little endian */ , 1 /* signed */ );
2179         if (i < 0)
2180             goto error;
2181         /* If the int is negative, this may be a byte more than
2182          * needed.  This is so iff the MSB is all redundant sign
2183          * bits.
2184          */
2185         if (sign < 0 &&
2186             nbytes > 1 &&
2187             pdata[nbytes - 1] == 0xff &&
2188             (pdata[nbytes - 2] & 0x80) != 0) {
2189             nbytes--;
2190         }
2191 
2192         if (nbytes < 256) {
2193             header[0] = LONG1;
2194             header[1] = (unsigned char)nbytes;
2195             size = 2;
2196         }
2197         else {
2198             header[0] = LONG4;
2199             size = (Py_ssize_t) nbytes;
2200             for (i = 1; i < 5; i++) {
2201                 header[i] = (unsigned char)(size & 0xff);
2202                 size >>= 8;
2203             }
2204             size = 5;
2205         }
2206         if (_Pickler_Write(self, header, size) < 0 ||
2207             _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2208             goto error;
2209     }
2210     else {
2211         const char long_op = LONG;
2212         const char *string;
2213 
2214         /* proto < 2: write the repr and newline.  This is quadratic-time (in
2215            the number of digits), in both directions.  We add a trailing 'L'
2216            to the repr, for compatibility with Python 2.x. */
2217 
2218         repr = PyObject_Repr(obj);
2219         if (repr == NULL)
2220             goto error;
2221 
2222         string = PyUnicode_AsUTF8AndSize(repr, &size);
2223         if (string == NULL)
2224             goto error;
2225 
2226         if (_Pickler_Write(self, &long_op, 1) < 0 ||
2227             _Pickler_Write(self, string, size) < 0 ||
2228             _Pickler_Write(self, "L\n", 2) < 0)
2229             goto error;
2230     }
2231 
2232     if (0) {
2233   error:
2234       status = -1;
2235     }
2236     Py_XDECREF(repr);
2237 
2238     return status;
2239 }
2240 
2241 static int
save_float(PicklerObject * self,PyObject * obj)2242 save_float(PicklerObject *self, PyObject *obj)
2243 {
2244     double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2245 
2246     if (self->bin) {
2247         char pdata[9];
2248         pdata[0] = BINFLOAT;
2249         if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2250             return -1;
2251         if (_Pickler_Write(self, pdata, 9) < 0)
2252             return -1;
2253    }
2254     else {
2255         int result = -1;
2256         char *buf = NULL;
2257         char op = FLOAT;
2258 
2259         if (_Pickler_Write(self, &op, 1) < 0)
2260             goto done;
2261 
2262         buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2263         if (!buf) {
2264             PyErr_NoMemory();
2265             goto done;
2266         }
2267 
2268         if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2269             goto done;
2270 
2271         if (_Pickler_Write(self, "\n", 1) < 0)
2272             goto done;
2273 
2274         result = 0;
2275 done:
2276         PyMem_Free(buf);
2277         return result;
2278     }
2279 
2280     return 0;
2281 }
2282 
2283 /* Perform direct write of the header and payload of the binary object.
2284 
2285    The large contiguous data is written directly into the underlying file
2286    object, bypassing the output_buffer of the Pickler.  We intentionally
2287    do not insert a protocol 4 frame opcode to make it possible to optimize
2288    file.read calls in the loader.
2289  */
2290 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2291 _Pickler_write_bytes(PicklerObject *self,
2292                      const char *header, Py_ssize_t header_size,
2293                      const char *data, Py_ssize_t data_size,
2294                      PyObject *payload)
2295 {
2296     int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2297     int framing = self->framing;
2298 
2299     if (bypass_buffer) {
2300         assert(self->output_buffer != NULL);
2301         /* Commit the previous frame. */
2302         if (_Pickler_CommitFrame(self)) {
2303             return -1;
2304         }
2305         /* Disable framing temporarily */
2306         self->framing = 0;
2307     }
2308 
2309     if (_Pickler_Write(self, header, header_size) < 0) {
2310         return -1;
2311     }
2312 
2313     if (bypass_buffer && self->write != NULL) {
2314         /* Bypass the in-memory buffer to directly stream large data
2315            into the underlying file object. */
2316         PyObject *result, *mem = NULL;
2317         /* Dump the output buffer to the file. */
2318         if (_Pickler_FlushToFile(self) < 0) {
2319             return -1;
2320         }
2321 
2322         /* Stream write the payload into the file without going through the
2323            output buffer. */
2324         if (payload == NULL) {
2325             /* TODO: It would be better to use a memoryview with a linked
2326                original string if this is possible. */
2327             payload = mem = PyBytes_FromStringAndSize(data, data_size);
2328             if (payload == NULL) {
2329                 return -1;
2330             }
2331         }
2332         result = PyObject_CallOneArg(self->write, payload);
2333         Py_XDECREF(mem);
2334         if (result == NULL) {
2335             return -1;
2336         }
2337         Py_DECREF(result);
2338 
2339         /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2340         if (_Pickler_ClearBuffer(self) < 0) {
2341             return -1;
2342         }
2343     }
2344     else {
2345         if (_Pickler_Write(self, data, data_size) < 0) {
2346             return -1;
2347         }
2348     }
2349 
2350     /* Re-enable framing for subsequent calls to _Pickler_Write. */
2351     self->framing = framing;
2352 
2353     return 0;
2354 }
2355 
2356 static int
_save_bytes_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2357 _save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2358                  Py_ssize_t size)
2359 {
2360     assert(self->proto >= 3);
2361 
2362     char header[9];
2363     Py_ssize_t len;
2364 
2365     if (size < 0)
2366         return -1;
2367 
2368     if (size <= 0xff) {
2369         header[0] = SHORT_BINBYTES;
2370         header[1] = (unsigned char)size;
2371         len = 2;
2372     }
2373     else if ((size_t)size <= 0xffffffffUL) {
2374         header[0] = BINBYTES;
2375         header[1] = (unsigned char)(size & 0xff);
2376         header[2] = (unsigned char)((size >> 8) & 0xff);
2377         header[3] = (unsigned char)((size >> 16) & 0xff);
2378         header[4] = (unsigned char)((size >> 24) & 0xff);
2379         len = 5;
2380     }
2381     else if (self->proto >= 4) {
2382         header[0] = BINBYTES8;
2383         _write_size64(header + 1, size);
2384         len = 9;
2385     }
2386     else {
2387         PyErr_SetString(PyExc_OverflowError,
2388                         "serializing a bytes object larger than 4 GiB "
2389                         "requires pickle protocol 4 or higher");
2390         return -1;
2391     }
2392 
2393     if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2394         return -1;
2395     }
2396 
2397     if (memo_put(self, obj) < 0) {
2398         return -1;
2399     }
2400 
2401     return 0;
2402 }
2403 
2404 static int
save_bytes(PicklerObject * self,PyObject * obj)2405 save_bytes(PicklerObject *self, PyObject *obj)
2406 {
2407     if (self->proto < 3) {
2408         /* Older pickle protocols do not have an opcode for pickling bytes
2409            objects. Therefore, we need to fake the copy protocol (i.e.,
2410            the __reduce__ method) to permit bytes object unpickling.
2411 
2412            Here we use a hack to be compatible with Python 2. Since in Python
2413            2 'bytes' is just an alias for 'str' (which has different
2414            parameters than the actual bytes object), we use codecs.encode
2415            to create the appropriate 'str' object when unpickled using
2416            Python 2 *and* the appropriate 'bytes' object when unpickled
2417            using Python 3. Again this is a hack and we don't need to do this
2418            with newer protocols. */
2419         PyObject *reduce_value;
2420         int status;
2421 
2422         if (PyBytes_GET_SIZE(obj) == 0) {
2423             reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2424         }
2425         else {
2426             PickleState *st = _Pickle_GetGlobalState();
2427             PyObject *unicode_str =
2428                 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2429                                        PyBytes_GET_SIZE(obj),
2430                                        "strict");
2431             _Py_IDENTIFIER(latin1);
2432 
2433             if (unicode_str == NULL)
2434                 return -1;
2435             reduce_value = Py_BuildValue("(O(OO))",
2436                                          st->codecs_encode, unicode_str,
2437                                          _PyUnicode_FromId(&PyId_latin1));
2438             Py_DECREF(unicode_str);
2439         }
2440 
2441         if (reduce_value == NULL)
2442             return -1;
2443 
2444         /* save_reduce() will memoize the object automatically. */
2445         status = save_reduce(self, reduce_value, obj);
2446         Py_DECREF(reduce_value);
2447         return status;
2448     }
2449     else {
2450         return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2451                                 PyBytes_GET_SIZE(obj));
2452     }
2453 }
2454 
2455 static int
_save_bytearray_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2456 _save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2457                      Py_ssize_t size)
2458 {
2459     assert(self->proto >= 5);
2460 
2461     char header[9];
2462     Py_ssize_t len;
2463 
2464     if (size < 0)
2465         return -1;
2466 
2467     header[0] = BYTEARRAY8;
2468     _write_size64(header + 1, size);
2469     len = 9;
2470 
2471     if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2472         return -1;
2473     }
2474 
2475     if (memo_put(self, obj) < 0) {
2476         return -1;
2477     }
2478 
2479     return 0;
2480 }
2481 
2482 static int
save_bytearray(PicklerObject * self,PyObject * obj)2483 save_bytearray(PicklerObject *self, PyObject *obj)
2484 {
2485     if (self->proto < 5) {
2486         /* Older pickle protocols do not have an opcode for pickling
2487          * bytearrays. */
2488         PyObject *reduce_value = NULL;
2489         int status;
2490 
2491         if (PyByteArray_GET_SIZE(obj) == 0) {
2492             reduce_value = Py_BuildValue("(O())",
2493                                          (PyObject *) &PyByteArray_Type);
2494         }
2495         else {
2496             PyObject *bytes_obj = PyBytes_FromObject(obj);
2497             if (bytes_obj != NULL) {
2498                 reduce_value = Py_BuildValue("(O(O))",
2499                                              (PyObject *) &PyByteArray_Type,
2500                                              bytes_obj);
2501                 Py_DECREF(bytes_obj);
2502             }
2503         }
2504         if (reduce_value == NULL)
2505             return -1;
2506 
2507         /* save_reduce() will memoize the object automatically. */
2508         status = save_reduce(self, reduce_value, obj);
2509         Py_DECREF(reduce_value);
2510         return status;
2511     }
2512     else {
2513         return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2514                                     PyByteArray_GET_SIZE(obj));
2515     }
2516 }
2517 
2518 static int
save_picklebuffer(PicklerObject * self,PyObject * obj)2519 save_picklebuffer(PicklerObject *self, PyObject *obj)
2520 {
2521     if (self->proto < 5) {
2522         PickleState *st = _Pickle_GetGlobalState();
2523         PyErr_SetString(st->PicklingError,
2524                         "PickleBuffer can only pickled with protocol >= 5");
2525         return -1;
2526     }
2527     const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2528     if (view == NULL) {
2529         return -1;
2530     }
2531     if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2532         PickleState *st = _Pickle_GetGlobalState();
2533         PyErr_SetString(st->PicklingError,
2534                         "PickleBuffer can not be pickled when "
2535                         "pointing to a non-contiguous buffer");
2536         return -1;
2537     }
2538     int in_band = 1;
2539     if (self->buffer_callback != NULL) {
2540         PyObject *ret = PyObject_CallOneArg(self->buffer_callback, obj);
2541         if (ret == NULL) {
2542             return -1;
2543         }
2544         in_band = PyObject_IsTrue(ret);
2545         Py_DECREF(ret);
2546         if (in_band == -1) {
2547             return -1;
2548         }
2549     }
2550     if (in_band) {
2551         /* Write data in-band */
2552         if (view->readonly) {
2553             return _save_bytes_data(self, obj, (const char*) view->buf,
2554                                     view->len);
2555         }
2556         else {
2557             return _save_bytearray_data(self, obj, (const char*) view->buf,
2558                                         view->len);
2559         }
2560     }
2561     else {
2562         /* Write data out-of-band */
2563         const char next_buffer_op = NEXT_BUFFER;
2564         if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2565             return -1;
2566         }
2567         if (view->readonly) {
2568             const char readonly_buffer_op = READONLY_BUFFER;
2569             if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2570                 return -1;
2571             }
2572         }
2573     }
2574     return 0;
2575 }
2576 
2577 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2578    backslash and newline characters to \uXXXX escapes. */
2579 static PyObject *
raw_unicode_escape(PyObject * obj)2580 raw_unicode_escape(PyObject *obj)
2581 {
2582     char *p;
2583     Py_ssize_t i, size;
2584     const void *data;
2585     unsigned int kind;
2586     _PyBytesWriter writer;
2587 
2588     if (PyUnicode_READY(obj))
2589         return NULL;
2590 
2591     _PyBytesWriter_Init(&writer);
2592 
2593     size = PyUnicode_GET_LENGTH(obj);
2594     data = PyUnicode_DATA(obj);
2595     kind = PyUnicode_KIND(obj);
2596 
2597     p = _PyBytesWriter_Alloc(&writer, size);
2598     if (p == NULL)
2599         goto error;
2600     writer.overallocate = 1;
2601 
2602     for (i=0; i < size; i++) {
2603         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2604         /* Map 32-bit characters to '\Uxxxxxxxx' */
2605         if (ch >= 0x10000) {
2606             /* -1: subtract 1 preallocated byte */
2607             p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2608             if (p == NULL)
2609                 goto error;
2610 
2611             *p++ = '\\';
2612             *p++ = 'U';
2613             *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2614             *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2615             *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2616             *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2617             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2618             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2619             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2620             *p++ = Py_hexdigits[ch & 15];
2621         }
2622         /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2623         else if (ch >= 256 ||
2624                  ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2625                  ch == 0x1a)
2626         {
2627             /* -1: subtract 1 preallocated byte */
2628             p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2629             if (p == NULL)
2630                 goto error;
2631 
2632             *p++ = '\\';
2633             *p++ = 'u';
2634             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2635             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2636             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2637             *p++ = Py_hexdigits[ch & 15];
2638         }
2639         /* Copy everything else as-is */
2640         else
2641             *p++ = (char) ch;
2642     }
2643 
2644     return _PyBytesWriter_Finish(&writer, p);
2645 
2646 error:
2647     _PyBytesWriter_Dealloc(&writer);
2648     return NULL;
2649 }
2650 
2651 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2652 write_unicode_binary(PicklerObject *self, PyObject *obj)
2653 {
2654     char header[9];
2655     Py_ssize_t len;
2656     PyObject *encoded = NULL;
2657     Py_ssize_t size;
2658     const char *data;
2659 
2660     if (PyUnicode_READY(obj))
2661         return -1;
2662 
2663     data = PyUnicode_AsUTF8AndSize(obj, &size);
2664     if (data == NULL) {
2665         /* Issue #8383: for strings with lone surrogates, fallback on the
2666            "surrogatepass" error handler. */
2667         PyErr_Clear();
2668         encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2669         if (encoded == NULL)
2670             return -1;
2671 
2672         data = PyBytes_AS_STRING(encoded);
2673         size = PyBytes_GET_SIZE(encoded);
2674     }
2675 
2676     assert(size >= 0);
2677     if (size <= 0xff && self->proto >= 4) {
2678         header[0] = SHORT_BINUNICODE;
2679         header[1] = (unsigned char)(size & 0xff);
2680         len = 2;
2681     }
2682     else if ((size_t)size <= 0xffffffffUL) {
2683         header[0] = BINUNICODE;
2684         header[1] = (unsigned char)(size & 0xff);
2685         header[2] = (unsigned char)((size >> 8) & 0xff);
2686         header[3] = (unsigned char)((size >> 16) & 0xff);
2687         header[4] = (unsigned char)((size >> 24) & 0xff);
2688         len = 5;
2689     }
2690     else if (self->proto >= 4) {
2691         header[0] = BINUNICODE8;
2692         _write_size64(header + 1, size);
2693         len = 9;
2694     }
2695     else {
2696         PyErr_SetString(PyExc_OverflowError,
2697                         "serializing a string larger than 4 GiB "
2698                         "requires pickle protocol 4 or higher");
2699         Py_XDECREF(encoded);
2700         return -1;
2701     }
2702 
2703     if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2704         Py_XDECREF(encoded);
2705         return -1;
2706     }
2707     Py_XDECREF(encoded);
2708     return 0;
2709 }
2710 
2711 static int
save_unicode(PicklerObject * self,PyObject * obj)2712 save_unicode(PicklerObject *self, PyObject *obj)
2713 {
2714     if (self->bin) {
2715         if (write_unicode_binary(self, obj) < 0)
2716             return -1;
2717     }
2718     else {
2719         PyObject *encoded;
2720         Py_ssize_t size;
2721         const char unicode_op = UNICODE;
2722 
2723         encoded = raw_unicode_escape(obj);
2724         if (encoded == NULL)
2725             return -1;
2726 
2727         if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2728             Py_DECREF(encoded);
2729             return -1;
2730         }
2731 
2732         size = PyBytes_GET_SIZE(encoded);
2733         if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2734             Py_DECREF(encoded);
2735             return -1;
2736         }
2737         Py_DECREF(encoded);
2738 
2739         if (_Pickler_Write(self, "\n", 1) < 0)
2740             return -1;
2741     }
2742     if (memo_put(self, obj) < 0)
2743         return -1;
2744 
2745     return 0;
2746 }
2747 
2748 /* A helper for save_tuple.  Push the len elements in tuple t on the stack. */
2749 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2750 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2751 {
2752     Py_ssize_t i;
2753 
2754     assert(PyTuple_Size(t) == len);
2755 
2756     for (i = 0; i < len; i++) {
2757         PyObject *element = PyTuple_GET_ITEM(t, i);
2758 
2759         if (element == NULL)
2760             return -1;
2761         if (save(self, element, 0) < 0)
2762             return -1;
2763     }
2764 
2765     return 0;
2766 }
2767 
2768 /* Tuples are ubiquitous in the pickle protocols, so many techniques are
2769  * used across protocols to minimize the space needed to pickle them.
2770  * Tuples are also the only builtin immutable type that can be recursive
2771  * (a tuple can be reached from itself), and that requires some subtle
2772  * magic so that it works in all cases.  IOW, this is a long routine.
2773  */
2774 static int
save_tuple(PicklerObject * self,PyObject * obj)2775 save_tuple(PicklerObject *self, PyObject *obj)
2776 {
2777     Py_ssize_t len, i;
2778 
2779     const char mark_op = MARK;
2780     const char tuple_op = TUPLE;
2781     const char pop_op = POP;
2782     const char pop_mark_op = POP_MARK;
2783     const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2784 
2785     if ((len = PyTuple_Size(obj)) < 0)
2786         return -1;
2787 
2788     if (len == 0) {
2789         char pdata[2];
2790 
2791         if (self->proto) {
2792             pdata[0] = EMPTY_TUPLE;
2793             len = 1;
2794         }
2795         else {
2796             pdata[0] = MARK;
2797             pdata[1] = TUPLE;
2798             len = 2;
2799         }
2800         if (_Pickler_Write(self, pdata, len) < 0)
2801             return -1;
2802         return 0;
2803     }
2804 
2805     /* The tuple isn't in the memo now.  If it shows up there after
2806      * saving the tuple elements, the tuple must be recursive, in
2807      * which case we'll pop everything we put on the stack, and fetch
2808      * its value from the memo.
2809      */
2810     if (len <= 3 && self->proto >= 2) {
2811         /* Use TUPLE{1,2,3} opcodes. */
2812         if (store_tuple_elements(self, obj, len) < 0)
2813             return -1;
2814 
2815         if (PyMemoTable_Get(self->memo, obj)) {
2816             /* pop the len elements */
2817             for (i = 0; i < len; i++)
2818                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2819                     return -1;
2820             /* fetch from memo */
2821             if (memo_get(self, obj) < 0)
2822                 return -1;
2823 
2824             return 0;
2825         }
2826         else { /* Not recursive. */
2827             if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2828                 return -1;
2829         }
2830         goto memoize;
2831     }
2832 
2833     /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2834      * Generate MARK e1 e2 ... TUPLE
2835      */
2836     if (_Pickler_Write(self, &mark_op, 1) < 0)
2837         return -1;
2838 
2839     if (store_tuple_elements(self, obj, len) < 0)
2840         return -1;
2841 
2842     if (PyMemoTable_Get(self->memo, obj)) {
2843         /* pop the stack stuff we pushed */
2844         if (self->bin) {
2845             if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2846                 return -1;
2847         }
2848         else {
2849             /* Note that we pop one more than len, to remove
2850              * the MARK too.
2851              */
2852             for (i = 0; i <= len; i++)
2853                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2854                     return -1;
2855         }
2856         /* fetch from memo */
2857         if (memo_get(self, obj) < 0)
2858             return -1;
2859 
2860         return 0;
2861     }
2862     else { /* Not recursive. */
2863         if (_Pickler_Write(self, &tuple_op, 1) < 0)
2864             return -1;
2865     }
2866 
2867   memoize:
2868     if (memo_put(self, obj) < 0)
2869         return -1;
2870 
2871     return 0;
2872 }
2873 
2874 /* iter is an iterator giving items, and we batch up chunks of
2875  *     MARK item item ... item APPENDS
2876  * opcode sequences.  Calling code should have arranged to first create an
2877  * empty list, or list-like object, for the APPENDS to operate on.
2878  * Returns 0 on success, <0 on error.
2879  */
2880 static int
batch_list(PicklerObject * self,PyObject * iter)2881 batch_list(PicklerObject *self, PyObject *iter)
2882 {
2883     PyObject *obj = NULL;
2884     PyObject *firstitem = NULL;
2885     int i, n;
2886 
2887     const char mark_op = MARK;
2888     const char append_op = APPEND;
2889     const char appends_op = APPENDS;
2890 
2891     assert(iter != NULL);
2892 
2893     /* XXX: I think this function could be made faster by avoiding the
2894        iterator interface and fetching objects directly from list using
2895        PyList_GET_ITEM.
2896     */
2897 
2898     if (self->proto == 0) {
2899         /* APPENDS isn't available; do one at a time. */
2900         for (;;) {
2901             obj = PyIter_Next(iter);
2902             if (obj == NULL) {
2903                 if (PyErr_Occurred())
2904                     return -1;
2905                 break;
2906             }
2907             i = save(self, obj, 0);
2908             Py_DECREF(obj);
2909             if (i < 0)
2910                 return -1;
2911             if (_Pickler_Write(self, &append_op, 1) < 0)
2912                 return -1;
2913         }
2914         return 0;
2915     }
2916 
2917     /* proto > 0:  write in batches of BATCHSIZE. */
2918     do {
2919         /* Get first item */
2920         firstitem = PyIter_Next(iter);
2921         if (firstitem == NULL) {
2922             if (PyErr_Occurred())
2923                 goto error;
2924 
2925             /* nothing more to add */
2926             break;
2927         }
2928 
2929         /* Try to get a second item */
2930         obj = PyIter_Next(iter);
2931         if (obj == NULL) {
2932             if (PyErr_Occurred())
2933                 goto error;
2934 
2935             /* Only one item to write */
2936             if (save(self, firstitem, 0) < 0)
2937                 goto error;
2938             if (_Pickler_Write(self, &append_op, 1) < 0)
2939                 goto error;
2940             Py_CLEAR(firstitem);
2941             break;
2942         }
2943 
2944         /* More than one item to write */
2945 
2946         /* Pump out MARK, items, APPENDS. */
2947         if (_Pickler_Write(self, &mark_op, 1) < 0)
2948             goto error;
2949 
2950         if (save(self, firstitem, 0) < 0)
2951             goto error;
2952         Py_CLEAR(firstitem);
2953         n = 1;
2954 
2955         /* Fetch and save up to BATCHSIZE items */
2956         while (obj) {
2957             if (save(self, obj, 0) < 0)
2958                 goto error;
2959             Py_CLEAR(obj);
2960             n += 1;
2961 
2962             if (n == BATCHSIZE)
2963                 break;
2964 
2965             obj = PyIter_Next(iter);
2966             if (obj == NULL) {
2967                 if (PyErr_Occurred())
2968                     goto error;
2969                 break;
2970             }
2971         }
2972 
2973         if (_Pickler_Write(self, &appends_op, 1) < 0)
2974             goto error;
2975 
2976     } while (n == BATCHSIZE);
2977     return 0;
2978 
2979   error:
2980     Py_XDECREF(firstitem);
2981     Py_XDECREF(obj);
2982     return -1;
2983 }
2984 
2985 /* This is a variant of batch_list() above, specialized for lists (with no
2986  * support for list subclasses). Like batch_list(), we batch up chunks of
2987  *     MARK item item ... item APPENDS
2988  * opcode sequences.  Calling code should have arranged to first create an
2989  * empty list, or list-like object, for the APPENDS to operate on.
2990  * Returns 0 on success, -1 on error.
2991  *
2992  * This version is considerably faster than batch_list(), if less general.
2993  *
2994  * Note that this only works for protocols > 0.
2995  */
2996 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2997 batch_list_exact(PicklerObject *self, PyObject *obj)
2998 {
2999     PyObject *item = NULL;
3000     Py_ssize_t this_batch, total;
3001 
3002     const char append_op = APPEND;
3003     const char appends_op = APPENDS;
3004     const char mark_op = MARK;
3005 
3006     assert(obj != NULL);
3007     assert(self->proto > 0);
3008     assert(PyList_CheckExact(obj));
3009 
3010     if (PyList_GET_SIZE(obj) == 1) {
3011         item = PyList_GET_ITEM(obj, 0);
3012         if (save(self, item, 0) < 0)
3013             return -1;
3014         if (_Pickler_Write(self, &append_op, 1) < 0)
3015             return -1;
3016         return 0;
3017     }
3018 
3019     /* Write in batches of BATCHSIZE. */
3020     total = 0;
3021     do {
3022         this_batch = 0;
3023         if (_Pickler_Write(self, &mark_op, 1) < 0)
3024             return -1;
3025         while (total < PyList_GET_SIZE(obj)) {
3026             item = PyList_GET_ITEM(obj, total);
3027             if (save(self, item, 0) < 0)
3028                 return -1;
3029             total++;
3030             if (++this_batch == BATCHSIZE)
3031                 break;
3032         }
3033         if (_Pickler_Write(self, &appends_op, 1) < 0)
3034             return -1;
3035 
3036     } while (total < PyList_GET_SIZE(obj));
3037 
3038     return 0;
3039 }
3040 
3041 static int
save_list(PicklerObject * self,PyObject * obj)3042 save_list(PicklerObject *self, PyObject *obj)
3043 {
3044     char header[3];
3045     Py_ssize_t len;
3046     int status = 0;
3047 
3048     if (self->fast && !fast_save_enter(self, obj))
3049         goto error;
3050 
3051     /* Create an empty list. */
3052     if (self->bin) {
3053         header[0] = EMPTY_LIST;
3054         len = 1;
3055     }
3056     else {
3057         header[0] = MARK;
3058         header[1] = LIST;
3059         len = 2;
3060     }
3061 
3062     if (_Pickler_Write(self, header, len) < 0)
3063         goto error;
3064 
3065     /* Get list length, and bow out early if empty. */
3066     if ((len = PyList_Size(obj)) < 0)
3067         goto error;
3068 
3069     if (memo_put(self, obj) < 0)
3070         goto error;
3071 
3072     if (len != 0) {
3073         /* Materialize the list elements. */
3074         if (PyList_CheckExact(obj) && self->proto > 0) {
3075             if (Py_EnterRecursiveCall(" while pickling an object"))
3076                 goto error;
3077             status = batch_list_exact(self, obj);
3078             Py_LeaveRecursiveCall();
3079         } else {
3080             PyObject *iter = PyObject_GetIter(obj);
3081             if (iter == NULL)
3082                 goto error;
3083 
3084             if (Py_EnterRecursiveCall(" while pickling an object")) {
3085                 Py_DECREF(iter);
3086                 goto error;
3087             }
3088             status = batch_list(self, iter);
3089             Py_LeaveRecursiveCall();
3090             Py_DECREF(iter);
3091         }
3092     }
3093     if (0) {
3094   error:
3095         status = -1;
3096     }
3097 
3098     if (self->fast && !fast_save_leave(self, obj))
3099         status = -1;
3100 
3101     return status;
3102 }
3103 
3104 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3105  *     MARK key value ... key value SETITEMS
3106  * opcode sequences.  Calling code should have arranged to first create an
3107  * empty dict, or dict-like object, for the SETITEMS to operate on.
3108  * Returns 0 on success, <0 on error.
3109  *
3110  * This is very much like batch_list().  The difference between saving
3111  * elements directly, and picking apart two-tuples, is so long-winded at
3112  * the C level, though, that attempts to combine these routines were too
3113  * ugly to bear.
3114  */
3115 static int
batch_dict(PicklerObject * self,PyObject * iter)3116 batch_dict(PicklerObject *self, PyObject *iter)
3117 {
3118     PyObject *obj = NULL;
3119     PyObject *firstitem = NULL;
3120     int i, n;
3121 
3122     const char mark_op = MARK;
3123     const char setitem_op = SETITEM;
3124     const char setitems_op = SETITEMS;
3125 
3126     assert(iter != NULL);
3127 
3128     if (self->proto == 0) {
3129         /* SETITEMS isn't available; do one at a time. */
3130         for (;;) {
3131             obj = PyIter_Next(iter);
3132             if (obj == NULL) {
3133                 if (PyErr_Occurred())
3134                     return -1;
3135                 break;
3136             }
3137             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3138                 PyErr_SetString(PyExc_TypeError, "dict items "
3139                                 "iterator must return 2-tuples");
3140                 return -1;
3141             }
3142             i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3143             if (i >= 0)
3144                 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3145             Py_DECREF(obj);
3146             if (i < 0)
3147                 return -1;
3148             if (_Pickler_Write(self, &setitem_op, 1) < 0)
3149                 return -1;
3150         }
3151         return 0;
3152     }
3153 
3154     /* proto > 0:  write in batches of BATCHSIZE. */
3155     do {
3156         /* Get first item */
3157         firstitem = PyIter_Next(iter);
3158         if (firstitem == NULL) {
3159             if (PyErr_Occurred())
3160                 goto error;
3161 
3162             /* nothing more to add */
3163             break;
3164         }
3165         if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3166             PyErr_SetString(PyExc_TypeError, "dict items "
3167                                 "iterator must return 2-tuples");
3168             goto error;
3169         }
3170 
3171         /* Try to get a second item */
3172         obj = PyIter_Next(iter);
3173         if (obj == NULL) {
3174             if (PyErr_Occurred())
3175                 goto error;
3176 
3177             /* Only one item to write */
3178             if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3179                 goto error;
3180             if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3181                 goto error;
3182             if (_Pickler_Write(self, &setitem_op, 1) < 0)
3183                 goto error;
3184             Py_CLEAR(firstitem);
3185             break;
3186         }
3187 
3188         /* More than one item to write */
3189 
3190         /* Pump out MARK, items, SETITEMS. */
3191         if (_Pickler_Write(self, &mark_op, 1) < 0)
3192             goto error;
3193 
3194         if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3195             goto error;
3196         if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3197             goto error;
3198         Py_CLEAR(firstitem);
3199         n = 1;
3200 
3201         /* Fetch and save up to BATCHSIZE items */
3202         while (obj) {
3203             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3204                 PyErr_SetString(PyExc_TypeError, "dict items "
3205                     "iterator must return 2-tuples");
3206                 goto error;
3207             }
3208             if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3209                 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3210                 goto error;
3211             Py_CLEAR(obj);
3212             n += 1;
3213 
3214             if (n == BATCHSIZE)
3215                 break;
3216 
3217             obj = PyIter_Next(iter);
3218             if (obj == NULL) {
3219                 if (PyErr_Occurred())
3220                     goto error;
3221                 break;
3222             }
3223         }
3224 
3225         if (_Pickler_Write(self, &setitems_op, 1) < 0)
3226             goto error;
3227 
3228     } while (n == BATCHSIZE);
3229     return 0;
3230 
3231   error:
3232     Py_XDECREF(firstitem);
3233     Py_XDECREF(obj);
3234     return -1;
3235 }
3236 
3237 /* This is a variant of batch_dict() above that specializes for dicts, with no
3238  * support for dict subclasses. Like batch_dict(), we batch up chunks of
3239  *     MARK key value ... key value SETITEMS
3240  * opcode sequences.  Calling code should have arranged to first create an
3241  * empty dict, or dict-like object, for the SETITEMS to operate on.
3242  * Returns 0 on success, -1 on error.
3243  *
3244  * Note that this currently doesn't work for protocol 0.
3245  */
3246 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)3247 batch_dict_exact(PicklerObject *self, PyObject *obj)
3248 {
3249     PyObject *key = NULL, *value = NULL;
3250     int i;
3251     Py_ssize_t dict_size, ppos = 0;
3252 
3253     const char mark_op = MARK;
3254     const char setitem_op = SETITEM;
3255     const char setitems_op = SETITEMS;
3256 
3257     assert(obj != NULL && PyDict_CheckExact(obj));
3258     assert(self->proto > 0);
3259 
3260     dict_size = PyDict_GET_SIZE(obj);
3261 
3262     /* Special-case len(d) == 1 to save space. */
3263     if (dict_size == 1) {
3264         PyDict_Next(obj, &ppos, &key, &value);
3265         if (save(self, key, 0) < 0)
3266             return -1;
3267         if (save(self, value, 0) < 0)
3268             return -1;
3269         if (_Pickler_Write(self, &setitem_op, 1) < 0)
3270             return -1;
3271         return 0;
3272     }
3273 
3274     /* Write in batches of BATCHSIZE. */
3275     do {
3276         i = 0;
3277         if (_Pickler_Write(self, &mark_op, 1) < 0)
3278             return -1;
3279         while (PyDict_Next(obj, &ppos, &key, &value)) {
3280             if (save(self, key, 0) < 0)
3281                 return -1;
3282             if (save(self, value, 0) < 0)
3283                 return -1;
3284             if (++i == BATCHSIZE)
3285                 break;
3286         }
3287         if (_Pickler_Write(self, &setitems_op, 1) < 0)
3288             return -1;
3289         if (PyDict_GET_SIZE(obj) != dict_size) {
3290             PyErr_Format(
3291                 PyExc_RuntimeError,
3292                 "dictionary changed size during iteration");
3293             return -1;
3294         }
3295 
3296     } while (i == BATCHSIZE);
3297     return 0;
3298 }
3299 
3300 static int
save_dict(PicklerObject * self,PyObject * obj)3301 save_dict(PicklerObject *self, PyObject *obj)
3302 {
3303     PyObject *items, *iter;
3304     char header[3];
3305     Py_ssize_t len;
3306     int status = 0;
3307     assert(PyDict_Check(obj));
3308 
3309     if (self->fast && !fast_save_enter(self, obj))
3310         goto error;
3311 
3312     /* Create an empty dict. */
3313     if (self->bin) {
3314         header[0] = EMPTY_DICT;
3315         len = 1;
3316     }
3317     else {
3318         header[0] = MARK;
3319         header[1] = DICT;
3320         len = 2;
3321     }
3322 
3323     if (_Pickler_Write(self, header, len) < 0)
3324         goto error;
3325 
3326     if (memo_put(self, obj) < 0)
3327         goto error;
3328 
3329     if (PyDict_GET_SIZE(obj)) {
3330         /* Save the dict items. */
3331         if (PyDict_CheckExact(obj) && self->proto > 0) {
3332             /* We can take certain shortcuts if we know this is a dict and
3333                not a dict subclass. */
3334             if (Py_EnterRecursiveCall(" while pickling an object"))
3335                 goto error;
3336             status = batch_dict_exact(self, obj);
3337             Py_LeaveRecursiveCall();
3338         } else {
3339             _Py_IDENTIFIER(items);
3340 
3341             items = _PyObject_CallMethodIdNoArgs(obj, &PyId_items);
3342             if (items == NULL)
3343                 goto error;
3344             iter = PyObject_GetIter(items);
3345             Py_DECREF(items);
3346             if (iter == NULL)
3347                 goto error;
3348             if (Py_EnterRecursiveCall(" while pickling an object")) {
3349                 Py_DECREF(iter);
3350                 goto error;
3351             }
3352             status = batch_dict(self, iter);
3353             Py_LeaveRecursiveCall();
3354             Py_DECREF(iter);
3355         }
3356     }
3357 
3358     if (0) {
3359   error:
3360         status = -1;
3361     }
3362 
3363     if (self->fast && !fast_save_leave(self, obj))
3364         status = -1;
3365 
3366     return status;
3367 }
3368 
3369 static int
save_set(PicklerObject * self,PyObject * obj)3370 save_set(PicklerObject *self, PyObject *obj)
3371 {
3372     PyObject *item;
3373     int i;
3374     Py_ssize_t set_size, ppos = 0;
3375     Py_hash_t hash;
3376 
3377     const char empty_set_op = EMPTY_SET;
3378     const char mark_op = MARK;
3379     const char additems_op = ADDITEMS;
3380 
3381     if (self->proto < 4) {
3382         PyObject *items;
3383         PyObject *reduce_value;
3384         int status;
3385 
3386         items = PySequence_List(obj);
3387         if (items == NULL) {
3388             return -1;
3389         }
3390         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3391         Py_DECREF(items);
3392         if (reduce_value == NULL) {
3393             return -1;
3394         }
3395         /* save_reduce() will memoize the object automatically. */
3396         status = save_reduce(self, reduce_value, obj);
3397         Py_DECREF(reduce_value);
3398         return status;
3399     }
3400 
3401     if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3402         return -1;
3403 
3404     if (memo_put(self, obj) < 0)
3405         return -1;
3406 
3407     set_size = PySet_GET_SIZE(obj);
3408     if (set_size == 0)
3409         return 0;  /* nothing to do */
3410 
3411     /* Write in batches of BATCHSIZE. */
3412     do {
3413         i = 0;
3414         if (_Pickler_Write(self, &mark_op, 1) < 0)
3415             return -1;
3416         while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3417             if (save(self, item, 0) < 0)
3418                 return -1;
3419             if (++i == BATCHSIZE)
3420                 break;
3421         }
3422         if (_Pickler_Write(self, &additems_op, 1) < 0)
3423             return -1;
3424         if (PySet_GET_SIZE(obj) != set_size) {
3425             PyErr_Format(
3426                 PyExc_RuntimeError,
3427                 "set changed size during iteration");
3428             return -1;
3429         }
3430     } while (i == BATCHSIZE);
3431 
3432     return 0;
3433 }
3434 
3435 static int
save_frozenset(PicklerObject * self,PyObject * obj)3436 save_frozenset(PicklerObject *self, PyObject *obj)
3437 {
3438     PyObject *iter;
3439 
3440     const char mark_op = MARK;
3441     const char frozenset_op = FROZENSET;
3442 
3443     if (self->fast && !fast_save_enter(self, obj))
3444         return -1;
3445 
3446     if (self->proto < 4) {
3447         PyObject *items;
3448         PyObject *reduce_value;
3449         int status;
3450 
3451         items = PySequence_List(obj);
3452         if (items == NULL) {
3453             return -1;
3454         }
3455         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3456                                      items);
3457         Py_DECREF(items);
3458         if (reduce_value == NULL) {
3459             return -1;
3460         }
3461         /* save_reduce() will memoize the object automatically. */
3462         status = save_reduce(self, reduce_value, obj);
3463         Py_DECREF(reduce_value);
3464         return status;
3465     }
3466 
3467     if (_Pickler_Write(self, &mark_op, 1) < 0)
3468         return -1;
3469 
3470     iter = PyObject_GetIter(obj);
3471     if (iter == NULL) {
3472         return -1;
3473     }
3474     for (;;) {
3475         PyObject *item;
3476 
3477         item = PyIter_Next(iter);
3478         if (item == NULL) {
3479             if (PyErr_Occurred()) {
3480                 Py_DECREF(iter);
3481                 return -1;
3482             }
3483             break;
3484         }
3485         if (save(self, item, 0) < 0) {
3486             Py_DECREF(item);
3487             Py_DECREF(iter);
3488             return -1;
3489         }
3490         Py_DECREF(item);
3491     }
3492     Py_DECREF(iter);
3493 
3494     /* If the object is already in the memo, this means it is
3495        recursive. In this case, throw away everything we put on the
3496        stack, and fetch the object back from the memo. */
3497     if (PyMemoTable_Get(self->memo, obj)) {
3498         const char pop_mark_op = POP_MARK;
3499 
3500         if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3501             return -1;
3502         if (memo_get(self, obj) < 0)
3503             return -1;
3504         return 0;
3505     }
3506 
3507     if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3508         return -1;
3509     if (memo_put(self, obj) < 0)
3510         return -1;
3511 
3512     return 0;
3513 }
3514 
3515 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3516 fix_imports(PyObject **module_name, PyObject **global_name)
3517 {
3518     PyObject *key;
3519     PyObject *item;
3520     PickleState *st = _Pickle_GetGlobalState();
3521 
3522     key = PyTuple_Pack(2, *module_name, *global_name);
3523     if (key == NULL)
3524         return -1;
3525     item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3526     Py_DECREF(key);
3527     if (item) {
3528         PyObject *fixed_module_name;
3529         PyObject *fixed_global_name;
3530 
3531         if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3532             PyErr_Format(PyExc_RuntimeError,
3533                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3534                          "should be 2-tuples, not %.200s",
3535                          Py_TYPE(item)->tp_name);
3536             return -1;
3537         }
3538         fixed_module_name = PyTuple_GET_ITEM(item, 0);
3539         fixed_global_name = PyTuple_GET_ITEM(item, 1);
3540         if (!PyUnicode_Check(fixed_module_name) ||
3541             !PyUnicode_Check(fixed_global_name)) {
3542             PyErr_Format(PyExc_RuntimeError,
3543                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3544                          "should be pairs of str, not (%.200s, %.200s)",
3545                          Py_TYPE(fixed_module_name)->tp_name,
3546                          Py_TYPE(fixed_global_name)->tp_name);
3547             return -1;
3548         }
3549 
3550         Py_CLEAR(*module_name);
3551         Py_CLEAR(*global_name);
3552         Py_INCREF(fixed_module_name);
3553         Py_INCREF(fixed_global_name);
3554         *module_name = fixed_module_name;
3555         *global_name = fixed_global_name;
3556         return 0;
3557     }
3558     else if (PyErr_Occurred()) {
3559         return -1;
3560     }
3561 
3562     item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3563     if (item) {
3564         if (!PyUnicode_Check(item)) {
3565             PyErr_Format(PyExc_RuntimeError,
3566                          "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3567                          "should be strings, not %.200s",
3568                          Py_TYPE(item)->tp_name);
3569             return -1;
3570         }
3571         Py_INCREF(item);
3572         Py_XSETREF(*module_name, item);
3573     }
3574     else if (PyErr_Occurred()) {
3575         return -1;
3576     }
3577 
3578     return 0;
3579 }
3580 
3581 static int
save_global(PicklerObject * self,PyObject * obj,PyObject * name)3582 save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3583 {
3584     PyObject *global_name = NULL;
3585     PyObject *module_name = NULL;
3586     PyObject *module = NULL;
3587     PyObject *parent = NULL;
3588     PyObject *dotted_path = NULL;
3589     PyObject *lastname = NULL;
3590     PyObject *cls;
3591     PickleState *st = _Pickle_GetGlobalState();
3592     int status = 0;
3593     _Py_IDENTIFIER(__name__);
3594     _Py_IDENTIFIER(__qualname__);
3595 
3596     const char global_op = GLOBAL;
3597 
3598     if (name) {
3599         Py_INCREF(name);
3600         global_name = name;
3601     }
3602     else {
3603         if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
3604             goto error;
3605         if (global_name == NULL) {
3606             global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3607             if (global_name == NULL)
3608                 goto error;
3609         }
3610     }
3611 
3612     dotted_path = get_dotted_path(module, global_name);
3613     if (dotted_path == NULL)
3614         goto error;
3615     module_name = whichmodule(obj, dotted_path);
3616     if (module_name == NULL)
3617         goto error;
3618 
3619     /* XXX: Change to use the import C API directly with level=0 to disallow
3620        relative imports.
3621 
3622        XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3623        builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3624        custom import functions (IMHO, this would be a nice security
3625        feature). The import C API would need to be extended to support the
3626        extra parameters of __import__ to fix that. */
3627     module = PyImport_Import(module_name);
3628     if (module == NULL) {
3629         PyErr_Format(st->PicklingError,
3630                      "Can't pickle %R: import of module %R failed",
3631                      obj, module_name);
3632         goto error;
3633     }
3634     lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3635     Py_INCREF(lastname);
3636     cls = get_deep_attribute(module, dotted_path, &parent);
3637     Py_CLEAR(dotted_path);
3638     if (cls == NULL) {
3639         PyErr_Format(st->PicklingError,
3640                      "Can't pickle %R: attribute lookup %S on %S failed",
3641                      obj, global_name, module_name);
3642         goto error;
3643     }
3644     if (cls != obj) {
3645         Py_DECREF(cls);
3646         PyErr_Format(st->PicklingError,
3647                      "Can't pickle %R: it's not the same object as %S.%S",
3648                      obj, module_name, global_name);
3649         goto error;
3650     }
3651     Py_DECREF(cls);
3652 
3653     if (self->proto >= 2) {
3654         /* See whether this is in the extension registry, and if
3655          * so generate an EXT opcode.
3656          */
3657         PyObject *extension_key;
3658         PyObject *code_obj;      /* extension code as Python object */
3659         long code;               /* extension code as C value */
3660         char pdata[5];
3661         Py_ssize_t n;
3662 
3663         extension_key = PyTuple_Pack(2, module_name, global_name);
3664         if (extension_key == NULL) {
3665             goto error;
3666         }
3667         code_obj = PyDict_GetItemWithError(st->extension_registry,
3668                                            extension_key);
3669         Py_DECREF(extension_key);
3670         /* The object is not registered in the extension registry.
3671            This is the most likely code path. */
3672         if (code_obj == NULL) {
3673             if (PyErr_Occurred()) {
3674                 goto error;
3675             }
3676             goto gen_global;
3677         }
3678 
3679         /* XXX: pickle.py doesn't check neither the type, nor the range
3680            of the value returned by the extension_registry. It should for
3681            consistency. */
3682 
3683         /* Verify code_obj has the right type and value. */
3684         if (!PyLong_Check(code_obj)) {
3685             PyErr_Format(st->PicklingError,
3686                          "Can't pickle %R: extension code %R isn't an integer",
3687                          obj, code_obj);
3688             goto error;
3689         }
3690         code = PyLong_AS_LONG(code_obj);
3691         if (code <= 0 || code > 0x7fffffffL) {
3692             if (!PyErr_Occurred())
3693                 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3694                              "code %ld is out of range", obj, code);
3695             goto error;
3696         }
3697 
3698         /* Generate an EXT opcode. */
3699         if (code <= 0xff) {
3700             pdata[0] = EXT1;
3701             pdata[1] = (unsigned char)code;
3702             n = 2;
3703         }
3704         else if (code <= 0xffff) {
3705             pdata[0] = EXT2;
3706             pdata[1] = (unsigned char)(code & 0xff);
3707             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3708             n = 3;
3709         }
3710         else {
3711             pdata[0] = EXT4;
3712             pdata[1] = (unsigned char)(code & 0xff);
3713             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3714             pdata[3] = (unsigned char)((code >> 16) & 0xff);
3715             pdata[4] = (unsigned char)((code >> 24) & 0xff);
3716             n = 5;
3717         }
3718 
3719         if (_Pickler_Write(self, pdata, n) < 0)
3720             goto error;
3721     }
3722     else {
3723   gen_global:
3724         if (parent == module) {
3725             Py_INCREF(lastname);
3726             Py_DECREF(global_name);
3727             global_name = lastname;
3728         }
3729         if (self->proto >= 4) {
3730             const char stack_global_op = STACK_GLOBAL;
3731 
3732             if (save(self, module_name, 0) < 0)
3733                 goto error;
3734             if (save(self, global_name, 0) < 0)
3735                 goto error;
3736 
3737             if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3738                 goto error;
3739         }
3740         else if (parent != module) {
3741             PickleState *st = _Pickle_GetGlobalState();
3742             PyObject *reduce_value = Py_BuildValue("(O(OO))",
3743                                         st->getattr, parent, lastname);
3744             if (reduce_value == NULL)
3745                 goto error;
3746             status = save_reduce(self, reduce_value, NULL);
3747             Py_DECREF(reduce_value);
3748             if (status < 0)
3749                 goto error;
3750         }
3751         else {
3752             /* Generate a normal global opcode if we are using a pickle
3753                protocol < 4, or if the object is not registered in the
3754                extension registry. */
3755             PyObject *encoded;
3756             PyObject *(*unicode_encoder)(PyObject *);
3757 
3758             if (_Pickler_Write(self, &global_op, 1) < 0)
3759                 goto error;
3760 
3761             /* For protocol < 3 and if the user didn't request against doing
3762                so, we convert module names to the old 2.x module names. */
3763             if (self->proto < 3 && self->fix_imports) {
3764                 if (fix_imports(&module_name, &global_name) < 0) {
3765                     goto error;
3766                 }
3767             }
3768 
3769             /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3770                both the module name and the global name using UTF-8. We do so
3771                only when we are using the pickle protocol newer than version
3772                3. This is to ensure compatibility with older Unpickler running
3773                on Python 2.x. */
3774             if (self->proto == 3) {
3775                 unicode_encoder = PyUnicode_AsUTF8String;
3776             }
3777             else {
3778                 unicode_encoder = PyUnicode_AsASCIIString;
3779             }
3780             encoded = unicode_encoder(module_name);
3781             if (encoded == NULL) {
3782                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3783                     PyErr_Format(st->PicklingError,
3784                                  "can't pickle module identifier '%S' using "
3785                                  "pickle protocol %i",
3786                                  module_name, self->proto);
3787                 goto error;
3788             }
3789             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3790                                PyBytes_GET_SIZE(encoded)) < 0) {
3791                 Py_DECREF(encoded);
3792                 goto error;
3793             }
3794             Py_DECREF(encoded);
3795             if(_Pickler_Write(self, "\n", 1) < 0)
3796                 goto error;
3797 
3798             /* Save the name of the module. */
3799             encoded = unicode_encoder(global_name);
3800             if (encoded == NULL) {
3801                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3802                     PyErr_Format(st->PicklingError,
3803                                  "can't pickle global identifier '%S' using "
3804                                  "pickle protocol %i",
3805                                  global_name, self->proto);
3806                 goto error;
3807             }
3808             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3809                                PyBytes_GET_SIZE(encoded)) < 0) {
3810                 Py_DECREF(encoded);
3811                 goto error;
3812             }
3813             Py_DECREF(encoded);
3814             if (_Pickler_Write(self, "\n", 1) < 0)
3815                 goto error;
3816         }
3817         /* Memoize the object. */
3818         if (memo_put(self, obj) < 0)
3819             goto error;
3820     }
3821 
3822     if (0) {
3823   error:
3824         status = -1;
3825     }
3826     Py_XDECREF(module_name);
3827     Py_XDECREF(global_name);
3828     Py_XDECREF(module);
3829     Py_XDECREF(parent);
3830     Py_XDECREF(dotted_path);
3831     Py_XDECREF(lastname);
3832 
3833     return status;
3834 }
3835 
3836 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3837 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3838 {
3839     PyObject *reduce_value;
3840     int status;
3841 
3842     reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3843     if (reduce_value == NULL) {
3844         return -1;
3845     }
3846     status = save_reduce(self, reduce_value, obj);
3847     Py_DECREF(reduce_value);
3848     return status;
3849 }
3850 
3851 static int
save_type(PicklerObject * self,PyObject * obj)3852 save_type(PicklerObject *self, PyObject *obj)
3853 {
3854     if (obj == (PyObject *)&_PyNone_Type) {
3855         return save_singleton_type(self, obj, Py_None);
3856     }
3857     else if (obj == (PyObject *)&PyEllipsis_Type) {
3858         return save_singleton_type(self, obj, Py_Ellipsis);
3859     }
3860     else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3861         return save_singleton_type(self, obj, Py_NotImplemented);
3862     }
3863     return save_global(self, obj, NULL);
3864 }
3865 
3866 static int
save_pers(PicklerObject * self,PyObject * obj)3867 save_pers(PicklerObject *self, PyObject *obj)
3868 {
3869     PyObject *pid = NULL;
3870     int status = 0;
3871 
3872     const char persid_op = PERSID;
3873     const char binpersid_op = BINPERSID;
3874 
3875     pid = call_method(self->pers_func, self->pers_func_self, obj);
3876     if (pid == NULL)
3877         return -1;
3878 
3879     if (pid != Py_None) {
3880         if (self->bin) {
3881             if (save(self, pid, 1) < 0 ||
3882                 _Pickler_Write(self, &binpersid_op, 1) < 0)
3883                 goto error;
3884         }
3885         else {
3886             PyObject *pid_str;
3887 
3888             pid_str = PyObject_Str(pid);
3889             if (pid_str == NULL)
3890                 goto error;
3891 
3892             /* XXX: Should it check whether the pid contains embedded
3893                newlines? */
3894             if (!PyUnicode_IS_ASCII(pid_str)) {
3895                 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3896                                 "persistent IDs in protocol 0 must be "
3897                                 "ASCII strings");
3898                 Py_DECREF(pid_str);
3899                 goto error;
3900             }
3901 
3902             if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3903                 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3904                                PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3905                 _Pickler_Write(self, "\n", 1) < 0) {
3906                 Py_DECREF(pid_str);
3907                 goto error;
3908             }
3909             Py_DECREF(pid_str);
3910         }
3911         status = 1;
3912     }
3913 
3914     if (0) {
3915   error:
3916         status = -1;
3917     }
3918     Py_XDECREF(pid);
3919 
3920     return status;
3921 }
3922 
3923 static PyObject *
get_class(PyObject * obj)3924 get_class(PyObject *obj)
3925 {
3926     PyObject *cls;
3927     _Py_IDENTIFIER(__class__);
3928 
3929     if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3930         cls = (PyObject *) Py_TYPE(obj);
3931         Py_INCREF(cls);
3932     }
3933     return cls;
3934 }
3935 
3936 /* We're saving obj, and args is the 2-thru-5 tuple returned by the
3937  * appropriate __reduce__ method for obj.
3938  */
3939 static int
save_reduce(PicklerObject * self,PyObject * args,PyObject * obj)3940 save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3941 {
3942     PyObject *callable;
3943     PyObject *argtup;
3944     PyObject *state = NULL;
3945     PyObject *listitems = Py_None;
3946     PyObject *dictitems = Py_None;
3947     PyObject *state_setter = Py_None;
3948     PickleState *st = _Pickle_GetGlobalState();
3949     Py_ssize_t size;
3950     int use_newobj = 0, use_newobj_ex = 0;
3951 
3952     const char reduce_op = REDUCE;
3953     const char build_op = BUILD;
3954     const char newobj_op = NEWOBJ;
3955     const char newobj_ex_op = NEWOBJ_EX;
3956 
3957     size = PyTuple_Size(args);
3958     if (size < 2 || size > 6) {
3959         PyErr_SetString(st->PicklingError, "tuple returned by "
3960                         "__reduce__ must contain 2 through 6 elements");
3961         return -1;
3962     }
3963 
3964     if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
3965                            &callable, &argtup, &state, &listitems, &dictitems,
3966                            &state_setter))
3967         return -1;
3968 
3969     if (!PyCallable_Check(callable)) {
3970         PyErr_SetString(st->PicklingError, "first item of the tuple "
3971                         "returned by __reduce__ must be callable");
3972         return -1;
3973     }
3974     if (!PyTuple_Check(argtup)) {
3975         PyErr_SetString(st->PicklingError, "second item of the tuple "
3976                         "returned by __reduce__ must be a tuple");
3977         return -1;
3978     }
3979 
3980     if (state == Py_None)
3981         state = NULL;
3982 
3983     if (listitems == Py_None)
3984         listitems = NULL;
3985     else if (!PyIter_Check(listitems)) {
3986         PyErr_Format(st->PicklingError, "fourth element of the tuple "
3987                      "returned by __reduce__ must be an iterator, not %s",
3988                      Py_TYPE(listitems)->tp_name);
3989         return -1;
3990     }
3991 
3992     if (dictitems == Py_None)
3993         dictitems = NULL;
3994     else if (!PyIter_Check(dictitems)) {
3995         PyErr_Format(st->PicklingError, "fifth element of the tuple "
3996                      "returned by __reduce__ must be an iterator, not %s",
3997                      Py_TYPE(dictitems)->tp_name);
3998         return -1;
3999     }
4000 
4001     if (state_setter == Py_None)
4002         state_setter = NULL;
4003     else if (!PyCallable_Check(state_setter)) {
4004         PyErr_Format(st->PicklingError, "sixth element of the tuple "
4005                      "returned by __reduce__ must be a function, not %s",
4006                      Py_TYPE(state_setter)->tp_name);
4007         return -1;
4008     }
4009 
4010     if (self->proto >= 2) {
4011         PyObject *name;
4012         _Py_IDENTIFIER(__name__);
4013 
4014         if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
4015             return -1;
4016         }
4017         if (name != NULL && PyUnicode_Check(name)) {
4018             _Py_IDENTIFIER(__newobj_ex__);
4019             use_newobj_ex = _PyUnicode_EqualToASCIIId(
4020                     name, &PyId___newobj_ex__);
4021             if (!use_newobj_ex) {
4022                 _Py_IDENTIFIER(__newobj__);
4023                 use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
4024             }
4025         }
4026         Py_XDECREF(name);
4027     }
4028 
4029     if (use_newobj_ex) {
4030         PyObject *cls;
4031         PyObject *args;
4032         PyObject *kwargs;
4033 
4034         if (PyTuple_GET_SIZE(argtup) != 3) {
4035             PyErr_Format(st->PicklingError,
4036                          "length of the NEWOBJ_EX argument tuple must be "
4037                          "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
4038             return -1;
4039         }
4040 
4041         cls = PyTuple_GET_ITEM(argtup, 0);
4042         if (!PyType_Check(cls)) {
4043             PyErr_Format(st->PicklingError,
4044                          "first item from NEWOBJ_EX argument tuple must "
4045                          "be a class, not %.200s", Py_TYPE(cls)->tp_name);
4046             return -1;
4047         }
4048         args = PyTuple_GET_ITEM(argtup, 1);
4049         if (!PyTuple_Check(args)) {
4050             PyErr_Format(st->PicklingError,
4051                          "second item from NEWOBJ_EX argument tuple must "
4052                          "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
4053             return -1;
4054         }
4055         kwargs = PyTuple_GET_ITEM(argtup, 2);
4056         if (!PyDict_Check(kwargs)) {
4057             PyErr_Format(st->PicklingError,
4058                          "third item from NEWOBJ_EX argument tuple must "
4059                          "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
4060             return -1;
4061         }
4062 
4063         if (self->proto >= 4) {
4064             if (save(self, cls, 0) < 0 ||
4065                 save(self, args, 0) < 0 ||
4066                 save(self, kwargs, 0) < 0 ||
4067                 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
4068                 return -1;
4069             }
4070         }
4071         else {
4072             PyObject *newargs;
4073             PyObject *cls_new;
4074             Py_ssize_t i;
4075             _Py_IDENTIFIER(__new__);
4076 
4077             newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
4078             if (newargs == NULL)
4079                 return -1;
4080 
4081             cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
4082             if (cls_new == NULL) {
4083                 Py_DECREF(newargs);
4084                 return -1;
4085             }
4086             PyTuple_SET_ITEM(newargs, 0, cls_new);
4087             Py_INCREF(cls);
4088             PyTuple_SET_ITEM(newargs, 1, cls);
4089             for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
4090                 PyObject *item = PyTuple_GET_ITEM(args, i);
4091                 Py_INCREF(item);
4092                 PyTuple_SET_ITEM(newargs, i + 2, item);
4093             }
4094 
4095             callable = PyObject_Call(st->partial, newargs, kwargs);
4096             Py_DECREF(newargs);
4097             if (callable == NULL)
4098                 return -1;
4099 
4100             newargs = PyTuple_New(0);
4101             if (newargs == NULL) {
4102                 Py_DECREF(callable);
4103                 return -1;
4104             }
4105 
4106             if (save(self, callable, 0) < 0 ||
4107                 save(self, newargs, 0) < 0 ||
4108                 _Pickler_Write(self, &reduce_op, 1) < 0) {
4109                 Py_DECREF(newargs);
4110                 Py_DECREF(callable);
4111                 return -1;
4112             }
4113             Py_DECREF(newargs);
4114             Py_DECREF(callable);
4115         }
4116     }
4117     else if (use_newobj) {
4118         PyObject *cls;
4119         PyObject *newargtup;
4120         PyObject *obj_class;
4121         int p;
4122 
4123         /* Sanity checks. */
4124         if (PyTuple_GET_SIZE(argtup) < 1) {
4125             PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
4126             return -1;
4127         }
4128 
4129         cls = PyTuple_GET_ITEM(argtup, 0);
4130         if (!PyType_Check(cls)) {
4131             PyErr_SetString(st->PicklingError, "args[0] from "
4132                             "__newobj__ args is not a type");
4133             return -1;
4134         }
4135 
4136         if (obj != NULL) {
4137             obj_class = get_class(obj);
4138             if (obj_class == NULL) {
4139                 return -1;
4140             }
4141             p = obj_class != cls;
4142             Py_DECREF(obj_class);
4143             if (p) {
4144                 PyErr_SetString(st->PicklingError, "args[0] from "
4145                                 "__newobj__ args has the wrong class");
4146                 return -1;
4147             }
4148         }
4149         /* XXX: These calls save() are prone to infinite recursion. Imagine
4150            what happen if the value returned by the __reduce__() method of
4151            some extension type contains another object of the same type. Ouch!
4152 
4153            Here is a quick example, that I ran into, to illustrate what I
4154            mean:
4155 
4156              >>> import pickle, copyreg
4157              >>> copyreg.dispatch_table.pop(complex)
4158              >>> pickle.dumps(1+2j)
4159              Traceback (most recent call last):
4160                ...
4161              RecursionError: maximum recursion depth exceeded
4162 
4163            Removing the complex class from copyreg.dispatch_table made the
4164            __reduce_ex__() method emit another complex object:
4165 
4166              >>> (1+1j).__reduce_ex__(2)
4167              (<function __newobj__ at 0xb7b71c3c>,
4168                (<class 'complex'>, (1+1j)), None, None, None)
4169 
4170            Thus when save() was called on newargstup (the 2nd item) recursion
4171            ensued. Of course, the bug was in the complex class which had a
4172            broken __getnewargs__() that emitted another complex object. But,
4173            the point, here, is it is quite easy to end up with a broken reduce
4174            function. */
4175 
4176         /* Save the class and its __new__ arguments. */
4177         if (save(self, cls, 0) < 0)
4178             return -1;
4179 
4180         newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
4181         if (newargtup == NULL)
4182             return -1;
4183 
4184         p = save(self, newargtup, 0);
4185         Py_DECREF(newargtup);
4186         if (p < 0)
4187             return -1;
4188 
4189         /* Add NEWOBJ opcode. */
4190         if (_Pickler_Write(self, &newobj_op, 1) < 0)
4191             return -1;
4192     }
4193     else { /* Not using NEWOBJ. */
4194         if (save(self, callable, 0) < 0 ||
4195             save(self, argtup, 0) < 0 ||
4196             _Pickler_Write(self, &reduce_op, 1) < 0)
4197             return -1;
4198     }
4199 
4200     /* obj can be NULL when save_reduce() is used directly. A NULL obj means
4201        the caller do not want to memoize the object. Not particularly useful,
4202        but that is to mimic the behavior save_reduce() in pickle.py when
4203        obj is None. */
4204     if (obj != NULL) {
4205         /* If the object is already in the memo, this means it is
4206            recursive. In this case, throw away everything we put on the
4207            stack, and fetch the object back from the memo. */
4208         if (PyMemoTable_Get(self->memo, obj)) {
4209             const char pop_op = POP;
4210 
4211             if (_Pickler_Write(self, &pop_op, 1) < 0)
4212                 return -1;
4213             if (memo_get(self, obj) < 0)
4214                 return -1;
4215 
4216             return 0;
4217         }
4218         else if (memo_put(self, obj) < 0)
4219             return -1;
4220     }
4221 
4222     if (listitems && batch_list(self, listitems) < 0)
4223         return -1;
4224 
4225     if (dictitems && batch_dict(self, dictitems) < 0)
4226         return -1;
4227 
4228     if (state) {
4229         if (state_setter == NULL) {
4230             if (save(self, state, 0) < 0 ||
4231                 _Pickler_Write(self, &build_op, 1) < 0)
4232                 return -1;
4233         }
4234         else {
4235 
4236             /* If a state_setter is specified, call it instead of load_build to
4237              * update obj's with its previous state.
4238              * The first 4 save/write instructions push state_setter and its
4239              * tuple of expected arguments (obj, state) onto the stack. The
4240              * REDUCE opcode triggers the state_setter(obj, state) function
4241              * call. Finally, because state-updating routines only do in-place
4242              * modification, the whole operation has to be stack-transparent.
4243              * Thus, we finally pop the call's output from the stack.*/
4244 
4245             const char tupletwo_op = TUPLE2;
4246             const char pop_op = POP;
4247             if (save(self, state_setter, 0) < 0 ||
4248                 save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
4249                 _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
4250                 _Pickler_Write(self, &reduce_op, 1) < 0 ||
4251                 _Pickler_Write(self, &pop_op, 1) < 0)
4252                 return -1;
4253         }
4254     }
4255     return 0;
4256 }
4257 
4258 static int
save(PicklerObject * self,PyObject * obj,int pers_save)4259 save(PicklerObject *self, PyObject *obj, int pers_save)
4260 {
4261     PyTypeObject *type;
4262     PyObject *reduce_func = NULL;
4263     PyObject *reduce_value = NULL;
4264     int status = 0;
4265 
4266     if (_Pickler_OpcodeBoundary(self) < 0)
4267         return -1;
4268 
4269     /* The extra pers_save argument is necessary to avoid calling save_pers()
4270        on its returned object. */
4271     if (!pers_save && self->pers_func) {
4272         /* save_pers() returns:
4273             -1   to signal an error;
4274              0   if it did nothing successfully;
4275              1   if a persistent id was saved.
4276          */
4277         if ((status = save_pers(self, obj)) != 0)
4278             return status;
4279     }
4280 
4281     type = Py_TYPE(obj);
4282 
4283     /* The old cPickle had an optimization that used switch-case statement
4284        dispatching on the first letter of the type name.  This has was removed
4285        since benchmarks shown that this optimization was actually slowing
4286        things down. */
4287 
4288     /* Atom types; these aren't memoized, so don't check the memo. */
4289 
4290     if (obj == Py_None) {
4291         return save_none(self, obj);
4292     }
4293     else if (obj == Py_False || obj == Py_True) {
4294         return save_bool(self, obj);
4295     }
4296     else if (type == &PyLong_Type) {
4297         return save_long(self, obj);
4298     }
4299     else if (type == &PyFloat_Type) {
4300         return save_float(self, obj);
4301     }
4302 
4303     /* Check the memo to see if it has the object. If so, generate
4304        a GET (or BINGET) opcode, instead of pickling the object
4305        once again. */
4306     if (PyMemoTable_Get(self->memo, obj)) {
4307         return memo_get(self, obj);
4308     }
4309 
4310     if (type == &PyBytes_Type) {
4311         return save_bytes(self, obj);
4312     }
4313     else if (type == &PyUnicode_Type) {
4314         return save_unicode(self, obj);
4315     }
4316 
4317     /* We're only calling Py_EnterRecursiveCall here so that atomic
4318        types above are pickled faster. */
4319     if (Py_EnterRecursiveCall(" while pickling an object")) {
4320         return -1;
4321     }
4322 
4323     if (type == &PyDict_Type) {
4324         status = save_dict(self, obj);
4325         goto done;
4326     }
4327     else if (type == &PySet_Type) {
4328         status = save_set(self, obj);
4329         goto done;
4330     }
4331     else if (type == &PyFrozenSet_Type) {
4332         status = save_frozenset(self, obj);
4333         goto done;
4334     }
4335     else if (type == &PyList_Type) {
4336         status = save_list(self, obj);
4337         goto done;
4338     }
4339     else if (type == &PyTuple_Type) {
4340         status = save_tuple(self, obj);
4341         goto done;
4342     }
4343     else if (type == &PyByteArray_Type) {
4344         status = save_bytearray(self, obj);
4345         goto done;
4346     }
4347     else if (type == &PyPickleBuffer_Type) {
4348         status = save_picklebuffer(self, obj);
4349         goto done;
4350     }
4351 
4352     /* Now, check reducer_override.  If it returns NotImplemented,
4353      * fallback to save_type or save_global, and then perhaps to the
4354      * regular reduction mechanism.
4355      */
4356     if (self->reducer_override != NULL) {
4357         reduce_value = PyObject_CallOneArg(self->reducer_override, obj);
4358         if (reduce_value == NULL) {
4359             goto error;
4360         }
4361         if (reduce_value != Py_NotImplemented) {
4362             goto reduce;
4363         }
4364         Py_DECREF(reduce_value);
4365         reduce_value = NULL;
4366     }
4367 
4368     if (type == &PyType_Type) {
4369         status = save_type(self, obj);
4370         goto done;
4371     }
4372     else if (type == &PyFunction_Type) {
4373         status = save_global(self, obj, NULL);
4374         goto done;
4375     }
4376 
4377     /* XXX: This part needs some unit tests. */
4378 
4379     /* Get a reduction callable, and call it.  This may come from
4380      * self.dispatch_table, copyreg.dispatch_table, the object's
4381      * __reduce_ex__ method, or the object's __reduce__ method.
4382      */
4383     if (self->dispatch_table == NULL) {
4384         PickleState *st = _Pickle_GetGlobalState();
4385         reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4386                                               (PyObject *)type);
4387         if (reduce_func == NULL) {
4388             if (PyErr_Occurred()) {
4389                 goto error;
4390             }
4391         } else {
4392             /* PyDict_GetItemWithError() returns a borrowed reference.
4393                Increase the reference count to be consistent with
4394                PyObject_GetItem and _PyObject_GetAttrId used below. */
4395             Py_INCREF(reduce_func);
4396         }
4397     } else {
4398         reduce_func = PyObject_GetItem(self->dispatch_table,
4399                                        (PyObject *)type);
4400         if (reduce_func == NULL) {
4401             if (PyErr_ExceptionMatches(PyExc_KeyError))
4402                 PyErr_Clear();
4403             else
4404                 goto error;
4405         }
4406     }
4407     if (reduce_func != NULL) {
4408         Py_INCREF(obj);
4409         reduce_value = _Pickle_FastCall(reduce_func, obj);
4410     }
4411     else if (PyType_IsSubtype(type, &PyType_Type)) {
4412         status = save_global(self, obj, NULL);
4413         goto done;
4414     }
4415     else {
4416         _Py_IDENTIFIER(__reduce__);
4417         _Py_IDENTIFIER(__reduce_ex__);
4418 
4419         /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4420            automatically defined as __reduce__. While this is convenient, this
4421            make it impossible to know which method was actually called. Of
4422            course, this is not a big deal. But still, it would be nice to let
4423            the user know which method was called when something go
4424            wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4425            don't actually have to check for a __reduce__ method. */
4426 
4427         /* Check for a __reduce_ex__ method. */
4428         if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
4429             goto error;
4430         }
4431         if (reduce_func != NULL) {
4432             PyObject *proto;
4433             proto = PyLong_FromLong(self->proto);
4434             if (proto != NULL) {
4435                 reduce_value = _Pickle_FastCall(reduce_func, proto);
4436             }
4437         }
4438         else {
4439             /* Check for a __reduce__ method. */
4440             if (_PyObject_LookupAttrId(obj, &PyId___reduce__, &reduce_func) < 0) {
4441                 goto error;
4442             }
4443             if (reduce_func != NULL) {
4444                 reduce_value = PyObject_CallNoArgs(reduce_func);
4445             }
4446             else {
4447                 PickleState *st = _Pickle_GetGlobalState();
4448                 PyErr_Format(st->PicklingError,
4449                              "can't pickle '%.200s' object: %R",
4450                              type->tp_name, obj);
4451                 goto error;
4452             }
4453         }
4454     }
4455 
4456     if (reduce_value == NULL)
4457         goto error;
4458 
4459   reduce:
4460     if (PyUnicode_Check(reduce_value)) {
4461         status = save_global(self, obj, reduce_value);
4462         goto done;
4463     }
4464 
4465     if (!PyTuple_Check(reduce_value)) {
4466         PickleState *st = _Pickle_GetGlobalState();
4467         PyErr_SetString(st->PicklingError,
4468                         "__reduce__ must return a string or tuple");
4469         goto error;
4470     }
4471 
4472     status = save_reduce(self, reduce_value, obj);
4473 
4474     if (0) {
4475   error:
4476         status = -1;
4477     }
4478   done:
4479 
4480     Py_LeaveRecursiveCall();
4481     Py_XDECREF(reduce_func);
4482     Py_XDECREF(reduce_value);
4483 
4484     return status;
4485 }
4486 
4487 static int
dump(PicklerObject * self,PyObject * obj)4488 dump(PicklerObject *self, PyObject *obj)
4489 {
4490     const char stop_op = STOP;
4491     int status = -1;
4492     PyObject *tmp;
4493     _Py_IDENTIFIER(reducer_override);
4494 
4495     if (_PyObject_LookupAttrId((PyObject *)self, &PyId_reducer_override,
4496                                &tmp) < 0) {
4497       goto error;
4498     }
4499     /* Cache the reducer_override method, if it exists. */
4500     if (tmp != NULL) {
4501         Py_XSETREF(self->reducer_override, tmp);
4502     }
4503     else {
4504         Py_CLEAR(self->reducer_override);
4505     }
4506 
4507     if (self->proto >= 2) {
4508         char header[2];
4509 
4510         header[0] = PROTO;
4511         assert(self->proto >= 0 && self->proto < 256);
4512         header[1] = (unsigned char)self->proto;
4513         if (_Pickler_Write(self, header, 2) < 0)
4514             goto error;
4515         if (self->proto >= 4)
4516             self->framing = 1;
4517     }
4518 
4519     if (save(self, obj, 0) < 0 ||
4520         _Pickler_Write(self, &stop_op, 1) < 0 ||
4521         _Pickler_CommitFrame(self) < 0)
4522         goto error;
4523 
4524     // Success
4525     status = 0;
4526 
4527   error:
4528     self->framing = 0;
4529 
4530     /* Break the reference cycle we generated at the beginning this function
4531      * call when setting the reducer_override attribute of the Pickler instance
4532      * to a bound method of the same instance. This is important as the Pickler
4533      * instance holds a reference to each object it has pickled (through its
4534      * memo): thus, these objects wont be garbage-collected as long as the
4535      * Pickler itself is not collected. */
4536     Py_CLEAR(self->reducer_override);
4537     return status;
4538 }
4539 
4540 /*[clinic input]
4541 
4542 _pickle.Pickler.clear_memo
4543 
4544 Clears the pickler's "memo".
4545 
4546 The memo is the data structure that remembers which objects the
4547 pickler has already seen, so that shared or recursive objects are
4548 pickled by reference and not by value.  This method is useful when
4549 re-using picklers.
4550 [clinic start generated code]*/
4551 
4552 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4553 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4554 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4555 {
4556     if (self->memo)
4557         PyMemoTable_Clear(self->memo);
4558 
4559     Py_RETURN_NONE;
4560 }
4561 
4562 /*[clinic input]
4563 
4564 _pickle.Pickler.dump
4565 
4566   obj: object
4567   /
4568 
4569 Write a pickled representation of the given object to the open file.
4570 [clinic start generated code]*/
4571 
4572 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4573 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4574 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4575 {
4576     /* Check whether the Pickler was initialized correctly (issue3664).
4577        Developers often forget to call __init__() in their subclasses, which
4578        would trigger a segfault without this check. */
4579     if (self->write == NULL) {
4580         PickleState *st = _Pickle_GetGlobalState();
4581         PyErr_Format(st->PicklingError,
4582                      "Pickler.__init__() was not called by %s.__init__()",
4583                      Py_TYPE(self)->tp_name);
4584         return NULL;
4585     }
4586 
4587     if (_Pickler_ClearBuffer(self) < 0)
4588         return NULL;
4589 
4590     if (dump(self, obj) < 0)
4591         return NULL;
4592 
4593     if (_Pickler_FlushToFile(self) < 0)
4594         return NULL;
4595 
4596     Py_RETURN_NONE;
4597 }
4598 
4599 /*[clinic input]
4600 
4601 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4602 
4603 Returns size in memory, in bytes.
4604 [clinic start generated code]*/
4605 
4606 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4607 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4608 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4609 {
4610     Py_ssize_t res, s;
4611 
4612     res = _PyObject_SIZE(Py_TYPE(self));
4613     if (self->memo != NULL) {
4614         res += sizeof(PyMemoTable);
4615         res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4616     }
4617     if (self->output_buffer != NULL) {
4618         s = _PySys_GetSizeOf(self->output_buffer);
4619         if (s == -1)
4620             return -1;
4621         res += s;
4622     }
4623     return res;
4624 }
4625 
4626 static struct PyMethodDef Pickler_methods[] = {
4627     _PICKLE_PICKLER_DUMP_METHODDEF
4628     _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
4629     _PICKLE_PICKLER___SIZEOF___METHODDEF
4630     {NULL, NULL}                /* sentinel */
4631 };
4632 
4633 static void
Pickler_dealloc(PicklerObject * self)4634 Pickler_dealloc(PicklerObject *self)
4635 {
4636     PyObject_GC_UnTrack(self);
4637 
4638     Py_XDECREF(self->output_buffer);
4639     Py_XDECREF(self->write);
4640     Py_XDECREF(self->pers_func);
4641     Py_XDECREF(self->dispatch_table);
4642     Py_XDECREF(self->fast_memo);
4643     Py_XDECREF(self->reducer_override);
4644     Py_XDECREF(self->buffer_callback);
4645 
4646     PyMemoTable_Del(self->memo);
4647 
4648     Py_TYPE(self)->tp_free((PyObject *)self);
4649 }
4650 
4651 static int
Pickler_traverse(PicklerObject * self,visitproc visit,void * arg)4652 Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
4653 {
4654     Py_VISIT(self->write);
4655     Py_VISIT(self->pers_func);
4656     Py_VISIT(self->dispatch_table);
4657     Py_VISIT(self->fast_memo);
4658     Py_VISIT(self->reducer_override);
4659     Py_VISIT(self->buffer_callback);
4660     return 0;
4661 }
4662 
4663 static int
Pickler_clear(PicklerObject * self)4664 Pickler_clear(PicklerObject *self)
4665 {
4666     Py_CLEAR(self->output_buffer);
4667     Py_CLEAR(self->write);
4668     Py_CLEAR(self->pers_func);
4669     Py_CLEAR(self->dispatch_table);
4670     Py_CLEAR(self->fast_memo);
4671     Py_CLEAR(self->reducer_override);
4672     Py_CLEAR(self->buffer_callback);
4673 
4674     if (self->memo != NULL) {
4675         PyMemoTable *memo = self->memo;
4676         self->memo = NULL;
4677         PyMemoTable_Del(memo);
4678     }
4679     return 0;
4680 }
4681 
4682 
4683 /*[clinic input]
4684 
4685 _pickle.Pickler.__init__
4686 
4687   file: object
4688   protocol: object = None
4689   fix_imports: bool = True
4690   buffer_callback: object = None
4691 
4692 This takes a binary file for writing a pickle data stream.
4693 
4694 The optional *protocol* argument tells the pickler to use the given
4695 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
4696 protocol is 4. It was introduced in Python 3.4, and is incompatible
4697 with previous versions.
4698 
4699 Specifying a negative protocol version selects the highest protocol
4700 version supported.  The higher the protocol used, the more recent the
4701 version of Python needed to read the pickle produced.
4702 
4703 The *file* argument must have a write() method that accepts a single
4704 bytes argument. It can thus be a file object opened for binary
4705 writing, an io.BytesIO instance, or any other custom object that meets
4706 this interface.
4707 
4708 If *fix_imports* is True and protocol is less than 3, pickle will try
4709 to map the new Python 3 names to the old module names used in Python
4710 2, so that the pickle data stream is readable with Python 2.
4711 
4712 If *buffer_callback* is None (the default), buffer views are
4713 serialized into *file* as part of the pickle stream.
4714 
4715 If *buffer_callback* is not None, then it can be called any number
4716 of times with a buffer view.  If the callback returns a false value
4717 (such as None), the given buffer is out-of-band; otherwise the
4718 buffer is serialized in-band, i.e. inside the pickle stream.
4719 
4720 It is an error if *buffer_callback* is not None and *protocol*
4721 is None or smaller than 5.
4722 
4723 [clinic start generated code]*/
4724 
4725 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)4726 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4727                               PyObject *protocol, int fix_imports,
4728                               PyObject *buffer_callback)
4729 /*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
4730 {
4731     _Py_IDENTIFIER(persistent_id);
4732     _Py_IDENTIFIER(dispatch_table);
4733 
4734     /* In case of multiple __init__() calls, clear previous content. */
4735     if (self->write != NULL)
4736         (void)Pickler_clear(self);
4737 
4738     if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4739         return -1;
4740 
4741     if (_Pickler_SetOutputStream(self, file) < 0)
4742         return -1;
4743 
4744     if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4745         return -1;
4746 
4747     /* memo and output_buffer may have already been created in _Pickler_New */
4748     if (self->memo == NULL) {
4749         self->memo = PyMemoTable_New();
4750         if (self->memo == NULL)
4751             return -1;
4752     }
4753     self->output_len = 0;
4754     if (self->output_buffer == NULL) {
4755         self->max_output_len = WRITE_BUF_SIZE;
4756         self->output_buffer = PyBytes_FromStringAndSize(NULL,
4757                                                         self->max_output_len);
4758         if (self->output_buffer == NULL)
4759             return -1;
4760     }
4761 
4762     self->fast = 0;
4763     self->fast_nesting = 0;
4764     self->fast_memo = NULL;
4765 
4766     if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4767                         &self->pers_func, &self->pers_func_self) < 0)
4768     {
4769         return -1;
4770     }
4771 
4772     if (_PyObject_LookupAttrId((PyObject *)self,
4773                                     &PyId_dispatch_table, &self->dispatch_table) < 0) {
4774         return -1;
4775     }
4776 
4777     return 0;
4778 }
4779 
4780 
4781 /* Define a proxy object for the Pickler's internal memo object. This is to
4782  * avoid breaking code like:
4783  *  pickler.memo.clear()
4784  * and
4785  *  pickler.memo = saved_memo
4786  * Is this a good idea? Not really, but we don't want to break code that uses
4787  * it. Note that we don't implement the entire mapping API here. This is
4788  * intentional, as these should be treated as black-box implementation details.
4789  */
4790 
4791 /*[clinic input]
4792 _pickle.PicklerMemoProxy.clear
4793 
4794 Remove all items from memo.
4795 [clinic start generated code]*/
4796 
4797 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4798 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4799 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4800 {
4801     if (self->pickler->memo)
4802         PyMemoTable_Clear(self->pickler->memo);
4803     Py_RETURN_NONE;
4804 }
4805 
4806 /*[clinic input]
4807 _pickle.PicklerMemoProxy.copy
4808 
4809 Copy the memo to a new object.
4810 [clinic start generated code]*/
4811 
4812 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4813 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4814 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4815 {
4816     PyMemoTable *memo;
4817     PyObject *new_memo = PyDict_New();
4818     if (new_memo == NULL)
4819         return NULL;
4820 
4821     memo = self->pickler->memo;
4822     for (size_t i = 0; i < memo->mt_allocated; ++i) {
4823         PyMemoEntry entry = memo->mt_table[i];
4824         if (entry.me_key != NULL) {
4825             int status;
4826             PyObject *key, *value;
4827 
4828             key = PyLong_FromVoidPtr(entry.me_key);
4829             value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4830 
4831             if (key == NULL || value == NULL) {
4832                 Py_XDECREF(key);
4833                 Py_XDECREF(value);
4834                 goto error;
4835             }
4836             status = PyDict_SetItem(new_memo, key, value);
4837             Py_DECREF(key);
4838             Py_DECREF(value);
4839             if (status < 0)
4840                 goto error;
4841         }
4842     }
4843     return new_memo;
4844 
4845   error:
4846     Py_XDECREF(new_memo);
4847     return NULL;
4848 }
4849 
4850 /*[clinic input]
4851 _pickle.PicklerMemoProxy.__reduce__
4852 
4853 Implement pickle support.
4854 [clinic start generated code]*/
4855 
4856 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4857 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4858 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4859 {
4860     PyObject *reduce_value, *dict_args;
4861     PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4862     if (contents == NULL)
4863         return NULL;
4864 
4865     reduce_value = PyTuple_New(2);
4866     if (reduce_value == NULL) {
4867         Py_DECREF(contents);
4868         return NULL;
4869     }
4870     dict_args = PyTuple_New(1);
4871     if (dict_args == NULL) {
4872         Py_DECREF(contents);
4873         Py_DECREF(reduce_value);
4874         return NULL;
4875     }
4876     PyTuple_SET_ITEM(dict_args, 0, contents);
4877     Py_INCREF((PyObject *)&PyDict_Type);
4878     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4879     PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4880     return reduce_value;
4881 }
4882 
4883 static PyMethodDef picklerproxy_methods[] = {
4884     _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
4885     _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
4886     _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
4887     {NULL, NULL} /* sentinel */
4888 };
4889 
4890 static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject * self)4891 PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4892 {
4893     PyObject_GC_UnTrack(self);
4894     Py_XDECREF(self->pickler);
4895     PyObject_GC_Del((PyObject *)self);
4896 }
4897 
4898 static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject * self,visitproc visit,void * arg)4899 PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4900                           visitproc visit, void *arg)
4901 {
4902     Py_VISIT(self->pickler);
4903     return 0;
4904 }
4905 
4906 static int
PicklerMemoProxy_clear(PicklerMemoProxyObject * self)4907 PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4908 {
4909     Py_CLEAR(self->pickler);
4910     return 0;
4911 }
4912 
4913 static PyTypeObject PicklerMemoProxyType = {
4914     PyVarObject_HEAD_INIT(NULL, 0)
4915     "_pickle.PicklerMemoProxy",                 /*tp_name*/
4916     sizeof(PicklerMemoProxyObject),             /*tp_basicsize*/
4917     0,
4918     (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
4919     0,                                          /* tp_vectorcall_offset */
4920     0,                                          /* tp_getattr */
4921     0,                                          /* tp_setattr */
4922     0,                                          /* tp_as_async */
4923     0,                                          /* tp_repr */
4924     0,                                          /* tp_as_number */
4925     0,                                          /* tp_as_sequence */
4926     0,                                          /* tp_as_mapping */
4927     PyObject_HashNotImplemented,                /* tp_hash */
4928     0,                                          /* tp_call */
4929     0,                                          /* tp_str */
4930     PyObject_GenericGetAttr,                    /* tp_getattro */
4931     PyObject_GenericSetAttr,                    /* tp_setattro */
4932     0,                                          /* tp_as_buffer */
4933     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4934     0,                                          /* tp_doc */
4935     (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
4936     (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
4937     0,                                          /* tp_richcompare */
4938     0,                                          /* tp_weaklistoffset */
4939     0,                                          /* tp_iter */
4940     0,                                          /* tp_iternext */
4941     picklerproxy_methods,                       /* tp_methods */
4942 };
4943 
4944 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4945 PicklerMemoProxy_New(PicklerObject *pickler)
4946 {
4947     PicklerMemoProxyObject *self;
4948 
4949     self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4950     if (self == NULL)
4951         return NULL;
4952     Py_INCREF(pickler);
4953     self->pickler = pickler;
4954     PyObject_GC_Track(self);
4955     return (PyObject *)self;
4956 }
4957 
4958 /*****************************************************************************/
4959 
4960 static PyObject *
Pickler_get_memo(PicklerObject * self,void * Py_UNUSED (ignored))4961 Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
4962 {
4963     return PicklerMemoProxy_New(self);
4964 }
4965 
4966 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4967 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4968 {
4969     PyMemoTable *new_memo = NULL;
4970 
4971     if (obj == NULL) {
4972         PyErr_SetString(PyExc_TypeError,
4973                         "attribute deletion is not supported");
4974         return -1;
4975     }
4976 
4977     if (Py_IS_TYPE(obj, &PicklerMemoProxyType)) {
4978         PicklerObject *pickler =
4979             ((PicklerMemoProxyObject *)obj)->pickler;
4980 
4981         new_memo = PyMemoTable_Copy(pickler->memo);
4982         if (new_memo == NULL)
4983             return -1;
4984     }
4985     else if (PyDict_Check(obj)) {
4986         Py_ssize_t i = 0;
4987         PyObject *key, *value;
4988 
4989         new_memo = PyMemoTable_New();
4990         if (new_memo == NULL)
4991             return -1;
4992 
4993         while (PyDict_Next(obj, &i, &key, &value)) {
4994             Py_ssize_t memo_id;
4995             PyObject *memo_obj;
4996 
4997             if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
4998                 PyErr_SetString(PyExc_TypeError,
4999                                 "'memo' values must be 2-item tuples");
5000                 goto error;
5001             }
5002             memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
5003             if (memo_id == -1 && PyErr_Occurred())
5004                 goto error;
5005             memo_obj = PyTuple_GET_ITEM(value, 1);
5006             if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
5007                 goto error;
5008         }
5009     }
5010     else {
5011         PyErr_Format(PyExc_TypeError,
5012                      "'memo' attribute must be a PicklerMemoProxy object "
5013                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
5014         return -1;
5015     }
5016 
5017     PyMemoTable_Del(self->memo);
5018     self->memo = new_memo;
5019 
5020     return 0;
5021 
5022   error:
5023     if (new_memo)
5024         PyMemoTable_Del(new_memo);
5025     return -1;
5026 }
5027 
5028 static PyObject *
Pickler_get_persid(PicklerObject * self,void * Py_UNUSED (ignored))5029 Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
5030 {
5031     if (self->pers_func == NULL) {
5032         PyErr_SetString(PyExc_AttributeError, "persistent_id");
5033         return NULL;
5034     }
5035     return reconstruct_method(self->pers_func, self->pers_func_self);
5036 }
5037 
5038 static int
Pickler_set_persid(PicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))5039 Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
5040 {
5041     if (value == NULL) {
5042         PyErr_SetString(PyExc_TypeError,
5043                         "attribute deletion is not supported");
5044         return -1;
5045     }
5046     if (!PyCallable_Check(value)) {
5047         PyErr_SetString(PyExc_TypeError,
5048                         "persistent_id must be a callable taking one argument");
5049         return -1;
5050     }
5051 
5052     self->pers_func_self = NULL;
5053     Py_INCREF(value);
5054     Py_XSETREF(self->pers_func, value);
5055 
5056     return 0;
5057 }
5058 
5059 static PyMemberDef Pickler_members[] = {
5060     {"bin", T_INT, offsetof(PicklerObject, bin)},
5061     {"fast", T_INT, offsetof(PicklerObject, fast)},
5062     {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
5063     {NULL}
5064 };
5065 
5066 static PyGetSetDef Pickler_getsets[] = {
5067     {"memo",          (getter)Pickler_get_memo,
5068                       (setter)Pickler_set_memo},
5069     {"persistent_id", (getter)Pickler_get_persid,
5070                       (setter)Pickler_set_persid},
5071     {NULL}
5072 };
5073 
5074 static PyTypeObject Pickler_Type = {
5075     PyVarObject_HEAD_INIT(NULL, 0)
5076     "_pickle.Pickler"  ,                /*tp_name*/
5077     sizeof(PicklerObject),              /*tp_basicsize*/
5078     0,                                  /*tp_itemsize*/
5079     (destructor)Pickler_dealloc,        /*tp_dealloc*/
5080     0,                                  /*tp_vectorcall_offset*/
5081     0,                                  /*tp_getattr*/
5082     0,                                  /*tp_setattr*/
5083     0,                                  /*tp_as_async*/
5084     0,                                  /*tp_repr*/
5085     0,                                  /*tp_as_number*/
5086     0,                                  /*tp_as_sequence*/
5087     0,                                  /*tp_as_mapping*/
5088     0,                                  /*tp_hash*/
5089     0,                                  /*tp_call*/
5090     0,                                  /*tp_str*/
5091     0,                                  /*tp_getattro*/
5092     0,                                  /*tp_setattro*/
5093     0,                                  /*tp_as_buffer*/
5094     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5095     _pickle_Pickler___init____doc__,    /*tp_doc*/
5096     (traverseproc)Pickler_traverse,     /*tp_traverse*/
5097     (inquiry)Pickler_clear,             /*tp_clear*/
5098     0,                                  /*tp_richcompare*/
5099     0,                                  /*tp_weaklistoffset*/
5100     0,                                  /*tp_iter*/
5101     0,                                  /*tp_iternext*/
5102     Pickler_methods,                    /*tp_methods*/
5103     Pickler_members,                    /*tp_members*/
5104     Pickler_getsets,                    /*tp_getset*/
5105     0,                                  /*tp_base*/
5106     0,                                  /*tp_dict*/
5107     0,                                  /*tp_descr_get*/
5108     0,                                  /*tp_descr_set*/
5109     0,                                  /*tp_dictoffset*/
5110     _pickle_Pickler___init__,           /*tp_init*/
5111     PyType_GenericAlloc,                /*tp_alloc*/
5112     PyType_GenericNew,                  /*tp_new*/
5113     PyObject_GC_Del,                    /*tp_free*/
5114     0,                                  /*tp_is_gc*/
5115 };
5116 
5117 /* Temporary helper for calling self.find_class().
5118 
5119    XXX: It would be nice to able to avoid Python function call overhead, by
5120    using directly the C version of find_class(), when find_class() is not
5121    overridden by a subclass. Although, this could become rather hackish. A
5122    simpler optimization would be to call the C function when self is not a
5123    subclass instance. */
5124 static PyObject *
find_class(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)5125 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
5126 {
5127     _Py_IDENTIFIER(find_class);
5128 
5129     return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
5130                                          module_name, global_name, NULL);
5131 }
5132 
5133 static Py_ssize_t
marker(UnpicklerObject * self)5134 marker(UnpicklerObject *self)
5135 {
5136     Py_ssize_t mark;
5137 
5138     if (self->num_marks < 1) {
5139         PickleState *st = _Pickle_GetGlobalState();
5140         PyErr_SetString(st->UnpicklingError, "could not find MARK");
5141         return -1;
5142     }
5143 
5144     mark = self->marks[--self->num_marks];
5145     self->stack->mark_set = self->num_marks != 0;
5146     self->stack->fence = self->num_marks ?
5147             self->marks[self->num_marks - 1] : 0;
5148     return mark;
5149 }
5150 
5151 static int
load_none(UnpicklerObject * self)5152 load_none(UnpicklerObject *self)
5153 {
5154     PDATA_APPEND(self->stack, Py_None, -1);
5155     return 0;
5156 }
5157 
5158 static int
load_int(UnpicklerObject * self)5159 load_int(UnpicklerObject *self)
5160 {
5161     PyObject *value;
5162     char *endptr, *s;
5163     Py_ssize_t len;
5164     long x;
5165 
5166     if ((len = _Unpickler_Readline(self, &s)) < 0)
5167         return -1;
5168     if (len < 2)
5169         return bad_readline();
5170 
5171     errno = 0;
5172     /* XXX: Should the base argument of strtol() be explicitly set to 10?
5173        XXX(avassalotti): Should this uses PyOS_strtol()? */
5174     x = strtol(s, &endptr, 0);
5175 
5176     if (errno || (*endptr != '\n' && *endptr != '\0')) {
5177         /* Hm, maybe we've got something long.  Let's try reading
5178          * it as a Python int object. */
5179         errno = 0;
5180         /* XXX: Same thing about the base here. */
5181         value = PyLong_FromString(s, NULL, 0);
5182         if (value == NULL) {
5183             PyErr_SetString(PyExc_ValueError,
5184                             "could not convert string to int");
5185             return -1;
5186         }
5187     }
5188     else {
5189         if (len == 3 && (x == 0 || x == 1)) {
5190             if ((value = PyBool_FromLong(x)) == NULL)
5191                 return -1;
5192         }
5193         else {
5194             if ((value = PyLong_FromLong(x)) == NULL)
5195                 return -1;
5196         }
5197     }
5198 
5199     PDATA_PUSH(self->stack, value, -1);
5200     return 0;
5201 }
5202 
5203 static int
load_bool(UnpicklerObject * self,PyObject * boolean)5204 load_bool(UnpicklerObject *self, PyObject *boolean)
5205 {
5206     assert(boolean == Py_True || boolean == Py_False);
5207     PDATA_APPEND(self->stack, boolean, -1);
5208     return 0;
5209 }
5210 
5211 /* s contains x bytes of an unsigned little-endian integer.  Return its value
5212  * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5213  */
5214 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)5215 calc_binsize(char *bytes, int nbytes)
5216 {
5217     unsigned char *s = (unsigned char *)bytes;
5218     int i;
5219     size_t x = 0;
5220 
5221     if (nbytes > (int)sizeof(size_t)) {
5222         /* Check for integer overflow.  BINBYTES8 and BINUNICODE8 opcodes
5223          * have 64-bit size that can't be represented on 32-bit platform.
5224          */
5225         for (i = (int)sizeof(size_t); i < nbytes; i++) {
5226             if (s[i])
5227                 return -1;
5228         }
5229         nbytes = (int)sizeof(size_t);
5230     }
5231     for (i = 0; i < nbytes; i++) {
5232         x |= (size_t) s[i] << (8 * i);
5233     }
5234 
5235     if (x > PY_SSIZE_T_MAX)
5236         return -1;
5237     else
5238         return (Py_ssize_t) x;
5239 }
5240 
/* `bytes` holds an `nbytes`-byte little-endian integer.  Return its value
 * as a C long.  Obscure:  when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is a
 * historical source of x-platform bugs.
 *
 * The callers in this file only pass 1, 2 or 4 for nbytes.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    /* Accumulate in an unsigned type: shifting a byte with its top bit set
     * into bit 31 of a 32-bit signed long would be undefined behaviour.
     */
    for (i = 0; i < nbytes; i++) {
        ux |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed.  When bit 31 is set the value is ux - 2**32; compute that
     * with in-range arithmetic so the result is the same whatever the
     * width of long.
     */
    if (nbytes == 4 && (ux & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - ux) - 1;
    }

    return (long)ux;
}
5267 
5268 static int
load_binintx(UnpicklerObject * self,char * s,int size)5269 load_binintx(UnpicklerObject *self, char *s, int size)
5270 {
5271     PyObject *value;
5272     long x;
5273 
5274     x = calc_binint(s, size);
5275 
5276     if ((value = PyLong_FromLong(x)) == NULL)
5277         return -1;
5278 
5279     PDATA_PUSH(self->stack, value, -1);
5280     return 0;
5281 }
5282 
5283 static int
load_binint(UnpicklerObject * self)5284 load_binint(UnpicklerObject *self)
5285 {
5286     char *s;
5287 
5288     if (_Unpickler_Read(self, &s, 4) < 0)
5289         return -1;
5290 
5291     return load_binintx(self, s, 4);
5292 }
5293 
5294 static int
load_binint1(UnpicklerObject * self)5295 load_binint1(UnpicklerObject *self)
5296 {
5297     char *s;
5298 
5299     if (_Unpickler_Read(self, &s, 1) < 0)
5300         return -1;
5301 
5302     return load_binintx(self, s, 1);
5303 }
5304 
5305 static int
load_binint2(UnpicklerObject * self)5306 load_binint2(UnpicklerObject *self)
5307 {
5308     char *s;
5309 
5310     if (_Unpickler_Read(self, &s, 2) < 0)
5311         return -1;
5312 
5313     return load_binintx(self, s, 2);
5314 }
5315 
5316 static int
load_long(UnpicklerObject * self)5317 load_long(UnpicklerObject *self)
5318 {
5319     PyObject *value;
5320     char *s = NULL;
5321     Py_ssize_t len;
5322 
5323     if ((len = _Unpickler_Readline(self, &s)) < 0)
5324         return -1;
5325     if (len < 2)
5326         return bad_readline();
5327 
5328     /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5329        the 'L' before calling PyLong_FromString.  In order to maintain
5330        compatibility with Python 3.0.0, we don't actually *require*
5331        the 'L' to be present. */
5332     if (s[len-2] == 'L')
5333         s[len-2] = '\0';
5334     /* XXX: Should the base argument explicitly set to 10? */
5335     value = PyLong_FromString(s, NULL, 0);
5336     if (value == NULL)
5337         return -1;
5338 
5339     PDATA_PUSH(self->stack, value, -1);
5340     return 0;
5341 }
5342 
5343 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
5344  * data following.
5345  */
5346 static int
load_counted_long(UnpicklerObject * self,int size)5347 load_counted_long(UnpicklerObject *self, int size)
5348 {
5349     PyObject *value;
5350     char *nbytes;
5351     char *pdata;
5352 
5353     assert(size == 1 || size == 4);
5354     if (_Unpickler_Read(self, &nbytes, size) < 0)
5355         return -1;
5356 
5357     size = calc_binint(nbytes, size);
5358     if (size < 0) {
5359         PickleState *st = _Pickle_GetGlobalState();
5360         /* Corrupt or hostile pickle -- we never write one like this */
5361         PyErr_SetString(st->UnpicklingError,
5362                         "LONG pickle has negative byte count");
5363         return -1;
5364     }
5365 
5366     if (size == 0)
5367         value = PyLong_FromLong(0L);
5368     else {
5369         /* Read the raw little-endian bytes and convert. */
5370         if (_Unpickler_Read(self, &pdata, size) < 0)
5371             return -1;
5372         value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5373                                       1 /* little endian */ , 1 /* signed */ );
5374     }
5375     if (value == NULL)
5376         return -1;
5377     PDATA_PUSH(self->stack, value, -1);
5378     return 0;
5379 }
5380 
5381 static int
load_float(UnpicklerObject * self)5382 load_float(UnpicklerObject *self)
5383 {
5384     PyObject *value;
5385     char *endptr, *s;
5386     Py_ssize_t len;
5387     double d;
5388 
5389     if ((len = _Unpickler_Readline(self, &s)) < 0)
5390         return -1;
5391     if (len < 2)
5392         return bad_readline();
5393 
5394     errno = 0;
5395     d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5396     if (d == -1.0 && PyErr_Occurred())
5397         return -1;
5398     if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5399         PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5400         return -1;
5401     }
5402     value = PyFloat_FromDouble(d);
5403     if (value == NULL)
5404         return -1;
5405 
5406     PDATA_PUSH(self->stack, value, -1);
5407     return 0;
5408 }
5409 
5410 static int
load_binfloat(UnpicklerObject * self)5411 load_binfloat(UnpicklerObject *self)
5412 {
5413     PyObject *value;
5414     double x;
5415     char *s;
5416 
5417     if (_Unpickler_Read(self, &s, 8) < 0)
5418         return -1;
5419 
5420     x = _PyFloat_Unpack8((unsigned char *)s, 0);
5421     if (x == -1.0 && PyErr_Occurred())
5422         return -1;
5423 
5424     if ((value = PyFloat_FromDouble(x)) == NULL)
5425         return -1;
5426 
5427     PDATA_PUSH(self->stack, value, -1);
5428     return 0;
5429 }
5430 
5431 static int
load_string(UnpicklerObject * self)5432 load_string(UnpicklerObject *self)
5433 {
5434     PyObject *bytes;
5435     PyObject *obj;
5436     Py_ssize_t len;
5437     char *s, *p;
5438 
5439     if ((len = _Unpickler_Readline(self, &s)) < 0)
5440         return -1;
5441     /* Strip the newline */
5442     len--;
5443     /* Strip outermost quotes */
5444     if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5445         p = s + 1;
5446         len -= 2;
5447     }
5448     else {
5449         PickleState *st = _Pickle_GetGlobalState();
5450         PyErr_SetString(st->UnpicklingError,
5451                         "the STRING opcode argument must be quoted");
5452         return -1;
5453     }
5454     assert(len >= 0);
5455 
5456     /* Use the PyBytes API to decode the string, since that is what is used
5457        to encode, and then coerce the result to Unicode. */
5458     bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5459     if (bytes == NULL)
5460         return -1;
5461 
5462     /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5463        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5464     if (strcmp(self->encoding, "bytes") == 0) {
5465         obj = bytes;
5466     }
5467     else {
5468         obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5469         Py_DECREF(bytes);
5470         if (obj == NULL) {
5471             return -1;
5472         }
5473     }
5474 
5475     PDATA_PUSH(self->stack, obj, -1);
5476     return 0;
5477 }
5478 
5479 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)5480 load_counted_binstring(UnpicklerObject *self, int nbytes)
5481 {
5482     PyObject *obj;
5483     Py_ssize_t size;
5484     char *s;
5485 
5486     if (_Unpickler_Read(self, &s, nbytes) < 0)
5487         return -1;
5488 
5489     size = calc_binsize(s, nbytes);
5490     if (size < 0) {
5491         PickleState *st = _Pickle_GetGlobalState();
5492         PyErr_Format(st->UnpicklingError,
5493                      "BINSTRING exceeds system's maximum size of %zd bytes",
5494                      PY_SSIZE_T_MAX);
5495         return -1;
5496     }
5497 
5498     if (_Unpickler_Read(self, &s, size) < 0)
5499         return -1;
5500 
5501     /* Convert Python 2.x strings to bytes if the *encoding* given to the
5502        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5503     if (strcmp(self->encoding, "bytes") == 0) {
5504         obj = PyBytes_FromStringAndSize(s, size);
5505     }
5506     else {
5507         obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5508     }
5509     if (obj == NULL) {
5510         return -1;
5511     }
5512 
5513     PDATA_PUSH(self->stack, obj, -1);
5514     return 0;
5515 }
5516 
5517 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)5518 load_counted_binbytes(UnpicklerObject *self, int nbytes)
5519 {
5520     PyObject *bytes;
5521     Py_ssize_t size;
5522     char *s;
5523 
5524     if (_Unpickler_Read(self, &s, nbytes) < 0)
5525         return -1;
5526 
5527     size = calc_binsize(s, nbytes);
5528     if (size < 0) {
5529         PyErr_Format(PyExc_OverflowError,
5530                      "BINBYTES exceeds system's maximum size of %zd bytes",
5531                      PY_SSIZE_T_MAX);
5532         return -1;
5533     }
5534 
5535     bytes = PyBytes_FromStringAndSize(NULL, size);
5536     if (bytes == NULL)
5537         return -1;
5538     if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5539         Py_DECREF(bytes);
5540         return -1;
5541     }
5542 
5543     PDATA_PUSH(self->stack, bytes, -1);
5544     return 0;
5545 }
5546 
5547 static int
load_counted_bytearray(UnpicklerObject * self)5548 load_counted_bytearray(UnpicklerObject *self)
5549 {
5550     PyObject *bytearray;
5551     Py_ssize_t size;
5552     char *s;
5553 
5554     if (_Unpickler_Read(self, &s, 8) < 0) {
5555         return -1;
5556     }
5557 
5558     size = calc_binsize(s, 8);
5559     if (size < 0) {
5560         PyErr_Format(PyExc_OverflowError,
5561                      "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5562                      PY_SSIZE_T_MAX);
5563         return -1;
5564     }
5565 
5566     bytearray = PyByteArray_FromStringAndSize(NULL, size);
5567     if (bytearray == NULL) {
5568         return -1;
5569     }
5570     if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5571         Py_DECREF(bytearray);
5572         return -1;
5573     }
5574 
5575     PDATA_PUSH(self->stack, bytearray, -1);
5576     return 0;
5577 }
5578 
5579 static int
load_next_buffer(UnpicklerObject * self)5580 load_next_buffer(UnpicklerObject *self)
5581 {
5582     if (self->buffers == NULL) {
5583         PickleState *st = _Pickle_GetGlobalState();
5584         PyErr_SetString(st->UnpicklingError,
5585                         "pickle stream refers to out-of-band data "
5586                         "but no *buffers* argument was given");
5587         return -1;
5588     }
5589     PyObject *buf = PyIter_Next(self->buffers);
5590     if (buf == NULL) {
5591         if (!PyErr_Occurred()) {
5592             PickleState *st = _Pickle_GetGlobalState();
5593             PyErr_SetString(st->UnpicklingError,
5594                             "not enough out-of-band buffers");
5595         }
5596         return -1;
5597     }
5598 
5599     PDATA_PUSH(self->stack, buf, -1);
5600     return 0;
5601 }
5602 
5603 static int
load_readonly_buffer(UnpicklerObject * self)5604 load_readonly_buffer(UnpicklerObject *self)
5605 {
5606     Py_ssize_t len = Py_SIZE(self->stack);
5607     if (len <= self->stack->fence) {
5608         return Pdata_stack_underflow(self->stack);
5609     }
5610 
5611     PyObject *obj = self->stack->data[len - 1];
5612     PyObject *view = PyMemoryView_FromObject(obj);
5613     if (view == NULL) {
5614         return -1;
5615     }
5616     if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5617         /* Original object is writable */
5618         PyMemoryView_GET_BUFFER(view)->readonly = 1;
5619         self->stack->data[len - 1] = view;
5620         Py_DECREF(obj);
5621     }
5622     else {
5623         /* Original object is read-only, no need to replace it */
5624         Py_DECREF(view);
5625     }
5626     return 0;
5627 }
5628 
5629 static int
load_unicode(UnpicklerObject * self)5630 load_unicode(UnpicklerObject *self)
5631 {
5632     PyObject *str;
5633     Py_ssize_t len;
5634     char *s = NULL;
5635 
5636     if ((len = _Unpickler_Readline(self, &s)) < 0)
5637         return -1;
5638     if (len < 1)
5639         return bad_readline();
5640 
5641     str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5642     if (str == NULL)
5643         return -1;
5644 
5645     PDATA_PUSH(self->stack, str, -1);
5646     return 0;
5647 }
5648 
5649 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5650 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5651 {
5652     PyObject *str;
5653     Py_ssize_t size;
5654     char *s;
5655 
5656     if (_Unpickler_Read(self, &s, nbytes) < 0)
5657         return -1;
5658 
5659     size = calc_binsize(s, nbytes);
5660     if (size < 0) {
5661         PyErr_Format(PyExc_OverflowError,
5662                      "BINUNICODE exceeds system's maximum size of %zd bytes",
5663                      PY_SSIZE_T_MAX);
5664         return -1;
5665     }
5666 
5667     if (_Unpickler_Read(self, &s, size) < 0)
5668         return -1;
5669 
5670     str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5671     if (str == NULL)
5672         return -1;
5673 
5674     PDATA_PUSH(self->stack, str, -1);
5675     return 0;
5676 }
5677 
5678 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5679 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5680 {
5681     PyObject *tuple;
5682 
5683     if (Py_SIZE(self->stack) < len)
5684         return Pdata_stack_underflow(self->stack);
5685 
5686     tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5687     if (tuple == NULL)
5688         return -1;
5689     PDATA_PUSH(self->stack, tuple, -1);
5690     return 0;
5691 }
5692 
5693 static int
load_tuple(UnpicklerObject * self)5694 load_tuple(UnpicklerObject *self)
5695 {
5696     Py_ssize_t i;
5697 
5698     if ((i = marker(self)) < 0)
5699         return -1;
5700 
5701     return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5702 }
5703 
5704 static int
load_empty_list(UnpicklerObject * self)5705 load_empty_list(UnpicklerObject *self)
5706 {
5707     PyObject *list;
5708 
5709     if ((list = PyList_New(0)) == NULL)
5710         return -1;
5711     PDATA_PUSH(self->stack, list, -1);
5712     return 0;
5713 }
5714 
5715 static int
load_empty_dict(UnpicklerObject * self)5716 load_empty_dict(UnpicklerObject *self)
5717 {
5718     PyObject *dict;
5719 
5720     if ((dict = PyDict_New()) == NULL)
5721         return -1;
5722     PDATA_PUSH(self->stack, dict, -1);
5723     return 0;
5724 }
5725 
5726 static int
load_empty_set(UnpicklerObject * self)5727 load_empty_set(UnpicklerObject *self)
5728 {
5729     PyObject *set;
5730 
5731     if ((set = PySet_New(NULL)) == NULL)
5732         return -1;
5733     PDATA_PUSH(self->stack, set, -1);
5734     return 0;
5735 }
5736 
5737 static int
load_list(UnpicklerObject * self)5738 load_list(UnpicklerObject *self)
5739 {
5740     PyObject *list;
5741     Py_ssize_t i;
5742 
5743     if ((i = marker(self)) < 0)
5744         return -1;
5745 
5746     list = Pdata_poplist(self->stack, i);
5747     if (list == NULL)
5748         return -1;
5749     PDATA_PUSH(self->stack, list, -1);
5750     return 0;
5751 }
5752 
5753 static int
load_dict(UnpicklerObject * self)5754 load_dict(UnpicklerObject *self)
5755 {
5756     PyObject *dict, *key, *value;
5757     Py_ssize_t i, j, k;
5758 
5759     if ((i = marker(self)) < 0)
5760         return -1;
5761     j = Py_SIZE(self->stack);
5762 
5763     if ((dict = PyDict_New()) == NULL)
5764         return -1;
5765 
5766     if ((j - i) % 2 != 0) {
5767         PickleState *st = _Pickle_GetGlobalState();
5768         PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5769         Py_DECREF(dict);
5770         return -1;
5771     }
5772 
5773     for (k = i + 1; k < j; k += 2) {
5774         key = self->stack->data[k - 1];
5775         value = self->stack->data[k];
5776         if (PyDict_SetItem(dict, key, value) < 0) {
5777             Py_DECREF(dict);
5778             return -1;
5779         }
5780     }
5781     Pdata_clear(self->stack, i);
5782     PDATA_PUSH(self->stack, dict, -1);
5783     return 0;
5784 }
5785 
5786 static int
load_frozenset(UnpicklerObject * self)5787 load_frozenset(UnpicklerObject *self)
5788 {
5789     PyObject *items;
5790     PyObject *frozenset;
5791     Py_ssize_t i;
5792 
5793     if ((i = marker(self)) < 0)
5794         return -1;
5795 
5796     items = Pdata_poptuple(self->stack, i);
5797     if (items == NULL)
5798         return -1;
5799 
5800     frozenset = PyFrozenSet_New(items);
5801     Py_DECREF(items);
5802     if (frozenset == NULL)
5803         return -1;
5804 
5805     PDATA_PUSH(self->stack, frozenset, -1);
5806     return 0;
5807 }
5808 
5809 static PyObject *
instantiate(PyObject * cls,PyObject * args)5810 instantiate(PyObject *cls, PyObject *args)
5811 {
5812     /* Caller must assure args are a tuple.  Normally, args come from
5813        Pdata_poptuple which packs objects from the top of the stack
5814        into a newly created tuple. */
5815     assert(PyTuple_Check(args));
5816     if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5817         _Py_IDENTIFIER(__getinitargs__);
5818         _Py_IDENTIFIER(__new__);
5819         PyObject *func;
5820         if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5821             return NULL;
5822         }
5823         if (func == NULL) {
5824             return _PyObject_CallMethodIdOneArg(cls, &PyId___new__, cls);
5825         }
5826         Py_DECREF(func);
5827     }
5828     return PyObject_CallObject(cls, args);
5829 }
5830 
5831 static int
load_obj(UnpicklerObject * self)5832 load_obj(UnpicklerObject *self)
5833 {
5834     PyObject *cls, *args, *obj = NULL;
5835     Py_ssize_t i;
5836 
5837     if ((i = marker(self)) < 0)
5838         return -1;
5839 
5840     if (Py_SIZE(self->stack) - i < 1)
5841         return Pdata_stack_underflow(self->stack);
5842 
5843     args = Pdata_poptuple(self->stack, i + 1);
5844     if (args == NULL)
5845         return -1;
5846 
5847     PDATA_POP(self->stack, cls);
5848     if (cls) {
5849         obj = instantiate(cls, args);
5850         Py_DECREF(cls);
5851     }
5852     Py_DECREF(args);
5853     if (obj == NULL)
5854         return -1;
5855 
5856     PDATA_PUSH(self->stack, obj, -1);
5857     return 0;
5858 }
5859 
5860 static int
load_inst(UnpicklerObject * self)5861 load_inst(UnpicklerObject *self)
5862 {
5863     PyObject *cls = NULL;
5864     PyObject *args = NULL;
5865     PyObject *obj = NULL;
5866     PyObject *module_name;
5867     PyObject *class_name;
5868     Py_ssize_t len;
5869     Py_ssize_t i;
5870     char *s;
5871 
5872     if ((i = marker(self)) < 0)
5873         return -1;
5874     if ((len = _Unpickler_Readline(self, &s)) < 0)
5875         return -1;
5876     if (len < 2)
5877         return bad_readline();
5878 
5879     /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5880        identifiers are permitted in Python 3.0, since the INST opcode is only
5881        supported by older protocols on Python 2.x. */
5882     module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5883     if (module_name == NULL)
5884         return -1;
5885 
5886     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5887         if (len < 2) {
5888             Py_DECREF(module_name);
5889             return bad_readline();
5890         }
5891         class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5892         if (class_name != NULL) {
5893             cls = find_class(self, module_name, class_name);
5894             Py_DECREF(class_name);
5895         }
5896     }
5897     Py_DECREF(module_name);
5898 
5899     if (cls == NULL)
5900         return -1;
5901 
5902     if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5903         obj = instantiate(cls, args);
5904         Py_DECREF(args);
5905     }
5906     Py_DECREF(cls);
5907 
5908     if (obj == NULL)
5909         return -1;
5910 
5911     PDATA_PUSH(self->stack, obj, -1);
5912     return 0;
5913 }
5914 
5915 static int
load_newobj(UnpicklerObject * self)5916 load_newobj(UnpicklerObject *self)
5917 {
5918     PyObject *args = NULL;
5919     PyObject *clsraw = NULL;
5920     PyTypeObject *cls;          /* clsraw cast to its true type */
5921     PyObject *obj;
5922     PickleState *st = _Pickle_GetGlobalState();
5923 
5924     /* Stack is ... cls argtuple, and we want to call
5925      * cls.__new__(cls, *argtuple).
5926      */
5927     PDATA_POP(self->stack, args);
5928     if (args == NULL)
5929         goto error;
5930     if (!PyTuple_Check(args)) {
5931         PyErr_SetString(st->UnpicklingError,
5932                         "NEWOBJ expected an arg " "tuple.");
5933         goto error;
5934     }
5935 
5936     PDATA_POP(self->stack, clsraw);
5937     cls = (PyTypeObject *)clsraw;
5938     if (cls == NULL)
5939         goto error;
5940     if (!PyType_Check(cls)) {
5941         PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5942                         "isn't a type object");
5943         goto error;
5944     }
5945     if (cls->tp_new == NULL) {
5946         PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5947                         "has NULL tp_new");
5948         goto error;
5949     }
5950 
5951     /* Call __new__. */
5952     obj = cls->tp_new(cls, args, NULL);
5953     if (obj == NULL)
5954         goto error;
5955 
5956     Py_DECREF(args);
5957     Py_DECREF(clsraw);
5958     PDATA_PUSH(self->stack, obj, -1);
5959     return 0;
5960 
5961   error:
5962     Py_XDECREF(args);
5963     Py_XDECREF(clsraw);
5964     return -1;
5965 }
5966 
5967 static int
load_newobj_ex(UnpicklerObject * self)5968 load_newobj_ex(UnpicklerObject *self)
5969 {
5970     PyObject *cls, *args, *kwargs;
5971     PyObject *obj;
5972     PickleState *st = _Pickle_GetGlobalState();
5973 
5974     PDATA_POP(self->stack, kwargs);
5975     if (kwargs == NULL) {
5976         return -1;
5977     }
5978     PDATA_POP(self->stack, args);
5979     if (args == NULL) {
5980         Py_DECREF(kwargs);
5981         return -1;
5982     }
5983     PDATA_POP(self->stack, cls);
5984     if (cls == NULL) {
5985         Py_DECREF(kwargs);
5986         Py_DECREF(args);
5987         return -1;
5988     }
5989 
5990     if (!PyType_Check(cls)) {
5991         PyErr_Format(st->UnpicklingError,
5992                      "NEWOBJ_EX class argument must be a type, not %.200s",
5993                      Py_TYPE(cls)->tp_name);
5994         goto error;
5995     }
5996 
5997     if (((PyTypeObject *)cls)->tp_new == NULL) {
5998         PyErr_SetString(st->UnpicklingError,
5999                         "NEWOBJ_EX class argument doesn't have __new__");
6000         goto error;
6001     }
6002     if (!PyTuple_Check(args)) {
6003         PyErr_Format(st->UnpicklingError,
6004                      "NEWOBJ_EX args argument must be a tuple, not %.200s",
6005                      Py_TYPE(args)->tp_name);
6006         goto error;
6007     }
6008     if (!PyDict_Check(kwargs)) {
6009         PyErr_Format(st->UnpicklingError,
6010                      "NEWOBJ_EX kwargs argument must be a dict, not %.200s",
6011                      Py_TYPE(kwargs)->tp_name);
6012         goto error;
6013     }
6014 
6015     obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
6016     Py_DECREF(kwargs);
6017     Py_DECREF(args);
6018     Py_DECREF(cls);
6019     if (obj == NULL) {
6020         return -1;
6021     }
6022     PDATA_PUSH(self->stack, obj, -1);
6023     return 0;
6024 
6025 error:
6026     Py_DECREF(kwargs);
6027     Py_DECREF(args);
6028     Py_DECREF(cls);
6029     return -1;
6030 }
6031 
6032 static int
load_global(UnpicklerObject * self)6033 load_global(UnpicklerObject *self)
6034 {
6035     PyObject *global = NULL;
6036     PyObject *module_name;
6037     PyObject *global_name;
6038     Py_ssize_t len;
6039     char *s;
6040 
6041     if ((len = _Unpickler_Readline(self, &s)) < 0)
6042         return -1;
6043     if (len < 2)
6044         return bad_readline();
6045     module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6046     if (!module_name)
6047         return -1;
6048 
6049     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
6050         if (len < 2) {
6051             Py_DECREF(module_name);
6052             return bad_readline();
6053         }
6054         global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6055         if (global_name) {
6056             global = find_class(self, module_name, global_name);
6057             Py_DECREF(global_name);
6058         }
6059     }
6060     Py_DECREF(module_name);
6061 
6062     if (global == NULL)
6063         return -1;
6064     PDATA_PUSH(self->stack, global, -1);
6065     return 0;
6066 }
6067 
6068 static int
load_stack_global(UnpicklerObject * self)6069 load_stack_global(UnpicklerObject *self)
6070 {
6071     PyObject *global;
6072     PyObject *module_name;
6073     PyObject *global_name;
6074 
6075     PDATA_POP(self->stack, global_name);
6076     PDATA_POP(self->stack, module_name);
6077     if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6078         global_name == NULL || !PyUnicode_CheckExact(global_name)) {
6079         PickleState *st = _Pickle_GetGlobalState();
6080         PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
6081         Py_XDECREF(global_name);
6082         Py_XDECREF(module_name);
6083         return -1;
6084     }
6085     global = find_class(self, module_name, global_name);
6086     Py_DECREF(global_name);
6087     Py_DECREF(module_name);
6088     if (global == NULL)
6089         return -1;
6090     PDATA_PUSH(self->stack, global, -1);
6091     return 0;
6092 }
6093 
6094 static int
load_persid(UnpicklerObject * self)6095 load_persid(UnpicklerObject *self)
6096 {
6097     PyObject *pid, *obj;
6098     Py_ssize_t len;
6099     char *s;
6100 
6101     if (self->pers_func) {
6102         if ((len = _Unpickler_Readline(self, &s)) < 0)
6103             return -1;
6104         if (len < 1)
6105             return bad_readline();
6106 
6107         pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6108         if (pid == NULL) {
6109             if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6110                 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6111                                 "persistent IDs in protocol 0 must be "
6112                                 "ASCII strings");
6113             }
6114             return -1;
6115         }
6116 
6117         obj = call_method(self->pers_func, self->pers_func_self, pid);
6118         Py_DECREF(pid);
6119         if (obj == NULL)
6120             return -1;
6121 
6122         PDATA_PUSH(self->stack, obj, -1);
6123         return 0;
6124     }
6125     else {
6126         PickleState *st = _Pickle_GetGlobalState();
6127         PyErr_SetString(st->UnpicklingError,
6128                         "A load persistent id instruction was encountered,\n"
6129                         "but no persistent_load function was specified.");
6130         return -1;
6131     }
6132 }
6133 
6134 static int
load_binpersid(UnpicklerObject * self)6135 load_binpersid(UnpicklerObject *self)
6136 {
6137     PyObject *pid, *obj;
6138 
6139     if (self->pers_func) {
6140         PDATA_POP(self->stack, pid);
6141         if (pid == NULL)
6142             return -1;
6143 
6144         obj = call_method(self->pers_func, self->pers_func_self, pid);
6145         Py_DECREF(pid);
6146         if (obj == NULL)
6147             return -1;
6148 
6149         PDATA_PUSH(self->stack, obj, -1);
6150         return 0;
6151     }
6152     else {
6153         PickleState *st = _Pickle_GetGlobalState();
6154         PyErr_SetString(st->UnpicklingError,
6155                         "A load persistent id instruction was encountered,\n"
6156                         "but no persistent_load function was specified.");
6157         return -1;
6158     }
6159 }
6160 
6161 static int
load_pop(UnpicklerObject * self)6162 load_pop(UnpicklerObject *self)
6163 {
6164     Py_ssize_t len = Py_SIZE(self->stack);
6165 
6166     /* Note that we split the (pickle.py) stack into two stacks,
6167      * an object stack and a mark stack. We have to be clever and
6168      * pop the right one. We do this by looking at the top of the
6169      * mark stack first, and only signalling a stack underflow if
6170      * the object stack is empty and the mark stack doesn't match
6171      * our expectations.
6172      */
6173     if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
6174         self->num_marks--;
6175         self->stack->mark_set = self->num_marks != 0;
6176         self->stack->fence = self->num_marks ?
6177                 self->marks[self->num_marks - 1] : 0;
6178     } else if (len <= self->stack->fence)
6179         return Pdata_stack_underflow(self->stack);
6180     else {
6181         len--;
6182         Py_DECREF(self->stack->data[len]);
6183         Py_SET_SIZE(self->stack, len);
6184     }
6185     return 0;
6186 }
6187 
6188 static int
load_pop_mark(UnpicklerObject * self)6189 load_pop_mark(UnpicklerObject *self)
6190 {
6191     Py_ssize_t i;
6192 
6193     if ((i = marker(self)) < 0)
6194         return -1;
6195 
6196     Pdata_clear(self->stack, i);
6197 
6198     return 0;
6199 }
6200 
6201 static int
load_dup(UnpicklerObject * self)6202 load_dup(UnpicklerObject *self)
6203 {
6204     PyObject *last;
6205     Py_ssize_t len = Py_SIZE(self->stack);
6206 
6207     if (len <= self->stack->fence)
6208         return Pdata_stack_underflow(self->stack);
6209     last = self->stack->data[len - 1];
6210     PDATA_APPEND(self->stack, last, -1);
6211     return 0;
6212 }
6213 
6214 static int
load_get(UnpicklerObject * self)6215 load_get(UnpicklerObject *self)
6216 {
6217     PyObject *key, *value;
6218     Py_ssize_t idx;
6219     Py_ssize_t len;
6220     char *s;
6221 
6222     if ((len = _Unpickler_Readline(self, &s)) < 0)
6223         return -1;
6224     if (len < 2)
6225         return bad_readline();
6226 
6227     key = PyLong_FromString(s, NULL, 10);
6228     if (key == NULL)
6229         return -1;
6230     idx = PyLong_AsSsize_t(key);
6231     if (idx == -1 && PyErr_Occurred()) {
6232         Py_DECREF(key);
6233         return -1;
6234     }
6235 
6236     value = _Unpickler_MemoGet(self, idx);
6237     if (value == NULL) {
6238         if (!PyErr_Occurred()) {
6239            PickleState *st = _Pickle_GetGlobalState();
6240            PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6241         }
6242         Py_DECREF(key);
6243         return -1;
6244     }
6245     Py_DECREF(key);
6246 
6247     PDATA_APPEND(self->stack, value, -1);
6248     return 0;
6249 }
6250 
6251 static int
load_binget(UnpicklerObject * self)6252 load_binget(UnpicklerObject *self)
6253 {
6254     PyObject *value;
6255     Py_ssize_t idx;
6256     char *s;
6257 
6258     if (_Unpickler_Read(self, &s, 1) < 0)
6259         return -1;
6260 
6261     idx = Py_CHARMASK(s[0]);
6262 
6263     value = _Unpickler_MemoGet(self, idx);
6264     if (value == NULL) {
6265         PyObject *key = PyLong_FromSsize_t(idx);
6266         if (key != NULL) {
6267             PickleState *st = _Pickle_GetGlobalState();
6268             PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6269             Py_DECREF(key);
6270         }
6271         return -1;
6272     }
6273 
6274     PDATA_APPEND(self->stack, value, -1);
6275     return 0;
6276 }
6277 
6278 static int
load_long_binget(UnpicklerObject * self)6279 load_long_binget(UnpicklerObject *self)
6280 {
6281     PyObject *value;
6282     Py_ssize_t idx;
6283     char *s;
6284 
6285     if (_Unpickler_Read(self, &s, 4) < 0)
6286         return -1;
6287 
6288     idx = calc_binsize(s, 4);
6289 
6290     value = _Unpickler_MemoGet(self, idx);
6291     if (value == NULL) {
6292         PyObject *key = PyLong_FromSsize_t(idx);
6293         if (key != NULL) {
6294             PickleState *st = _Pickle_GetGlobalState();
6295             PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6296             Py_DECREF(key);
6297         }
6298         return -1;
6299     }
6300 
6301     PDATA_APPEND(self->stack, value, -1);
6302     return 0;
6303 }
6304 
6305 /* Push an object from the extension registry (EXT[124]).  nbytes is
6306  * the number of bytes following the opcode, holding the index (code) value.
6307  */
6308 static int
load_extension(UnpicklerObject * self,int nbytes)6309 load_extension(UnpicklerObject *self, int nbytes)
6310 {
6311     char *codebytes;            /* the nbytes bytes after the opcode */
6312     long code;                  /* calc_binint returns long */
6313     PyObject *py_code;          /* code as a Python int */
6314     PyObject *obj;              /* the object to push */
6315     PyObject *pair;             /* (module_name, class_name) */
6316     PyObject *module_name, *class_name;
6317     PickleState *st = _Pickle_GetGlobalState();
6318 
6319     assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
6320     if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
6321         return -1;
6322     code = calc_binint(codebytes, nbytes);
6323     if (code <= 0) {            /* note that 0 is forbidden */
6324         /* Corrupt or hostile pickle. */
6325         PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
6326         return -1;
6327     }
6328 
6329     /* Look for the code in the cache. */
6330     py_code = PyLong_FromLong(code);
6331     if (py_code == NULL)
6332         return -1;
6333     obj = PyDict_GetItemWithError(st->extension_cache, py_code);
6334     if (obj != NULL) {
6335         /* Bingo. */
6336         Py_DECREF(py_code);
6337         PDATA_APPEND(self->stack, obj, -1);
6338         return 0;
6339     }
6340     if (PyErr_Occurred()) {
6341         Py_DECREF(py_code);
6342         return -1;
6343     }
6344 
6345     /* Look up the (module_name, class_name) pair. */
6346     pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
6347     if (pair == NULL) {
6348         Py_DECREF(py_code);
6349         if (!PyErr_Occurred()) {
6350             PyErr_Format(PyExc_ValueError, "unregistered extension "
6351                          "code %ld", code);
6352         }
6353         return -1;
6354     }
6355     /* Since the extension registry is manipulable via Python code,
6356      * confirm that pair is really a 2-tuple of strings.
6357      */
6358     if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6359         goto error;
6360     }
6361 
6362     module_name = PyTuple_GET_ITEM(pair, 0);
6363     if (!PyUnicode_Check(module_name)) {
6364         goto error;
6365     }
6366 
6367     class_name = PyTuple_GET_ITEM(pair, 1);
6368     if (!PyUnicode_Check(class_name)) {
6369         goto error;
6370     }
6371 
6372     /* Load the object. */
6373     obj = find_class(self, module_name, class_name);
6374     if (obj == NULL) {
6375         Py_DECREF(py_code);
6376         return -1;
6377     }
6378     /* Cache code -> obj. */
6379     code = PyDict_SetItem(st->extension_cache, py_code, obj);
6380     Py_DECREF(py_code);
6381     if (code < 0) {
6382         Py_DECREF(obj);
6383         return -1;
6384     }
6385     PDATA_PUSH(self->stack, obj, -1);
6386     return 0;
6387 
6388 error:
6389     Py_DECREF(py_code);
6390     PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6391                  "isn't a 2-tuple of strings", code);
6392     return -1;
6393 }
6394 
6395 static int
load_put(UnpicklerObject * self)6396 load_put(UnpicklerObject *self)
6397 {
6398     PyObject *key, *value;
6399     Py_ssize_t idx;
6400     Py_ssize_t len;
6401     char *s = NULL;
6402 
6403     if ((len = _Unpickler_Readline(self, &s)) < 0)
6404         return -1;
6405     if (len < 2)
6406         return bad_readline();
6407     if (Py_SIZE(self->stack) <= self->stack->fence)
6408         return Pdata_stack_underflow(self->stack);
6409     value = self->stack->data[Py_SIZE(self->stack) - 1];
6410 
6411     key = PyLong_FromString(s, NULL, 10);
6412     if (key == NULL)
6413         return -1;
6414     idx = PyLong_AsSsize_t(key);
6415     Py_DECREF(key);
6416     if (idx < 0) {
6417         if (!PyErr_Occurred())
6418             PyErr_SetString(PyExc_ValueError,
6419                             "negative PUT argument");
6420         return -1;
6421     }
6422 
6423     return _Unpickler_MemoPut(self, idx, value);
6424 }
6425 
6426 static int
load_binput(UnpicklerObject * self)6427 load_binput(UnpicklerObject *self)
6428 {
6429     PyObject *value;
6430     Py_ssize_t idx;
6431     char *s;
6432 
6433     if (_Unpickler_Read(self, &s, 1) < 0)
6434         return -1;
6435 
6436     if (Py_SIZE(self->stack) <= self->stack->fence)
6437         return Pdata_stack_underflow(self->stack);
6438     value = self->stack->data[Py_SIZE(self->stack) - 1];
6439 
6440     idx = Py_CHARMASK(s[0]);
6441 
6442     return _Unpickler_MemoPut(self, idx, value);
6443 }
6444 
6445 static int
load_long_binput(UnpicklerObject * self)6446 load_long_binput(UnpicklerObject *self)
6447 {
6448     PyObject *value;
6449     Py_ssize_t idx;
6450     char *s;
6451 
6452     if (_Unpickler_Read(self, &s, 4) < 0)
6453         return -1;
6454 
6455     if (Py_SIZE(self->stack) <= self->stack->fence)
6456         return Pdata_stack_underflow(self->stack);
6457     value = self->stack->data[Py_SIZE(self->stack) - 1];
6458 
6459     idx = calc_binsize(s, 4);
6460     if (idx < 0) {
6461         PyErr_SetString(PyExc_ValueError,
6462                         "negative LONG_BINPUT argument");
6463         return -1;
6464     }
6465 
6466     return _Unpickler_MemoPut(self, idx, value);
6467 }
6468 
6469 static int
load_memoize(UnpicklerObject * self)6470 load_memoize(UnpicklerObject *self)
6471 {
6472     PyObject *value;
6473 
6474     if (Py_SIZE(self->stack) <= self->stack->fence)
6475         return Pdata_stack_underflow(self->stack);
6476     value = self->stack->data[Py_SIZE(self->stack) - 1];
6477 
6478     return _Unpickler_MemoPut(self, self->memo_len, value);
6479 }
6480 
6481 static int
do_append(UnpicklerObject * self,Py_ssize_t x)6482 do_append(UnpicklerObject *self, Py_ssize_t x)
6483 {
6484     PyObject *value;
6485     PyObject *slice;
6486     PyObject *list;
6487     PyObject *result;
6488     Py_ssize_t len, i;
6489 
6490     len = Py_SIZE(self->stack);
6491     if (x > len || x <= self->stack->fence)
6492         return Pdata_stack_underflow(self->stack);
6493     if (len == x)  /* nothing to do */
6494         return 0;
6495 
6496     list = self->stack->data[x - 1];
6497 
6498     if (PyList_CheckExact(list)) {
6499         Py_ssize_t list_len;
6500         int ret;
6501 
6502         slice = Pdata_poplist(self->stack, x);
6503         if (!slice)
6504             return -1;
6505         list_len = PyList_GET_SIZE(list);
6506         ret = PyList_SetSlice(list, list_len, list_len, slice);
6507         Py_DECREF(slice);
6508         return ret;
6509     }
6510     else {
6511         PyObject *extend_func;
6512         _Py_IDENTIFIER(extend);
6513 
6514         if (_PyObject_LookupAttrId(list, &PyId_extend, &extend_func) < 0) {
6515             return -1;
6516         }
6517         if (extend_func != NULL) {
6518             slice = Pdata_poplist(self->stack, x);
6519             if (!slice) {
6520                 Py_DECREF(extend_func);
6521                 return -1;
6522             }
6523             result = _Pickle_FastCall(extend_func, slice);
6524             Py_DECREF(extend_func);
6525             if (result == NULL)
6526                 return -1;
6527             Py_DECREF(result);
6528         }
6529         else {
6530             PyObject *append_func;
6531             _Py_IDENTIFIER(append);
6532 
6533             /* Even if the PEP 307 requires extend() and append() methods,
6534                fall back on append() if the object has no extend() method
6535                for backward compatibility. */
6536             append_func = _PyObject_GetAttrId(list, &PyId_append);
6537             if (append_func == NULL)
6538                 return -1;
6539             for (i = x; i < len; i++) {
6540                 value = self->stack->data[i];
6541                 result = _Pickle_FastCall(append_func, value);
6542                 if (result == NULL) {
6543                     Pdata_clear(self->stack, i + 1);
6544                     Py_SET_SIZE(self->stack, x);
6545                     Py_DECREF(append_func);
6546                     return -1;
6547                 }
6548                 Py_DECREF(result);
6549             }
6550             Py_SET_SIZE(self->stack, x);
6551             Py_DECREF(append_func);
6552         }
6553     }
6554 
6555     return 0;
6556 }
6557 
6558 static int
load_append(UnpicklerObject * self)6559 load_append(UnpicklerObject *self)
6560 {
6561     if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6562         return Pdata_stack_underflow(self->stack);
6563     return do_append(self, Py_SIZE(self->stack) - 1);
6564 }
6565 
6566 static int
load_appends(UnpicklerObject * self)6567 load_appends(UnpicklerObject *self)
6568 {
6569     Py_ssize_t i = marker(self);
6570     if (i < 0)
6571         return -1;
6572     return do_append(self, i);
6573 }
6574 
6575 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)6576 do_setitems(UnpicklerObject *self, Py_ssize_t x)
6577 {
6578     PyObject *value, *key;
6579     PyObject *dict;
6580     Py_ssize_t len, i;
6581     int status = 0;
6582 
6583     len = Py_SIZE(self->stack);
6584     if (x > len || x <= self->stack->fence)
6585         return Pdata_stack_underflow(self->stack);
6586     if (len == x)  /* nothing to do */
6587         return 0;
6588     if ((len - x) % 2 != 0) {
6589         PickleState *st = _Pickle_GetGlobalState();
6590         /* Currupt or hostile pickle -- we never write one like this. */
6591         PyErr_SetString(st->UnpicklingError,
6592                         "odd number of items for SETITEMS");
6593         return -1;
6594     }
6595 
6596     /* Here, dict does not actually need to be a PyDict; it could be anything
6597        that supports the __setitem__ attribute. */
6598     dict = self->stack->data[x - 1];
6599 
6600     for (i = x + 1; i < len; i += 2) {
6601         key = self->stack->data[i - 1];
6602         value = self->stack->data[i];
6603         if (PyObject_SetItem(dict, key, value) < 0) {
6604             status = -1;
6605             break;
6606         }
6607     }
6608 
6609     Pdata_clear(self->stack, x);
6610     return status;
6611 }
6612 
6613 static int
load_setitem(UnpicklerObject * self)6614 load_setitem(UnpicklerObject *self)
6615 {
6616     return do_setitems(self, Py_SIZE(self->stack) - 2);
6617 }
6618 
6619 static int
load_setitems(UnpicklerObject * self)6620 load_setitems(UnpicklerObject *self)
6621 {
6622     Py_ssize_t i = marker(self);
6623     if (i < 0)
6624         return -1;
6625     return do_setitems(self, i);
6626 }
6627 
6628 static int
load_additems(UnpicklerObject * self)6629 load_additems(UnpicklerObject *self)
6630 {
6631     PyObject *set;
6632     Py_ssize_t mark, len, i;
6633 
6634     mark =  marker(self);
6635     if (mark < 0)
6636         return -1;
6637     len = Py_SIZE(self->stack);
6638     if (mark > len || mark <= self->stack->fence)
6639         return Pdata_stack_underflow(self->stack);
6640     if (len == mark)  /* nothing to do */
6641         return 0;
6642 
6643     set = self->stack->data[mark - 1];
6644 
6645     if (PySet_Check(set)) {
6646         PyObject *items;
6647         int status;
6648 
6649         items = Pdata_poptuple(self->stack, mark);
6650         if (items == NULL)
6651             return -1;
6652 
6653         status = _PySet_Update(set, items);
6654         Py_DECREF(items);
6655         return status;
6656     }
6657     else {
6658         PyObject *add_func;
6659         _Py_IDENTIFIER(add);
6660 
6661         add_func = _PyObject_GetAttrId(set, &PyId_add);
6662         if (add_func == NULL)
6663             return -1;
6664         for (i = mark; i < len; i++) {
6665             PyObject *result;
6666             PyObject *item;
6667 
6668             item = self->stack->data[i];
6669             result = _Pickle_FastCall(add_func, item);
6670             if (result == NULL) {
6671                 Pdata_clear(self->stack, i + 1);
6672                 Py_SET_SIZE(self->stack, mark);
6673                 return -1;
6674             }
6675             Py_DECREF(result);
6676         }
6677         Py_SET_SIZE(self->stack, mark);
6678     }
6679 
6680     return 0;
6681 }
6682 
6683 static int
load_build(UnpicklerObject * self)6684 load_build(UnpicklerObject *self)
6685 {
6686     PyObject *state, *inst, *slotstate;
6687     PyObject *setstate;
6688     int status = 0;
6689     _Py_IDENTIFIER(__setstate__);
6690 
6691     /* Stack is ... instance, state.  We want to leave instance at
6692      * the stack top, possibly mutated via instance.__setstate__(state).
6693      */
6694     if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6695         return Pdata_stack_underflow(self->stack);
6696 
6697     PDATA_POP(self->stack, state);
6698     if (state == NULL)
6699         return -1;
6700 
6701     inst = self->stack->data[Py_SIZE(self->stack) - 1];
6702 
6703     if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6704         Py_DECREF(state);
6705         return -1;
6706     }
6707     if (setstate != NULL) {
6708         PyObject *result;
6709 
6710         /* The explicit __setstate__ is responsible for everything. */
6711         result = _Pickle_FastCall(setstate, state);
6712         Py_DECREF(setstate);
6713         if (result == NULL)
6714             return -1;
6715         Py_DECREF(result);
6716         return 0;
6717     }
6718 
6719     /* A default __setstate__.  First see whether state embeds a
6720      * slot state dict too (a proto 2 addition).
6721      */
6722     if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6723         PyObject *tmp = state;
6724 
6725         state = PyTuple_GET_ITEM(tmp, 0);
6726         slotstate = PyTuple_GET_ITEM(tmp, 1);
6727         Py_INCREF(state);
6728         Py_INCREF(slotstate);
6729         Py_DECREF(tmp);
6730     }
6731     else
6732         slotstate = NULL;
6733 
6734     /* Set inst.__dict__ from the state dict (if any). */
6735     if (state != Py_None) {
6736         PyObject *dict;
6737         PyObject *d_key, *d_value;
6738         Py_ssize_t i;
6739         _Py_IDENTIFIER(__dict__);
6740 
6741         if (!PyDict_Check(state)) {
6742             PickleState *st = _Pickle_GetGlobalState();
6743             PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6744             goto error;
6745         }
6746         dict = _PyObject_GetAttrId(inst, &PyId___dict__);
6747         if (dict == NULL)
6748             goto error;
6749 
6750         i = 0;
6751         while (PyDict_Next(state, &i, &d_key, &d_value)) {
6752             /* normally the keys for instance attributes are
6753                interned.  we should try to do that here. */
6754             Py_INCREF(d_key);
6755             if (PyUnicode_CheckExact(d_key))
6756                 PyUnicode_InternInPlace(&d_key);
6757             if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6758                 Py_DECREF(d_key);
6759                 goto error;
6760             }
6761             Py_DECREF(d_key);
6762         }
6763         Py_DECREF(dict);
6764     }
6765 
6766     /* Also set instance attributes from the slotstate dict (if any). */
6767     if (slotstate != NULL) {
6768         PyObject *d_key, *d_value;
6769         Py_ssize_t i;
6770 
6771         if (!PyDict_Check(slotstate)) {
6772             PickleState *st = _Pickle_GetGlobalState();
6773             PyErr_SetString(st->UnpicklingError,
6774                             "slot state is not a dictionary");
6775             goto error;
6776         }
6777         i = 0;
6778         while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6779             if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6780                 goto error;
6781         }
6782     }
6783 
6784     if (0) {
6785   error:
6786         status = -1;
6787     }
6788 
6789     Py_DECREF(state);
6790     Py_XDECREF(slotstate);
6791     return status;
6792 }
6793 
6794 static int
load_mark(UnpicklerObject * self)6795 load_mark(UnpicklerObject *self)
6796 {
6797 
6798     /* Note that we split the (pickle.py) stack into two stacks, an
6799      * object stack and a mark stack. Here we push a mark onto the
6800      * mark stack.
6801      */
6802 
6803     if (self->num_marks >= self->marks_size) {
6804         size_t alloc = ((size_t)self->num_marks << 1) + 20;
6805         Py_ssize_t *marks_new = self->marks;
6806         PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6807         if (marks_new == NULL) {
6808             PyErr_NoMemory();
6809             return -1;
6810         }
6811         self->marks = marks_new;
6812         self->marks_size = (Py_ssize_t)alloc;
6813     }
6814 
6815     self->stack->mark_set = 1;
6816     self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6817 
6818     return 0;
6819 }
6820 
6821 static int
load_reduce(UnpicklerObject * self)6822 load_reduce(UnpicklerObject *self)
6823 {
6824     PyObject *callable = NULL;
6825     PyObject *argtup = NULL;
6826     PyObject *obj = NULL;
6827 
6828     PDATA_POP(self->stack, argtup);
6829     if (argtup == NULL)
6830         return -1;
6831     PDATA_POP(self->stack, callable);
6832     if (callable) {
6833         obj = PyObject_CallObject(callable, argtup);
6834         Py_DECREF(callable);
6835     }
6836     Py_DECREF(argtup);
6837 
6838     if (obj == NULL)
6839         return -1;
6840 
6841     PDATA_PUSH(self->stack, obj, -1);
6842     return 0;
6843 }
6844 
6845 /* Just raises an error if we don't know the protocol specified.  PROTO
6846  * is the first opcode for protocols >= 2.
6847  */
6848 static int
load_proto(UnpicklerObject * self)6849 load_proto(UnpicklerObject *self)
6850 {
6851     char *s;
6852     int i;
6853 
6854     if (_Unpickler_Read(self, &s, 1) < 0)
6855         return -1;
6856 
6857     i = (unsigned char)s[0];
6858     if (i <= HIGHEST_PROTOCOL) {
6859         self->proto = i;
6860         return 0;
6861     }
6862 
6863     PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6864     return -1;
6865 }
6866 
6867 static int
load_frame(UnpicklerObject * self)6868 load_frame(UnpicklerObject *self)
6869 {
6870     char *s;
6871     Py_ssize_t frame_len;
6872 
6873     if (_Unpickler_Read(self, &s, 8) < 0)
6874         return -1;
6875 
6876     frame_len = calc_binsize(s, 8);
6877     if (frame_len < 0) {
6878         PyErr_Format(PyExc_OverflowError,
6879                      "FRAME length exceeds system's maximum of %zd bytes",
6880                      PY_SSIZE_T_MAX);
6881         return -1;
6882     }
6883 
6884     if (_Unpickler_Read(self, &s, frame_len) < 0)
6885         return -1;
6886 
6887     /* Rewind to start of frame */
6888     self->next_read_idx -= frame_len;
6889     return 0;
6890 }
6891 
6892 static PyObject *
load(UnpicklerObject * self)6893 load(UnpicklerObject *self)
6894 {
6895     PyObject *value = NULL;
6896     char *s = NULL;
6897 
6898     self->num_marks = 0;
6899     self->stack->mark_set = 0;
6900     self->stack->fence = 0;
6901     self->proto = 0;
6902     if (Py_SIZE(self->stack))
6903         Pdata_clear(self->stack, 0);
6904 
6905     /* Convenient macros for the dispatch while-switch loop just below. */
6906 #define OP(opcode, load_func) \
6907     case opcode: if (load_func(self) < 0) break; continue;
6908 
6909 #define OP_ARG(opcode, load_func, arg) \
6910     case opcode: if (load_func(self, (arg)) < 0) break; continue;
6911 
6912     while (1) {
6913         if (_Unpickler_Read(self, &s, 1) < 0) {
6914             PickleState *st = _Pickle_GetGlobalState();
6915             if (PyErr_ExceptionMatches(st->UnpicklingError)) {
6916                 PyErr_Format(PyExc_EOFError, "Ran out of input");
6917             }
6918             return NULL;
6919         }
6920 
6921         switch ((enum opcode)s[0]) {
6922         OP(NONE, load_none)
6923         OP(BININT, load_binint)
6924         OP(BININT1, load_binint1)
6925         OP(BININT2, load_binint2)
6926         OP(INT, load_int)
6927         OP(LONG, load_long)
6928         OP_ARG(LONG1, load_counted_long, 1)
6929         OP_ARG(LONG4, load_counted_long, 4)
6930         OP(FLOAT, load_float)
6931         OP(BINFLOAT, load_binfloat)
6932         OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6933         OP_ARG(BINBYTES, load_counted_binbytes, 4)
6934         OP_ARG(BINBYTES8, load_counted_binbytes, 8)
6935         OP(BYTEARRAY8, load_counted_bytearray)
6936         OP(NEXT_BUFFER, load_next_buffer)
6937         OP(READONLY_BUFFER, load_readonly_buffer)
6938         OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6939         OP_ARG(BINSTRING, load_counted_binstring, 4)
6940         OP(STRING, load_string)
6941         OP(UNICODE, load_unicode)
6942         OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6943         OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6944         OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
6945         OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6946         OP_ARG(TUPLE1, load_counted_tuple, 1)
6947         OP_ARG(TUPLE2, load_counted_tuple, 2)
6948         OP_ARG(TUPLE3, load_counted_tuple, 3)
6949         OP(TUPLE, load_tuple)
6950         OP(EMPTY_LIST, load_empty_list)
6951         OP(LIST, load_list)
6952         OP(EMPTY_DICT, load_empty_dict)
6953         OP(DICT, load_dict)
6954         OP(EMPTY_SET, load_empty_set)
6955         OP(ADDITEMS, load_additems)
6956         OP(FROZENSET, load_frozenset)
6957         OP(OBJ, load_obj)
6958         OP(INST, load_inst)
6959         OP(NEWOBJ, load_newobj)
6960         OP(NEWOBJ_EX, load_newobj_ex)
6961         OP(GLOBAL, load_global)
6962         OP(STACK_GLOBAL, load_stack_global)
6963         OP(APPEND, load_append)
6964         OP(APPENDS, load_appends)
6965         OP(BUILD, load_build)
6966         OP(DUP, load_dup)
6967         OP(BINGET, load_binget)
6968         OP(LONG_BINGET, load_long_binget)
6969         OP(GET, load_get)
6970         OP(MARK, load_mark)
6971         OP(BINPUT, load_binput)
6972         OP(LONG_BINPUT, load_long_binput)
6973         OP(PUT, load_put)
6974         OP(MEMOIZE, load_memoize)
6975         OP(POP, load_pop)
6976         OP(POP_MARK, load_pop_mark)
6977         OP(SETITEM, load_setitem)
6978         OP(SETITEMS, load_setitems)
6979         OP(PERSID, load_persid)
6980         OP(BINPERSID, load_binpersid)
6981         OP(REDUCE, load_reduce)
6982         OP(PROTO, load_proto)
6983         OP(FRAME, load_frame)
6984         OP_ARG(EXT1, load_extension, 1)
6985         OP_ARG(EXT2, load_extension, 2)
6986         OP_ARG(EXT4, load_extension, 4)
6987         OP_ARG(NEWTRUE, load_bool, Py_True)
6988         OP_ARG(NEWFALSE, load_bool, Py_False)
6989 
6990         case STOP:
6991             break;
6992 
6993         default:
6994             {
6995                 PickleState *st = _Pickle_GetGlobalState();
6996                 unsigned char c = (unsigned char) *s;
6997                 if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
6998                     PyErr_Format(st->UnpicklingError,
6999                                  "invalid load key, '%c'.", c);
7000                 }
7001                 else {
7002                     PyErr_Format(st->UnpicklingError,
7003                                  "invalid load key, '\\x%02x'.", c);
7004                 }
7005                 return NULL;
7006             }
7007         }
7008 
7009         break;                  /* and we are done! */
7010     }
7011 
7012     if (PyErr_Occurred()) {
7013         return NULL;
7014     }
7015 
7016     if (_Unpickler_SkipConsumed(self) < 0)
7017         return NULL;
7018 
7019     PDATA_POP(self->stack, value);
7020     return value;
7021 }
7022 
7023 /*[clinic input]
7024 
7025 _pickle.Unpickler.load
7026 
7027 Load a pickle.
7028 
7029 Read a pickled object representation from the open file object given
7030 in the constructor, and return the reconstituted object hierarchy
7031 specified therein.
7032 [clinic start generated code]*/
7033 
7034 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)7035 _pickle_Unpickler_load_impl(UnpicklerObject *self)
7036 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
7037 {
7038     UnpicklerObject *unpickler = (UnpicklerObject*)self;
7039 
7040     /* Check whether the Unpickler was initialized correctly. This prevents
7041        segfaulting if a subclass overridden __init__ with a function that does
7042        not call Unpickler.__init__(). Here, we simply ensure that self->read
7043        is not NULL. */
7044     if (unpickler->read == NULL) {
7045         PickleState *st = _Pickle_GetGlobalState();
7046         PyErr_Format(st->UnpicklingError,
7047                      "Unpickler.__init__() was not called by %s.__init__()",
7048                      Py_TYPE(unpickler)->tp_name);
7049         return NULL;
7050     }
7051 
7052     return load(unpickler);
7053 }
7054 
7055 /* The name of find_class() is misleading. In newer pickle protocols, this
7056    function is used for loading any global (i.e., functions), not just
7057    classes. The name is kept only for backward compatibility. */
7058 
7059 /*[clinic input]
7060 
7061 _pickle.Unpickler.find_class
7062 
7063   module_name: object
7064   global_name: object
7065   /
7066 
7067 Return an object from a specified module.
7068 
7069 If necessary, the module will be imported. Subclasses may override
7070 this method (e.g. to restrict unpickling of arbitrary classes and
7071 functions).
7072 
7073 This method is called whenever a class or a function object is
7074 needed.  Both arguments passed are str objects.
7075 [clinic start generated code]*/
7076 
7077 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)7078 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7079                                   PyObject *module_name,
7080                                   PyObject *global_name)
7081 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
7082 {
7083     PyObject *global;
7084     PyObject *module;
7085 
7086     if (PySys_Audit("pickle.find_class", "OO",
7087                     module_name, global_name) < 0) {
7088         return NULL;
7089     }
7090 
7091     /* Try to map the old names used in Python 2.x to the new ones used in
7092        Python 3.x.  We do this only with old pickle protocols and when the
7093        user has not disabled the feature. */
7094     if (self->proto < 3 && self->fix_imports) {
7095         PyObject *key;
7096         PyObject *item;
7097         PickleState *st = _Pickle_GetGlobalState();
7098 
7099         /* Check if the global (i.e., a function or a class) was renamed
7100            or moved to another module. */
7101         key = PyTuple_Pack(2, module_name, global_name);
7102         if (key == NULL)
7103             return NULL;
7104         item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
7105         Py_DECREF(key);
7106         if (item) {
7107             if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7108                 PyErr_Format(PyExc_RuntimeError,
7109                              "_compat_pickle.NAME_MAPPING values should be "
7110                              "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7111                 return NULL;
7112             }
7113             module_name = PyTuple_GET_ITEM(item, 0);
7114             global_name = PyTuple_GET_ITEM(item, 1);
7115             if (!PyUnicode_Check(module_name) ||
7116                 !PyUnicode_Check(global_name)) {
7117                 PyErr_Format(PyExc_RuntimeError,
7118                              "_compat_pickle.NAME_MAPPING values should be "
7119                              "pairs of str, not (%.200s, %.200s)",
7120                              Py_TYPE(module_name)->tp_name,
7121                              Py_TYPE(global_name)->tp_name);
7122                 return NULL;
7123             }
7124         }
7125         else if (PyErr_Occurred()) {
7126             return NULL;
7127         }
7128         else {
7129             /* Check if the module was renamed. */
7130             item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7131             if (item) {
7132                 if (!PyUnicode_Check(item)) {
7133                     PyErr_Format(PyExc_RuntimeError,
7134                                 "_compat_pickle.IMPORT_MAPPING values should be "
7135                                 "strings, not %.200s", Py_TYPE(item)->tp_name);
7136                     return NULL;
7137                 }
7138                 module_name = item;
7139             }
7140             else if (PyErr_Occurred()) {
7141                 return NULL;
7142             }
7143         }
7144     }
7145 
7146     /*
7147      * we don't use PyImport_GetModule here, because it can return partially-
7148      * initialised modules, which then cause the getattribute to fail.
7149      */
7150     module = PyImport_Import(module_name);
7151     if (module == NULL) {
7152         return NULL;
7153     }
7154     global = getattribute(module, global_name, self->proto >= 4);
7155     Py_DECREF(module);
7156     return global;
7157 }
7158 
7159 /*[clinic input]
7160 
7161 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
7162 
7163 Returns size in memory, in bytes.
7164 [clinic start generated code]*/
7165 
7166 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)7167 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7168 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7169 {
7170     Py_ssize_t res;
7171 
7172     res = _PyObject_SIZE(Py_TYPE(self));
7173     if (self->memo != NULL)
7174         res += self->memo_size * sizeof(PyObject *);
7175     if (self->marks != NULL)
7176         res += self->marks_size * sizeof(Py_ssize_t);
7177     if (self->input_line != NULL)
7178         res += strlen(self->input_line) + 1;
7179     if (self->encoding != NULL)
7180         res += strlen(self->encoding) + 1;
7181     if (self->errors != NULL)
7182         res += strlen(self->errors) + 1;
7183     return res;
7184 }
7185 
/* Method table for Unpickler objects: load(), find_class() and
   __sizeof__().  Each entry is a *_METHODDEF macro defined outside this
   part of the file (presumably emitted by Argument Clinic alongside the
   matching *_impl functions -- confirm in the generated header). */
static struct PyMethodDef Unpickler_methods[] = {
    _PICKLE_UNPICKLER_LOAD_METHODDEF
    _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
    _PICKLE_UNPICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
7192 
/* Destructor for Unpickler objects.
 *
 * Untracks the object from the garbage collector, drops every object
 * reference it holds, releases the input buffer view if one is held,
 * frees the memo and the raw C allocations, and finally hands the memory
 * back through the type's tp_free slot.
 */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Untrack first so the collector cannot visit the object while it is
       being torn down. */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->readinto);
    Py_XDECREF(self->read);
    Py_XDECREF(self->peek);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->buffers);
    /* A non-NULL buf means a buffer view is currently held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    PyMem_Free(self->input_line);
    PyMem_Free(self->encoding);
    PyMem_Free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
7217 
/* GC traversal for Unpickler objects: report each object reference held
 * directly in the struct to the visit callback.
 *
 * Py_VISIT returns from this function early with the callback's result if
 * that result is non-zero; otherwise 0 is returned after all members have
 * been visited.
 */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->readline);
    Py_VISIT(self->readinto);
    Py_VISIT(self->read);
    Py_VISIT(self->peek);
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->buffers);
    return 0;
}
7230 
/* Drop everything the Unpickler owns and leave it in an empty state.
 *
 * Besides its use as a clear function, this is called by
 * Unpickler.__init__() to discard the previous content when __init__ is
 * invoked more than once.  Every released pointer is reset to NULL, so
 * the function can safely run again on the same object.
 *
 * Always returns 0.
 */
static int
Unpickler_clear(UnpicklerObject *self)
{
    Py_CLEAR(self->readline);
    Py_CLEAR(self->readinto);
    Py_CLEAR(self->read);
    Py_CLEAR(self->peek);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->buffers);
    /* A non-NULL buf means a buffer view is currently held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    _Unpickler_MemoCleanup(self);
    /* PyMem_Free() does not reset the pointer, so do it by hand. */
    PyMem_Free(self->marks);
    self->marks = NULL;
    PyMem_Free(self->input_line);
    self->input_line = NULL;
    PyMem_Free(self->encoding);
    self->encoding = NULL;
    PyMem_Free(self->errors);
    self->errors = NULL;

    return 0;
}
7258 
7259 /*[clinic input]
7260 
7261 _pickle.Unpickler.__init__
7262 
7263   file: object
7264   *
7265   fix_imports: bool = True
7266   encoding: str = 'ASCII'
7267   errors: str = 'strict'
7268   buffers: object(c_default="NULL") = ()
7269 
7270 This takes a binary file for reading a pickle data stream.
7271 
7272 The protocol version of the pickle is detected automatically, so no
7273 protocol argument is needed.  Bytes past the pickled object's
7274 representation are ignored.
7275 
7276 The argument *file* must have two methods, a read() method that takes
7277 an integer argument, and a readline() method that requires no
7278 arguments.  Both methods should return bytes.  Thus *file* can be a
7279 binary file object opened for reading, an io.BytesIO object, or any
7280 other custom object that meets this interface.
7281 
7282 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7283 which are used to control compatibility support for pickle stream
7284 generated by Python 2.  If *fix_imports* is True, pickle will try to
7285 map the old Python 2 names to the new names used in Python 3.  The
7286 *encoding* and *errors* tell pickle how to decode 8-bit string
7287 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7288 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7289 string instances as bytes objects.
7290 [clinic start generated code]*/
7291 
7292 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7293 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7294                                 int fix_imports, const char *encoding,
7295                                 const char *errors, PyObject *buffers)
7296 /*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
7297 {
7298     _Py_IDENTIFIER(persistent_load);
7299 
7300     /* In case of multiple __init__() calls, clear previous content. */
7301     if (self->read != NULL)
7302         (void)Unpickler_clear(self);
7303 
7304     if (_Unpickler_SetInputStream(self, file) < 0)
7305         return -1;
7306 
7307     if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
7308         return -1;
7309 
7310     if (_Unpickler_SetBuffers(self, buffers) < 0)
7311         return -1;
7312 
7313     self->fix_imports = fix_imports;
7314 
7315     if (init_method_ref((PyObject *)self, &PyId_persistent_load,
7316                         &self->pers_func, &self->pers_func_self) < 0)
7317     {
7318         return -1;
7319     }
7320 
7321     self->stack = (Pdata *)Pdata_New();
7322     if (self->stack == NULL)
7323         return -1;
7324 
7325     self->memo_size = 32;
7326     self->memo = _Unpickler_NewMemo(self->memo_size);
7327     if (self->memo == NULL)
7328         return -1;
7329 
7330     self->proto = 0;
7331 
7332     return 0;
7333 }
7334 
7335 
7336 /* Define a proxy object for the Unpickler's internal memo object. This is to
7337  * avoid breaking code like:
7338  *  unpickler.memo.clear()
7339  * and
7340  *  unpickler.memo = saved_memo
7341  * Is this a good idea? Not really, but we don't want to break code that uses
7342  * it. Note that we don't implement the entire mapping API here. This is
7343  * intentional, as these should be treated as black-box implementation details.
7344  *
7345  * We do, however, have to implement pickling/unpickling support because of
7346  * real-world code like cvs2svn.
7347  */
7348 
7349 /*[clinic input]
7350 _pickle.UnpicklerMemoProxy.clear
7351 
7352 Remove all items from memo.
7353 [clinic start generated code]*/
7354 
7355 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)7356 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
7357 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
7358 {
7359     _Unpickler_MemoCleanup(self->unpickler);
7360     self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7361     if (self->unpickler->memo == NULL)
7362         return NULL;
7363     Py_RETURN_NONE;
7364 }
7365 
7366 /*[clinic input]
7367 _pickle.UnpicklerMemoProxy.copy
7368 
7369 Copy the memo to a new object.
7370 [clinic start generated code]*/
7371 
7372 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)7373 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
7374 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
7375 {
7376     size_t i;
7377     PyObject *new_memo = PyDict_New();
7378     if (new_memo == NULL)
7379         return NULL;
7380 
7381     for (i = 0; i < self->unpickler->memo_size; i++) {
7382         int status;
7383         PyObject *key, *value;
7384 
7385         value = self->unpickler->memo[i];
7386         if (value == NULL)
7387             continue;
7388 
7389         key = PyLong_FromSsize_t(i);
7390         if (key == NULL)
7391             goto error;
7392         status = PyDict_SetItem(new_memo, key, value);
7393         Py_DECREF(key);
7394         if (status < 0)
7395             goto error;
7396     }
7397     return new_memo;
7398 
7399 error:
7400     Py_DECREF(new_memo);
7401     return NULL;
7402 }
7403 
7404 /*[clinic input]
7405 _pickle.UnpicklerMemoProxy.__reduce__
7406 
7407 Implement pickling support.
7408 [clinic start generated code]*/
7409 
7410 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)7411 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
7412 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
7413 {
7414     PyObject *reduce_value;
7415     PyObject *constructor_args;
7416     PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
7417     if (contents == NULL)
7418         return NULL;
7419 
7420     reduce_value = PyTuple_New(2);
7421     if (reduce_value == NULL) {
7422         Py_DECREF(contents);
7423         return NULL;
7424     }
7425     constructor_args = PyTuple_New(1);
7426     if (constructor_args == NULL) {
7427         Py_DECREF(contents);
7428         Py_DECREF(reduce_value);
7429         return NULL;
7430     }
7431     PyTuple_SET_ITEM(constructor_args, 0, contents);
7432     Py_INCREF((PyObject *)&PyDict_Type);
7433     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7434     PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7435     return reduce_value;
7436 }
7437 
7438 static PyMethodDef unpicklerproxy_methods[] = {
7439     _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
7440     _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
7441     _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
7442     {NULL, NULL}    /* sentinel */
7443 };
7444 
7445 static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject * self)7446 UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
7447 {
7448     PyObject_GC_UnTrack(self);
7449     Py_XDECREF(self->unpickler);
7450     PyObject_GC_Del((PyObject *)self);
7451 }
7452 
7453 static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject * self,visitproc visit,void * arg)7454 UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
7455                             visitproc visit, void *arg)
7456 {
7457     Py_VISIT(self->unpickler);
7458     return 0;
7459 }
7460 
7461 static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject * self)7462 UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
7463 {
7464     Py_CLEAR(self->unpickler);
7465     return 0;
7466 }
7467 
7468 static PyTypeObject UnpicklerMemoProxyType = {
7469     PyVarObject_HEAD_INIT(NULL, 0)
7470     "_pickle.UnpicklerMemoProxy",               /*tp_name*/
7471     sizeof(UnpicklerMemoProxyObject),           /*tp_basicsize*/
7472     0,
7473     (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
7474     0,                                          /* tp_vectorcall_offset */
7475     0,                                          /* tp_getattr */
7476     0,                                          /* tp_setattr */
7477     0,                                          /* tp_as_async */
7478     0,                                          /* tp_repr */
7479     0,                                          /* tp_as_number */
7480     0,                                          /* tp_as_sequence */
7481     0,                                          /* tp_as_mapping */
7482     PyObject_HashNotImplemented,                /* tp_hash */
7483     0,                                          /* tp_call */
7484     0,                                          /* tp_str */
7485     PyObject_GenericGetAttr,                    /* tp_getattro */
7486     PyObject_GenericSetAttr,                    /* tp_setattro */
7487     0,                                          /* tp_as_buffer */
7488     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7489     0,                                          /* tp_doc */
7490     (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
7491     (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
7492     0,                                          /* tp_richcompare */
7493     0,                                          /* tp_weaklistoffset */
7494     0,                                          /* tp_iter */
7495     0,                                          /* tp_iternext */
7496     unpicklerproxy_methods,                     /* tp_methods */
7497 };
7498 
7499 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)7500 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7501 {
7502     UnpicklerMemoProxyObject *self;
7503 
7504     self = PyObject_GC_New(UnpicklerMemoProxyObject,
7505                            &UnpicklerMemoProxyType);
7506     if (self == NULL)
7507         return NULL;
7508     Py_INCREF(unpickler);
7509     self->unpickler = unpickler;
7510     PyObject_GC_Track(self);
7511     return (PyObject *)self;
7512 }
7513 
7514 /*****************************************************************************/
7515 
7516 
7517 static PyObject *
Unpickler_get_memo(UnpicklerObject * self,void * Py_UNUSED (ignored))7518 Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
7519 {
7520     return UnpicklerMemoProxy_New(self);
7521 }
7522 
7523 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))7524 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7525 {
7526     PyObject **new_memo;
7527     size_t new_memo_size = 0;
7528 
7529     if (obj == NULL) {
7530         PyErr_SetString(PyExc_TypeError,
7531                         "attribute deletion is not supported");
7532         return -1;
7533     }
7534 
7535     if (Py_IS_TYPE(obj, &UnpicklerMemoProxyType)) {
7536         UnpicklerObject *unpickler =
7537             ((UnpicklerMemoProxyObject *)obj)->unpickler;
7538 
7539         new_memo_size = unpickler->memo_size;
7540         new_memo = _Unpickler_NewMemo(new_memo_size);
7541         if (new_memo == NULL)
7542             return -1;
7543 
7544         for (size_t i = 0; i < new_memo_size; i++) {
7545             Py_XINCREF(unpickler->memo[i]);
7546             new_memo[i] = unpickler->memo[i];
7547         }
7548     }
7549     else if (PyDict_Check(obj)) {
7550         Py_ssize_t i = 0;
7551         PyObject *key, *value;
7552 
7553         new_memo_size = PyDict_GET_SIZE(obj);
7554         new_memo = _Unpickler_NewMemo(new_memo_size);
7555         if (new_memo == NULL)
7556             return -1;
7557 
7558         while (PyDict_Next(obj, &i, &key, &value)) {
7559             Py_ssize_t idx;
7560             if (!PyLong_Check(key)) {
7561                 PyErr_SetString(PyExc_TypeError,
7562                                 "memo key must be integers");
7563                 goto error;
7564             }
7565             idx = PyLong_AsSsize_t(key);
7566             if (idx == -1 && PyErr_Occurred())
7567                 goto error;
7568             if (idx < 0) {
7569                 PyErr_SetString(PyExc_ValueError,
7570                                 "memo key must be positive integers.");
7571                 goto error;
7572             }
7573             if (_Unpickler_MemoPut(self, idx, value) < 0)
7574                 goto error;
7575         }
7576     }
7577     else {
7578         PyErr_Format(PyExc_TypeError,
7579                      "'memo' attribute must be an UnpicklerMemoProxy object "
7580                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7581         return -1;
7582     }
7583 
7584     _Unpickler_MemoCleanup(self);
7585     self->memo_size = new_memo_size;
7586     self->memo = new_memo;
7587 
7588     return 0;
7589 
7590   error:
7591     if (new_memo_size) {
7592         for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7593             Py_XDECREF(new_memo[i]);
7594         }
7595         PyMem_FREE(new_memo);
7596     }
7597     return -1;
7598 }
7599 
7600 static PyObject *
Unpickler_get_persload(UnpicklerObject * self,void * Py_UNUSED (ignored))7601 Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7602 {
7603     if (self->pers_func == NULL) {
7604         PyErr_SetString(PyExc_AttributeError, "persistent_load");
7605         return NULL;
7606     }
7607     return reconstruct_method(self->pers_func, self->pers_func_self);
7608 }
7609 
7610 static int
Unpickler_set_persload(UnpicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))7611 Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
7612 {
7613     if (value == NULL) {
7614         PyErr_SetString(PyExc_TypeError,
7615                         "attribute deletion is not supported");
7616         return -1;
7617     }
7618     if (!PyCallable_Check(value)) {
7619         PyErr_SetString(PyExc_TypeError,
7620                         "persistent_load must be a callable taking "
7621                         "one argument");
7622         return -1;
7623     }
7624 
7625     self->pers_func_self = NULL;
7626     Py_INCREF(value);
7627     Py_XSETREF(self->pers_func, value);
7628 
7629     return 0;
7630 }
7631 
7632 static PyGetSetDef Unpickler_getsets[] = {
7633     {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
7634     {"persistent_load", (getter)Unpickler_get_persload,
7635                         (setter)Unpickler_set_persload},
7636     {NULL}
7637 };
7638 
7639 static PyTypeObject Unpickler_Type = {
7640     PyVarObject_HEAD_INIT(NULL, 0)
7641     "_pickle.Unpickler",                /*tp_name*/
7642     sizeof(UnpicklerObject),            /*tp_basicsize*/
7643     0,                                  /*tp_itemsize*/
7644     (destructor)Unpickler_dealloc,      /*tp_dealloc*/
7645     0,                                  /*tp_vectorcall_offset*/
7646     0,                                  /*tp_getattr*/
7647     0,                                  /*tp_setattr*/
7648     0,                                  /*tp_as_async*/
7649     0,                                  /*tp_repr*/
7650     0,                                  /*tp_as_number*/
7651     0,                                  /*tp_as_sequence*/
7652     0,                                  /*tp_as_mapping*/
7653     0,                                  /*tp_hash*/
7654     0,                                  /*tp_call*/
7655     0,                                  /*tp_str*/
7656     0,                                  /*tp_getattro*/
7657     0,                                  /*tp_setattro*/
7658     0,                                  /*tp_as_buffer*/
7659     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7660     _pickle_Unpickler___init____doc__,  /*tp_doc*/
7661     (traverseproc)Unpickler_traverse,   /*tp_traverse*/
7662     (inquiry)Unpickler_clear,           /*tp_clear*/
7663     0,                                  /*tp_richcompare*/
7664     0,                                  /*tp_weaklistoffset*/
7665     0,                                  /*tp_iter*/
7666     0,                                  /*tp_iternext*/
7667     Unpickler_methods,                  /*tp_methods*/
7668     0,                                  /*tp_members*/
7669     Unpickler_getsets,                  /*tp_getset*/
7670     0,                                  /*tp_base*/
7671     0,                                  /*tp_dict*/
7672     0,                                  /*tp_descr_get*/
7673     0,                                  /*tp_descr_set*/
7674     0,                                  /*tp_dictoffset*/
7675     _pickle_Unpickler___init__,         /*tp_init*/
7676     PyType_GenericAlloc,                /*tp_alloc*/
7677     PyType_GenericNew,                  /*tp_new*/
7678     PyObject_GC_Del,                    /*tp_free*/
7679     0,                                  /*tp_is_gc*/
7680 };
7681 
7682 /*[clinic input]
7683 
7684 _pickle.dump
7685 
7686   obj: object
7687   file: object
7688   protocol: object = None
7689   *
7690   fix_imports: bool = True
7691   buffer_callback: object = None
7692 
7693 Write a pickled representation of obj to the open file object file.
7694 
7695 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7696 be more efficient.
7697 
7698 The optional *protocol* argument tells the pickler to use the given
7699 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
7700 protocol is 4. It was introduced in Python 3.4, and is incompatible
7701 with previous versions.
7702 
7703 Specifying a negative protocol version selects the highest protocol
7704 version supported.  The higher the protocol used, the more recent the
7705 version of Python needed to read the pickle produced.
7706 
7707 The *file* argument must have a write() method that accepts a single
7708 bytes argument.  It can thus be a file object opened for binary
7709 writing, an io.BytesIO instance, or any other custom object that meets
7710 this interface.
7711 
7712 If *fix_imports* is True and protocol is less than 3, pickle will try
7713 to map the new Python 3 names to the old module names used in Python
7714 2, so that the pickle data stream is readable with Python 2.
7715 
7716 If *buffer_callback* is None (the default), buffer views are serialized
7717 into *file* as part of the pickle stream.  It is an error if
7718 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7719 
7720 [clinic start generated code]*/
7721 
7722 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7723 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7724                   PyObject *protocol, int fix_imports,
7725                   PyObject *buffer_callback)
7726 /*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
7727 {
7728     PicklerObject *pickler = _Pickler_New();
7729 
7730     if (pickler == NULL)
7731         return NULL;
7732 
7733     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7734         goto error;
7735 
7736     if (_Pickler_SetOutputStream(pickler, file) < 0)
7737         goto error;
7738 
7739     if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7740         goto error;
7741 
7742     if (dump(pickler, obj) < 0)
7743         goto error;
7744 
7745     if (_Pickler_FlushToFile(pickler) < 0)
7746         goto error;
7747 
7748     Py_DECREF(pickler);
7749     Py_RETURN_NONE;
7750 
7751   error:
7752     Py_XDECREF(pickler);
7753     return NULL;
7754 }
7755 
7756 /*[clinic input]
7757 
7758 _pickle.dumps
7759 
7760   obj: object
7761   protocol: object = None
7762   *
7763   fix_imports: bool = True
7764   buffer_callback: object = None
7765 
7766 Return the pickled representation of the object as a bytes object.
7767 
7768 The optional *protocol* argument tells the pickler to use the given
7769 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
7770 protocol is 4. It was introduced in Python 3.4, and is incompatible
7771 with previous versions.
7772 
7773 Specifying a negative protocol version selects the highest protocol
7774 version supported.  The higher the protocol used, the more recent the
7775 version of Python needed to read the pickle produced.
7776 
7777 If *fix_imports* is True and *protocol* is less than 3, pickle will
7778 try to map the new Python 3 names to the old module names used in
7779 Python 2, so that the pickle data stream is readable with Python 2.
7780 
7781 If *buffer_callback* is None (the default), buffer views are serialized
7782 into *file* as part of the pickle stream.  It is an error if
7783 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7784 
7785 [clinic start generated code]*/
7786 
7787 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7788 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7789                    int fix_imports, PyObject *buffer_callback)
7790 /*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
7791 {
7792     PyObject *result;
7793     PicklerObject *pickler = _Pickler_New();
7794 
7795     if (pickler == NULL)
7796         return NULL;
7797 
7798     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7799         goto error;
7800 
7801     if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7802         goto error;
7803 
7804     if (dump(pickler, obj) < 0)
7805         goto error;
7806 
7807     result = _Pickler_GetString(pickler);
7808     Py_DECREF(pickler);
7809     return result;
7810 
7811   error:
7812     Py_XDECREF(pickler);
7813     return NULL;
7814 }
7815 
7816 /*[clinic input]
7817 
7818 _pickle.load
7819 
7820   file: object
7821   *
7822   fix_imports: bool = True
7823   encoding: str = 'ASCII'
7824   errors: str = 'strict'
7825   buffers: object(c_default="NULL") = ()
7826 
7827 Read and return an object from the pickle data stored in a file.
7828 
7829 This is equivalent to ``Unpickler(file).load()``, but may be more
7830 efficient.
7831 
7832 The protocol version of the pickle is detected automatically, so no
7833 protocol argument is needed.  Bytes past the pickled object's
7834 representation are ignored.
7835 
7836 The argument *file* must have two methods, a read() method that takes
7837 an integer argument, and a readline() method that requires no
7838 arguments.  Both methods should return bytes.  Thus *file* can be a
7839 binary file object opened for reading, an io.BytesIO object, or any
7840 other custom object that meets this interface.
7841 
7842 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7843 which are used to control compatibility support for pickle stream
7844 generated by Python 2.  If *fix_imports* is True, pickle will try to
7845 map the old Python 2 names to the new names used in Python 3.  The
7846 *encoding* and *errors* tell pickle how to decode 8-bit string
7847 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7848 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7849 string instances as bytes objects.
7850 [clinic start generated code]*/
7851 
7852 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7853 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7854                   const char *encoding, const char *errors,
7855                   PyObject *buffers)
7856 /*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
7857 {
7858     PyObject *result;
7859     UnpicklerObject *unpickler = _Unpickler_New();
7860 
7861     if (unpickler == NULL)
7862         return NULL;
7863 
7864     if (_Unpickler_SetInputStream(unpickler, file) < 0)
7865         goto error;
7866 
7867     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7868         goto error;
7869 
7870     if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7871         goto error;
7872 
7873     unpickler->fix_imports = fix_imports;
7874 
7875     result = load(unpickler);
7876     Py_DECREF(unpickler);
7877     return result;
7878 
7879   error:
7880     Py_XDECREF(unpickler);
7881     return NULL;
7882 }
7883 
7884 /*[clinic input]
7885 
7886 _pickle.loads
7887 
7888   data: object
7889   /
7890   *
7891   fix_imports: bool = True
7892   encoding: str = 'ASCII'
7893   errors: str = 'strict'
7894   buffers: object(c_default="NULL") = ()
7895 
7896 Read and return an object from the given pickle data.
7897 
7898 The protocol version of the pickle is detected automatically, so no
7899 protocol argument is needed.  Bytes past the pickled object's
7900 representation are ignored.
7901 
7902 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7903 which are used to control compatibility support for pickle stream
7904 generated by Python 2.  If *fix_imports* is True, pickle will try to
7905 map the old Python 2 names to the new names used in Python 3.  The
7906 *encoding* and *errors* tell pickle how to decode 8-bit string
7907 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7908 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7909 string instances as bytes objects.
7910 [clinic start generated code]*/
7911 
7912 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7913 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7914                    const char *encoding, const char *errors,
7915                    PyObject *buffers)
7916 /*[clinic end generated code: output=82ac1e6b588e6d02 input=b3615540d0535087]*/
7917 {
7918     PyObject *result;
7919     UnpicklerObject *unpickler = _Unpickler_New();
7920 
7921     if (unpickler == NULL)
7922         return NULL;
7923 
7924     if (_Unpickler_SetStringInput(unpickler, data) < 0)
7925         goto error;
7926 
7927     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7928         goto error;
7929 
7930     if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7931         goto error;
7932 
7933     unpickler->fix_imports = fix_imports;
7934 
7935     result = load(unpickler);
7936     Py_DECREF(unpickler);
7937     return result;
7938 
7939   error:
7940     Py_XDECREF(unpickler);
7941     return NULL;
7942 }
7943 
7944 static struct PyMethodDef pickle_methods[] = {
7945     _PICKLE_DUMP_METHODDEF
7946     _PICKLE_DUMPS_METHODDEF
7947     _PICKLE_LOAD_METHODDEF
7948     _PICKLE_LOADS_METHODDEF
7949     {NULL, NULL} /* sentinel */
7950 };
7951 
7952 static int
pickle_clear(PyObject * m)7953 pickle_clear(PyObject *m)
7954 {
7955     _Pickle_ClearState(_Pickle_GetState(m));
7956     return 0;
7957 }
7958 
7959 static void
pickle_free(PyObject * m)7960 pickle_free(PyObject *m)
7961 {
7962     _Pickle_ClearState(_Pickle_GetState(m));
7963 }
7964 
7965 static int
pickle_traverse(PyObject * m,visitproc visit,void * arg)7966 pickle_traverse(PyObject *m, visitproc visit, void *arg)
7967 {
7968     PickleState *st = _Pickle_GetState(m);
7969     Py_VISIT(st->PickleError);
7970     Py_VISIT(st->PicklingError);
7971     Py_VISIT(st->UnpicklingError);
7972     Py_VISIT(st->dispatch_table);
7973     Py_VISIT(st->extension_registry);
7974     Py_VISIT(st->extension_cache);
7975     Py_VISIT(st->inverted_registry);
7976     Py_VISIT(st->name_mapping_2to3);
7977     Py_VISIT(st->import_mapping_2to3);
7978     Py_VISIT(st->name_mapping_3to2);
7979     Py_VISIT(st->import_mapping_3to2);
7980     Py_VISIT(st->codecs_encode);
7981     Py_VISIT(st->getattr);
7982     Py_VISIT(st->partial);
7983     return 0;
7984 }
7985 
7986 static struct PyModuleDef _picklemodule = {
7987     PyModuleDef_HEAD_INIT,
7988     "_pickle",            /* m_name */
7989     pickle_module_doc,    /* m_doc */
7990     sizeof(PickleState),  /* m_size */
7991     pickle_methods,       /* m_methods */
7992     NULL,                 /* m_reload */
7993     pickle_traverse,      /* m_traverse */
7994     pickle_clear,         /* m_clear */
7995     (freefunc)pickle_free /* m_free */
7996 };
7997 
7998 PyMODINIT_FUNC
PyInit__pickle(void)7999 PyInit__pickle(void)
8000 {
8001     PyObject *m;
8002     PickleState *st;
8003 
8004     m = PyState_FindModule(&_picklemodule);
8005     if (m) {
8006         Py_INCREF(m);
8007         return m;
8008     }
8009 
8010     if (PyType_Ready(&Pdata_Type) < 0)
8011         return NULL;
8012     if (PyType_Ready(&PicklerMemoProxyType) < 0)
8013         return NULL;
8014     if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
8015         return NULL;
8016 
8017     /* Create the module and add the functions. */
8018     m = PyModule_Create(&_picklemodule);
8019     if (m == NULL)
8020         return NULL;
8021 
8022     /* Add types */
8023     if (PyModule_AddType(m, &Pickler_Type) < 0) {
8024         return NULL;
8025     }
8026     if (PyModule_AddType(m, &Unpickler_Type) < 0) {
8027         return NULL;
8028     }
8029     if (PyModule_AddType(m, &PyPickleBuffer_Type) < 0) {
8030         return NULL;
8031     }
8032 
8033     st = _Pickle_GetState(m);
8034 
8035     /* Initialize the exceptions. */
8036     st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
8037     if (st->PickleError == NULL)
8038         return NULL;
8039     st->PicklingError = \
8040         PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
8041     if (st->PicklingError == NULL)
8042         return NULL;
8043     st->UnpicklingError = \
8044         PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
8045     if (st->UnpicklingError == NULL)
8046         return NULL;
8047 
8048     Py_INCREF(st->PickleError);
8049     if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
8050         return NULL;
8051     Py_INCREF(st->PicklingError);
8052     if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
8053         return NULL;
8054     Py_INCREF(st->UnpicklingError);
8055     if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
8056         return NULL;
8057 
8058     if (_Pickle_InitState(st) < 0)
8059         return NULL;
8060 
8061     return m;
8062 }
8063