/* pickle accelerator C extension: _pickle module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */

#if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
#  error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
#endif

#include "Python.h"
#include "structmember.h"

14 PyDoc_STRVAR(pickle_module_doc,
15 "Optimized C implementation for the Python pickle module.");
16 
/*[clinic input]
module _pickle
class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/

/* Pickle protocol version numbers.

   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol.
   Bump DEFAULT_PROTOCOL only when the oldest still supported version of
   Python already includes it. */
enum {
    HIGHEST_PROTOCOL = 5,
    DEFAULT_PROTOCOL = 4
};

/* Pickle opcodes, one character constant per opcode.  These must be kept
   updated with pickle.py.  Extensive docs are in pickletools.py. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4 */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95',

    /* Protocol 5 */
    BYTEARRAY8       = '\x96',
    NEXT_BUFFER      = '\x97',
    READONLY_BUFFER  = '\x98'
};

/* Tuning constants for the pickler and unpickler. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of sync with pickle.py, but it's unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16,

    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9
};

137 /*************************************************************************/
138 
139 /* State of the pickle module, per PEP 3121. */
140 typedef struct {
141     /* Exception classes for pickle. */
142     PyObject *PickleError;
143     PyObject *PicklingError;
144     PyObject *UnpicklingError;
145 
146     /* copyreg.dispatch_table, {type_object: pickling_function} */
147     PyObject *dispatch_table;
148 
149     /* For the extension opcodes EXT1, EXT2 and EXT4. */
150 
151     /* copyreg._extension_registry, {(module_name, function_name): code} */
152     PyObject *extension_registry;
153     /* copyreg._extension_cache, {code: object} */
154     PyObject *extension_cache;
155     /* copyreg._inverted_registry, {code: (module_name, function_name)} */
156     PyObject *inverted_registry;
157 
158     /* Import mappings for compatibility with Python 2.x */
159 
160     /* _compat_pickle.NAME_MAPPING,
161        {(oldmodule, oldname): (newmodule, newname)} */
162     PyObject *name_mapping_2to3;
163     /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
164     PyObject *import_mapping_2to3;
165     /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
166     PyObject *name_mapping_3to2;
167     PyObject *import_mapping_3to2;
168 
169     /* codecs.encode, used for saving bytes in older protocols */
170     PyObject *codecs_encode;
171     /* builtins.getattr, used for saving nested names with protocol < 4 */
172     PyObject *getattr;
173     /* functools.partial, used for implementing __newobj_ex__ with protocols
174        2 and 3 */
175     PyObject *partial;
176 } PickleState;
177 
178 /* Forward declaration of the _pickle module definition. */
179 static struct PyModuleDef _picklemodule;
180 
181 /* Given a module object, get its per-module state. */
182 static PickleState *
_Pickle_GetState(PyObject * module)183 _Pickle_GetState(PyObject *module)
184 {
185     return (PickleState *)PyModule_GetState(module);
186 }
187 
188 /* Find the module instance imported in the currently running sub-interpreter
189    and get its state. */
190 static PickleState *
_Pickle_GetGlobalState(void)191 _Pickle_GetGlobalState(void)
192 {
193     return _Pickle_GetState(PyState_FindModule(&_picklemodule));
194 }
195 
196 /* Clear the given pickle module state. */
197 static void
_Pickle_ClearState(PickleState * st)198 _Pickle_ClearState(PickleState *st)
199 {
200     Py_CLEAR(st->PickleError);
201     Py_CLEAR(st->PicklingError);
202     Py_CLEAR(st->UnpicklingError);
203     Py_CLEAR(st->dispatch_table);
204     Py_CLEAR(st->extension_registry);
205     Py_CLEAR(st->extension_cache);
206     Py_CLEAR(st->inverted_registry);
207     Py_CLEAR(st->name_mapping_2to3);
208     Py_CLEAR(st->import_mapping_2to3);
209     Py_CLEAR(st->name_mapping_3to2);
210     Py_CLEAR(st->import_mapping_3to2);
211     Py_CLEAR(st->codecs_encode);
212     Py_CLEAR(st->getattr);
213     Py_CLEAR(st->partial);
214 }
215 
216 /* Initialize the given pickle module state. */
217 static int
_Pickle_InitState(PickleState * st)218 _Pickle_InitState(PickleState *st)
219 {
220     PyObject *copyreg = NULL;
221     PyObject *compat_pickle = NULL;
222     PyObject *codecs = NULL;
223     PyObject *functools = NULL;
224     _Py_IDENTIFIER(getattr);
225 
226     st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
227     if (st->getattr == NULL)
228         goto error;
229 
230     copyreg = PyImport_ImportModule("copyreg");
231     if (!copyreg)
232         goto error;
233     st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
234     if (!st->dispatch_table)
235         goto error;
236     if (!PyDict_CheckExact(st->dispatch_table)) {
237         PyErr_Format(PyExc_RuntimeError,
238                      "copyreg.dispatch_table should be a dict, not %.200s",
239                      Py_TYPE(st->dispatch_table)->tp_name);
240         goto error;
241     }
242     st->extension_registry = \
243         PyObject_GetAttrString(copyreg, "_extension_registry");
244     if (!st->extension_registry)
245         goto error;
246     if (!PyDict_CheckExact(st->extension_registry)) {
247         PyErr_Format(PyExc_RuntimeError,
248                      "copyreg._extension_registry should be a dict, "
249                      "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
250         goto error;
251     }
252     st->inverted_registry = \
253         PyObject_GetAttrString(copyreg, "_inverted_registry");
254     if (!st->inverted_registry)
255         goto error;
256     if (!PyDict_CheckExact(st->inverted_registry)) {
257         PyErr_Format(PyExc_RuntimeError,
258                      "copyreg._inverted_registry should be a dict, "
259                      "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
260         goto error;
261     }
262     st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
263     if (!st->extension_cache)
264         goto error;
265     if (!PyDict_CheckExact(st->extension_cache)) {
266         PyErr_Format(PyExc_RuntimeError,
267                      "copyreg._extension_cache should be a dict, "
268                      "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
269         goto error;
270     }
271     Py_CLEAR(copyreg);
272 
273     /* Load the 2.x -> 3.x stdlib module mapping tables */
274     compat_pickle = PyImport_ImportModule("_compat_pickle");
275     if (!compat_pickle)
276         goto error;
277     st->name_mapping_2to3 = \
278         PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
279     if (!st->name_mapping_2to3)
280         goto error;
281     if (!PyDict_CheckExact(st->name_mapping_2to3)) {
282         PyErr_Format(PyExc_RuntimeError,
283                      "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
284                      Py_TYPE(st->name_mapping_2to3)->tp_name);
285         goto error;
286     }
287     st->import_mapping_2to3 = \
288         PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
289     if (!st->import_mapping_2to3)
290         goto error;
291     if (!PyDict_CheckExact(st->import_mapping_2to3)) {
292         PyErr_Format(PyExc_RuntimeError,
293                      "_compat_pickle.IMPORT_MAPPING should be a dict, "
294                      "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
295         goto error;
296     }
297     /* ... and the 3.x -> 2.x mapping tables */
298     st->name_mapping_3to2 = \
299         PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
300     if (!st->name_mapping_3to2)
301         goto error;
302     if (!PyDict_CheckExact(st->name_mapping_3to2)) {
303         PyErr_Format(PyExc_RuntimeError,
304                      "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
305                      "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
306         goto error;
307     }
308     st->import_mapping_3to2 = \
309         PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
310     if (!st->import_mapping_3to2)
311         goto error;
312     if (!PyDict_CheckExact(st->import_mapping_3to2)) {
313         PyErr_Format(PyExc_RuntimeError,
314                      "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
315                      "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
316         goto error;
317     }
318     Py_CLEAR(compat_pickle);
319 
320     codecs = PyImport_ImportModule("codecs");
321     if (codecs == NULL)
322         goto error;
323     st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
324     if (st->codecs_encode == NULL) {
325         goto error;
326     }
327     if (!PyCallable_Check(st->codecs_encode)) {
328         PyErr_Format(PyExc_RuntimeError,
329                      "codecs.encode should be a callable, not %.200s",
330                      Py_TYPE(st->codecs_encode)->tp_name);
331         goto error;
332     }
333     Py_CLEAR(codecs);
334 
335     functools = PyImport_ImportModule("functools");
336     if (!functools)
337         goto error;
338     st->partial = PyObject_GetAttrString(functools, "partial");
339     if (!st->partial)
340         goto error;
341     Py_CLEAR(functools);
342 
343     return 0;
344 
345   error:
346     Py_CLEAR(copyreg);
347     Py_CLEAR(compat_pickle);
348     Py_CLEAR(codecs);
349     Py_CLEAR(functools);
350     _Pickle_ClearState(st);
351     return -1;
352 }
353 
354 /* Helper for calling a function with a single argument quickly.
355 
356    This function steals the reference of the given argument. */
357 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)358 _Pickle_FastCall(PyObject *func, PyObject *obj)
359 {
360     PyObject *result;
361 
362     result = PyObject_CallFunctionObjArgs(func, obj, NULL);
363     Py_DECREF(obj);
364     return result;
365 }
366 
367 /*************************************************************************/
368 
369 /* Retrieve and deconstruct a method for avoiding a reference cycle
370    (pickler -> bound method of pickler -> pickler) */
371 static int
init_method_ref(PyObject * self,_Py_Identifier * name,PyObject ** method_func,PyObject ** method_self)372 init_method_ref(PyObject *self, _Py_Identifier *name,
373                 PyObject **method_func, PyObject **method_self)
374 {
375     PyObject *func, *func2;
376     int ret;
377 
378     /* *method_func and *method_self should be consistent.  All refcount decrements
379        should be occurred after setting *method_self and *method_func. */
380     ret = _PyObject_LookupAttrId(self, name, &func);
381     if (func == NULL) {
382         *method_self = NULL;
383         Py_CLEAR(*method_func);
384         return ret;
385     }
386 
387     if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
388         /* Deconstruct a bound Python method */
389         func2 = PyMethod_GET_FUNCTION(func);
390         Py_INCREF(func2);
391         *method_self = self; /* borrowed */
392         Py_XSETREF(*method_func, func2);
393         Py_DECREF(func);
394         return 0;
395     }
396     else {
397         *method_self = NULL;
398         Py_XSETREF(*method_func, func);
399         return 0;
400     }
401 }
402 
403 /* Bind a method if it was deconstructed */
404 static PyObject *
reconstruct_method(PyObject * func,PyObject * self)405 reconstruct_method(PyObject *func, PyObject *self)
406 {
407     if (self) {
408         return PyMethod_New(func, self);
409     }
410     else {
411         Py_INCREF(func);
412         return func;
413     }
414 }
415 
416 static PyObject *
call_method(PyObject * func,PyObject * self,PyObject * obj)417 call_method(PyObject *func, PyObject *self, PyObject *obj)
418 {
419     if (self) {
420         return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
421     }
422     else {
423         return PyObject_CallFunctionObjArgs(func, obj, NULL);
424     }
425 }
426 
427 /*************************************************************************/
428 
429 /* Internal data type used as the unpickling stack. */
430 typedef struct {
431     PyObject_VAR_HEAD
432     PyObject **data;
433     int mark_set;          /* is MARK set? */
434     Py_ssize_t fence;      /* position of top MARK or 0 */
435     Py_ssize_t allocated;  /* number of slots in data allocated */
436 } Pdata;
437 
438 static void
Pdata_dealloc(Pdata * self)439 Pdata_dealloc(Pdata *self)
440 {
441     Py_ssize_t i = Py_SIZE(self);
442     while (--i >= 0) {
443         Py_DECREF(self->data[i]);
444     }
445     PyMem_FREE(self->data);
446     PyObject_Del(self);
447 }
448 
449 static PyTypeObject Pdata_Type = {
450     PyVarObject_HEAD_INIT(NULL, 0)
451     "_pickle.Pdata",              /*tp_name*/
452     sizeof(Pdata),                /*tp_basicsize*/
453     sizeof(PyObject *),           /*tp_itemsize*/
454     (destructor)Pdata_dealloc,    /*tp_dealloc*/
455 };
456 
457 static PyObject *
Pdata_New(void)458 Pdata_New(void)
459 {
460     Pdata *self;
461 
462     if (!(self = PyObject_New(Pdata, &Pdata_Type)))
463         return NULL;
464     Py_SIZE(self) = 0;
465     self->mark_set = 0;
466     self->fence = 0;
467     self->allocated = 8;
468     self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
469     if (self->data)
470         return (PyObject *)self;
471     Py_DECREF(self);
472     return PyErr_NoMemory();
473 }
474 
475 
476 /* Retain only the initial clearto items.  If clearto >= the current
477  * number of items, this is a (non-erroneous) NOP.
478  */
479 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)480 Pdata_clear(Pdata *self, Py_ssize_t clearto)
481 {
482     Py_ssize_t i = Py_SIZE(self);
483 
484     assert(clearto >= self->fence);
485     if (clearto >= i)
486         return 0;
487 
488     while (--i >= clearto) {
489         Py_CLEAR(self->data[i]);
490     }
491     Py_SIZE(self) = clearto;
492     return 0;
493 }
494 
495 static int
Pdata_grow(Pdata * self)496 Pdata_grow(Pdata *self)
497 {
498     PyObject **data = self->data;
499     size_t allocated = (size_t)self->allocated;
500     size_t new_allocated;
501 
502     new_allocated = (allocated >> 3) + 6;
503     /* check for integer overflow */
504     if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
505         goto nomemory;
506     new_allocated += allocated;
507     PyMem_RESIZE(data, PyObject *, new_allocated);
508     if (data == NULL)
509         goto nomemory;
510 
511     self->data = data;
512     self->allocated = (Py_ssize_t)new_allocated;
513     return 0;
514 
515   nomemory:
516     PyErr_NoMemory();
517     return -1;
518 }
519 
520 static int
Pdata_stack_underflow(Pdata * self)521 Pdata_stack_underflow(Pdata *self)
522 {
523     PickleState *st = _Pickle_GetGlobalState();
524     PyErr_SetString(st->UnpicklingError,
525                     self->mark_set ?
526                     "unexpected MARK found" :
527                     "unpickling stack underflow");
528     return -1;
529 }
530 
531 /* D is a Pdata*.  Pop the topmost element and store it into V, which
532  * must be an lvalue holding PyObject*.  On stack underflow, UnpicklingError
533  * is raised and V is set to NULL.
534  */
535 static PyObject *
Pdata_pop(Pdata * self)536 Pdata_pop(Pdata *self)
537 {
538     if (Py_SIZE(self) <= self->fence) {
539         Pdata_stack_underflow(self);
540         return NULL;
541     }
542     return self->data[--Py_SIZE(self)];
543 }
544 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
545 
546 static int
Pdata_push(Pdata * self,PyObject * obj)547 Pdata_push(Pdata *self, PyObject *obj)
548 {
549     if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
550         return -1;
551     }
552     self->data[Py_SIZE(self)++] = obj;
553     return 0;
554 }
555 
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER.
   NOTE(review): on failure the object is not released here; whether the
   callers leak it on that path should be confirmed. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   O is evaluated exactly once.  If the push fails, the reference taken
   here is released again before the enclosing function returns ER, so
   the failure path does not leak. */
#define PDATA_APPEND(D, O, ER) do {                             \
        PyObject *pdata_append_obj_ = (PyObject *)(O);          \
        Py_INCREF(pdata_append_obj_);                           \
        if (Pdata_push((D), pdata_append_obj_) < 0) {           \
            Py_DECREF(pdata_append_obj_);                       \
            return (ER);                                        \
        }                                                       \
    } while(0)

565 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)566 Pdata_poptuple(Pdata *self, Py_ssize_t start)
567 {
568     PyObject *tuple;
569     Py_ssize_t len, i, j;
570 
571     if (start < self->fence) {
572         Pdata_stack_underflow(self);
573         return NULL;
574     }
575     len = Py_SIZE(self) - start;
576     tuple = PyTuple_New(len);
577     if (tuple == NULL)
578         return NULL;
579     for (i = start, j = 0; j < len; i++, j++)
580         PyTuple_SET_ITEM(tuple, j, self->data[i]);
581 
582     Py_SIZE(self) = start;
583     return tuple;
584 }
585 
586 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)587 Pdata_poplist(Pdata *self, Py_ssize_t start)
588 {
589     PyObject *list;
590     Py_ssize_t len, i, j;
591 
592     len = Py_SIZE(self) - start;
593     list = PyList_New(len);
594     if (list == NULL)
595         return NULL;
596     for (i = start, j = 0; j < len; i++, j++)
597         PyList_SET_ITEM(list, j, self->data[i]);
598 
599     Py_SIZE(self) = start;
600     return list;
601 }
602 
603 typedef struct {
604     PyObject *me_key;
605     Py_ssize_t me_value;
606 } PyMemoEntry;
607 
608 typedef struct {
609     size_t mt_mask;
610     size_t mt_used;
611     size_t mt_allocated;
612     PyMemoEntry *mt_table;
613 } PyMemoTable;
614 
615 typedef struct PicklerObject {
616     PyObject_HEAD
617     PyMemoTable *memo;          /* Memo table, keep track of the seen
618                                    objects to support self-referential objects
619                                    pickling. */
620     PyObject *pers_func;        /* persistent_id() method, can be NULL */
621     PyObject *pers_func_self;   /* borrowed reference to self if pers_func
622                                    is an unbound method, NULL otherwise */
623     PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
624     PyObject *reducer_override; /* hook for invoking user-defined callbacks
625                                    instead of save_global when pickling
626                                    functions and classes*/
627 
628     PyObject *write;            /* write() method of the output stream. */
629     PyObject *output_buffer;    /* Write into a local bytearray buffer before
630                                    flushing to the stream. */
631     Py_ssize_t output_len;      /* Length of output_buffer. */
632     Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
633     int proto;                  /* Pickle protocol number, >= 0 */
634     int bin;                    /* Boolean, true if proto > 0 */
635     int framing;                /* True when framing is enabled, proto >= 4 */
636     Py_ssize_t frame_start;     /* Position in output_buffer where the
637                                    current frame begins. -1 if there
638                                    is no frame currently open. */
639 
640     Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
641     int fast;                   /* Enable fast mode if set to a true value.
642                                    The fast mode disable the usage of memo,
643                                    therefore speeding the pickling process by
644                                    not generating superfluous PUT opcodes. It
645                                    should not be used if with self-referential
646                                    objects. */
647     int fast_nesting;
648     int fix_imports;            /* Indicate whether Pickler should fix
649                                    the name of globals for Python 2.x. */
650     PyObject *fast_memo;
651     PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
652 } PicklerObject;
653 
654 typedef struct UnpicklerObject {
655     PyObject_HEAD
656     Pdata *stack;               /* Pickle data stack, store unpickled objects. */
657 
658     /* The unpickler memo is just an array of PyObject *s. Using a dict
659        is unnecessary, since the keys are contiguous ints. */
660     PyObject **memo;
661     size_t memo_size;       /* Capacity of the memo array */
662     size_t memo_len;        /* Number of objects in the memo */
663 
664     PyObject *pers_func;        /* persistent_load() method, can be NULL. */
665     PyObject *pers_func_self;   /* borrowed reference to self if pers_func
666                                    is an unbound method, NULL otherwise */
667 
668     Py_buffer buffer;
669     char *input_buffer;
670     char *input_line;
671     Py_ssize_t input_len;
672     Py_ssize_t next_read_idx;
673     Py_ssize_t prefetched_idx;  /* index of first prefetched byte */
674 
675     PyObject *read;             /* read() method of the input stream. */
676     PyObject *readinto;         /* readinto() method of the input stream. */
677     PyObject *readline;         /* readline() method of the input stream. */
678     PyObject *peek;             /* peek() method of the input stream, or NULL */
679     PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */
680 
681     char *encoding;             /* Name of the encoding to be used for
682                                    decoding strings pickled using Python
683                                    2.x. The default value is "ASCII" */
684     char *errors;               /* Name of errors handling scheme to used when
685                                    decoding strings. The default value is
686                                    "strict". */
687     Py_ssize_t *marks;          /* Mark stack, used for unpickling container
688                                    objects. */
689     Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
690     Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
691     int proto;                  /* Protocol of the pickle loaded. */
692     int fix_imports;            /* Indicate whether Unpickler should fix
693                                    the name of globals pickled by Python 2.x. */
694 } UnpicklerObject;
695 
696 typedef struct {
697     PyObject_HEAD
698     PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
699 }  PicklerMemoProxyObject;
700 
701 typedef struct {
702     PyObject_HEAD
703     UnpicklerObject *unpickler;
704 } UnpicklerMemoProxyObject;
705 
706 /* Forward declarations */
707 static int save(PicklerObject *, PyObject *, int);
708 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
709 static PyTypeObject Pickler_Type;
710 static PyTypeObject Unpickler_Type;
711 
712 #include "clinic/_pickle.c.h"
713 
/*************************************************************************
 A custom hashtable mapping void* to Python ints. This is used by the pickler
 for memoization. Using a custom hashtable rather than PyDict allows us to skip
 a bunch of unnecessary object creation. This makes a huge performance
 difference. */

/* Number of slots in a freshly created table; table sizes are powers of two. */
#define MT_MINSIZE 8
/* Right shift applied to the perturbation value on each probe. */
#define PERTURB_SHIFT 5


724 static PyMemoTable *
PyMemoTable_New(void)725 PyMemoTable_New(void)
726 {
727     PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
728     if (memo == NULL) {
729         PyErr_NoMemory();
730         return NULL;
731     }
732 
733     memo->mt_used = 0;
734     memo->mt_allocated = MT_MINSIZE;
735     memo->mt_mask = MT_MINSIZE - 1;
736     memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
737     if (memo->mt_table == NULL) {
738         PyMem_FREE(memo);
739         PyErr_NoMemory();
740         return NULL;
741     }
742     memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
743 
744     return memo;
745 }
746 
747 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)748 PyMemoTable_Copy(PyMemoTable *self)
749 {
750     PyMemoTable *new = PyMemoTable_New();
751     if (new == NULL)
752         return NULL;
753 
754     new->mt_used = self->mt_used;
755     new->mt_allocated = self->mt_allocated;
756     new->mt_mask = self->mt_mask;
757     /* The table we get from _New() is probably smaller than we wanted.
758        Free it and allocate one that's the right size. */
759     PyMem_FREE(new->mt_table);
760     new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
761     if (new->mt_table == NULL) {
762         PyMem_FREE(new);
763         PyErr_NoMemory();
764         return NULL;
765     }
766     for (size_t i = 0; i < self->mt_allocated; i++) {
767         Py_XINCREF(self->mt_table[i].me_key);
768     }
769     memcpy(new->mt_table, self->mt_table,
770            sizeof(PyMemoEntry) * self->mt_allocated);
771 
772     return new;
773 }
774 
775 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)776 PyMemoTable_Size(PyMemoTable *self)
777 {
778     return self->mt_used;
779 }
780 
781 static int
PyMemoTable_Clear(PyMemoTable * self)782 PyMemoTable_Clear(PyMemoTable *self)
783 {
784     Py_ssize_t i = self->mt_allocated;
785 
786     while (--i >= 0) {
787         Py_XDECREF(self->mt_table[i].me_key);
788     }
789     self->mt_used = 0;
790     memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
791     return 0;
792 }
793 
794 static void
PyMemoTable_Del(PyMemoTable * self)795 PyMemoTable_Del(PyMemoTable *self)
796 {
797     if (self == NULL)
798         return;
799     PyMemoTable_Clear(self);
800 
801     PyMem_FREE(self->mt_table);
802     PyMem_FREE(self);
803 }
804 
805 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
806    can be considerably simpler than dictobject.c's lookdict(). */
807 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)808 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
809 {
810     size_t i;
811     size_t perturb;
812     size_t mask = self->mt_mask;
813     PyMemoEntry *table = self->mt_table;
814     PyMemoEntry *entry;
815     Py_hash_t hash = (Py_hash_t)key >> 3;
816 
817     i = hash & mask;
818     entry = &table[i];
819     if (entry->me_key == NULL || entry->me_key == key)
820         return entry;
821 
822     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
823         i = (i << 2) + i + perturb + 1;
824         entry = &table[i & mask];
825         if (entry->me_key == NULL || entry->me_key == key)
826             return entry;
827     }
828     Py_UNREACHABLE();
829 }
830 
831 /* Returns -1 on failure, 0 on success. */
832 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)833 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
834 {
835     PyMemoEntry *oldtable = NULL;
836     PyMemoEntry *oldentry, *newentry;
837     size_t new_size = MT_MINSIZE;
838     size_t to_process;
839 
840     assert(min_size > 0);
841 
842     if (min_size > PY_SSIZE_T_MAX) {
843         PyErr_NoMemory();
844         return -1;
845     }
846 
847     /* Find the smallest valid table size >= min_size. */
848     while (new_size < min_size) {
849         new_size <<= 1;
850     }
851     /* new_size needs to be a power of two. */
852     assert((new_size & (new_size - 1)) == 0);
853 
854     /* Allocate new table. */
855     oldtable = self->mt_table;
856     self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
857     if (self->mt_table == NULL) {
858         self->mt_table = oldtable;
859         PyErr_NoMemory();
860         return -1;
861     }
862     self->mt_allocated = new_size;
863     self->mt_mask = new_size - 1;
864     memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
865 
866     /* Copy entries from the old table. */
867     to_process = self->mt_used;
868     for (oldentry = oldtable; to_process > 0; oldentry++) {
869         if (oldentry->me_key != NULL) {
870             to_process--;
871             /* newentry is a pointer to a chunk of the new
872                mt_table, so we're setting the key:value pair
873                in-place. */
874             newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
875             newentry->me_key = oldentry->me_key;
876             newentry->me_value = oldentry->me_value;
877         }
878     }
879 
880     /* Deallocate the old table. */
881     PyMem_FREE(oldtable);
882     return 0;
883 }
884 
885 /* Returns NULL on failure, a pointer to the value otherwise. */
886 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)887 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
888 {
889     PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
890     if (entry->me_key == NULL)
891         return NULL;
892     return &entry->me_value;
893 }
894 
895 /* Returns -1 on failure, 0 on success. */
896 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)897 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
898 {
899     PyMemoEntry *entry;
900 
901     assert(key != NULL);
902 
903     entry = _PyMemoTable_Lookup(self, key);
904     if (entry->me_key != NULL) {
905         entry->me_value = value;
906         return 0;
907     }
908     Py_INCREF(key);
909     entry->me_key = key;
910     entry->me_value = value;
911     self->mt_used++;
912 
913     /* If we added a key, we can safely resize. Otherwise just return!
914      * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
915      *
916      * Quadrupling the size improves average table sparseness
917      * (reducing collisions) at the cost of some memory. It also halves
918      * the number of expensive resize operations in a growing memo table.
919      *
920      * Very large memo tables (over 50K items) use doubling instead.
921      * This may help applications with severe memory constraints.
922      */
923     if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
924         return 0;
925     }
926     // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
927     size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
928     return _PyMemoTable_ResizeTable(self, desired_size);
929 }
930 
931 #undef MT_MINSIZE
932 #undef PERTURB_SHIFT
933 
934 /*************************************************************************/
935 
936 
937 static int
_Pickler_ClearBuffer(PicklerObject * self)938 _Pickler_ClearBuffer(PicklerObject *self)
939 {
940     Py_XSETREF(self->output_buffer,
941               PyBytes_FromStringAndSize(NULL, self->max_output_len));
942     if (self->output_buffer == NULL)
943         return -1;
944     self->output_len = 0;
945     self->frame_start = -1;
946     return 0;
947 }
948 
/* Write `value` to the 8 bytes starting at `out`, least significant byte
   first.  When size_t is narrower than 8 bytes the remaining high bytes
   are written as zero. */
static void
_write_size64(char *out, size_t value)
{
    int pos;

    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    /* Peel off one byte per iteration; once `value` is exhausted the
       remaining positions receive 0. */
    for (pos = 0; pos < 8; pos++) {
        out[pos] = (unsigned char)(value & 0xff);
        value >>= 8;
    }
}
963 
964 static int
_Pickler_CommitFrame(PicklerObject * self)965 _Pickler_CommitFrame(PicklerObject *self)
966 {
967     size_t frame_len;
968     char *qdata;
969 
970     if (!self->framing || self->frame_start == -1)
971         return 0;
972     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
973     qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
974     if (frame_len >= FRAME_SIZE_MIN) {
975         qdata[0] = FRAME;
976         _write_size64(qdata + 1, frame_len);
977     }
978     else {
979         memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
980         self->output_len -= FRAME_HEADER_SIZE;
981     }
982     self->frame_start = -1;
983     return 0;
984 }
985 
986 static PyObject *
_Pickler_GetString(PicklerObject * self)987 _Pickler_GetString(PicklerObject *self)
988 {
989     PyObject *output_buffer = self->output_buffer;
990 
991     assert(self->output_buffer != NULL);
992 
993     if (_Pickler_CommitFrame(self))
994         return NULL;
995 
996     self->output_buffer = NULL;
997     /* Resize down to exact size */
998     if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
999         return NULL;
1000     return output_buffer;
1001 }
1002 
1003 static int
_Pickler_FlushToFile(PicklerObject * self)1004 _Pickler_FlushToFile(PicklerObject *self)
1005 {
1006     PyObject *output, *result;
1007 
1008     assert(self->write != NULL);
1009 
1010     /* This will commit the frame first */
1011     output = _Pickler_GetString(self);
1012     if (output == NULL)
1013         return -1;
1014 
1015     result = _Pickle_FastCall(self->write, output);
1016     Py_XDECREF(result);
1017     return (result == NULL) ? -1 : 0;
1018 }
1019 
1020 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1021 _Pickler_OpcodeBoundary(PicklerObject *self)
1022 {
1023     Py_ssize_t frame_len;
1024 
1025     if (!self->framing || self->frame_start == -1) {
1026         return 0;
1027     }
1028     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1029     if (frame_len >= FRAME_SIZE_TARGET) {
1030         if(_Pickler_CommitFrame(self)) {
1031             return -1;
1032         }
1033         /* Flush the content of the committed frame to the underlying
1034          * file and reuse the pickler buffer for the next frame so as
1035          * to limit memory usage when dumping large complex objects to
1036          * a file.
1037          *
1038          * self->write is NULL when called via dumps.
1039          */
1040         if (self->write != NULL) {
1041             if (_Pickler_FlushToFile(self) < 0) {
1042                 return -1;
1043             }
1044             if (_Pickler_ClearBuffer(self) < 0) {
1045                 return -1;
1046             }
1047         }
1048     }
1049     return 0;
1050 }
1051 
1052 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1053 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1054 {
1055     Py_ssize_t i, n, required;
1056     char *buffer;
1057     int need_new_frame;
1058 
1059     assert(s != NULL);
1060     need_new_frame = (self->framing && self->frame_start == -1);
1061 
1062     if (need_new_frame)
1063         n = data_len + FRAME_HEADER_SIZE;
1064     else
1065         n = data_len;
1066 
1067     required = self->output_len + n;
1068     if (required > self->max_output_len) {
1069         /* Make place in buffer for the pickle chunk */
1070         if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1071             PyErr_NoMemory();
1072             return -1;
1073         }
1074         self->max_output_len = (self->output_len + n) / 2 * 3;
1075         if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1076             return -1;
1077     }
1078     buffer = PyBytes_AS_STRING(self->output_buffer);
1079     if (need_new_frame) {
1080         /* Setup new frame */
1081         Py_ssize_t frame_start = self->output_len;
1082         self->frame_start = frame_start;
1083         for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1084             /* Write an invalid value, for debugging */
1085             buffer[frame_start + i] = 0xFE;
1086         }
1087         self->output_len += FRAME_HEADER_SIZE;
1088     }
1089     if (data_len < 8) {
1090         /* This is faster than memcpy when the string is short. */
1091         for (i = 0; i < data_len; i++) {
1092             buffer[self->output_len + i] = s[i];
1093         }
1094     }
1095     else {
1096         memcpy(buffer + self->output_len, s, data_len);
1097     }
1098     self->output_len += data_len;
1099     return data_len;
1100 }
1101 
1102 static PicklerObject *
_Pickler_New(void)1103 _Pickler_New(void)
1104 {
1105     PicklerObject *self;
1106 
1107     self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1108     if (self == NULL)
1109         return NULL;
1110 
1111     self->pers_func = NULL;
1112     self->dispatch_table = NULL;
1113     self->buffer_callback = NULL;
1114     self->write = NULL;
1115     self->proto = 0;
1116     self->bin = 0;
1117     self->framing = 0;
1118     self->frame_start = -1;
1119     self->fast = 0;
1120     self->fast_nesting = 0;
1121     self->fix_imports = 0;
1122     self->fast_memo = NULL;
1123     self->max_output_len = WRITE_BUF_SIZE;
1124     self->output_len = 0;
1125     self->reducer_override = NULL;
1126 
1127     self->memo = PyMemoTable_New();
1128     self->output_buffer = PyBytes_FromStringAndSize(NULL,
1129                                                     self->max_output_len);
1130 
1131     if (self->memo == NULL || self->output_buffer == NULL) {
1132         Py_DECREF(self);
1133         return NULL;
1134     }
1135 
1136     PyObject_GC_Track(self);
1137     return self;
1138 }
1139 
1140 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1141 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1142 {
1143     long proto;
1144 
1145     if (protocol == Py_None) {
1146         proto = DEFAULT_PROTOCOL;
1147     }
1148     else {
1149         proto = PyLong_AsLong(protocol);
1150         if (proto < 0) {
1151             if (proto == -1 && PyErr_Occurred())
1152                 return -1;
1153             proto = HIGHEST_PROTOCOL;
1154         }
1155         else if (proto > HIGHEST_PROTOCOL) {
1156             PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1157                          HIGHEST_PROTOCOL);
1158             return -1;
1159         }
1160     }
1161     self->proto = (int)proto;
1162     self->bin = proto > 0;
1163     self->fix_imports = fix_imports && proto < 3;
1164     return 0;
1165 }
1166 
1167 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1168    be called once on a freshly created Pickler. */
1169 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1170 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1171 {
1172     _Py_IDENTIFIER(write);
1173     assert(file != NULL);
1174     if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1175         return -1;
1176     }
1177     if (self->write == NULL) {
1178         PyErr_SetString(PyExc_TypeError,
1179                         "file must have a 'write' attribute");
1180         return -1;
1181     }
1182 
1183     return 0;
1184 }
1185 
1186 static int
_Pickler_SetBufferCallback(PicklerObject * self,PyObject * buffer_callback)1187 _Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1188 {
1189     if (buffer_callback == Py_None) {
1190         buffer_callback = NULL;
1191     }
1192     if (buffer_callback != NULL && self->proto < 5) {
1193         PyErr_SetString(PyExc_ValueError,
1194                         "buffer_callback needs protocol >= 5");
1195         return -1;
1196     }
1197 
1198     Py_XINCREF(buffer_callback);
1199     self->buffer_callback = buffer_callback;
1200     return 0;
1201 }
1202 
1203 /* Returns the size of the input on success, -1 on failure. This takes its
1204    own reference to `input`. */
1205 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1206 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1207 {
1208     if (self->buffer.buf != NULL)
1209         PyBuffer_Release(&self->buffer);
1210     if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1211         return -1;
1212     self->input_buffer = self->buffer.buf;
1213     self->input_len = self->buffer.len;
1214     self->next_read_idx = 0;
1215     self->prefetched_idx = self->input_len;
1216     return self->input_len;
1217 }
1218 
1219 static int
bad_readline(void)1220 bad_readline(void)
1221 {
1222     PickleState *st = _Pickle_GetGlobalState();
1223     PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1224     return -1;
1225 }
1226 
1227 /* Skip any consumed data that was only prefetched using peek() */
1228 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1229 _Unpickler_SkipConsumed(UnpicklerObject *self)
1230 {
1231     Py_ssize_t consumed;
1232     PyObject *r;
1233 
1234     consumed = self->next_read_idx - self->prefetched_idx;
1235     if (consumed <= 0)
1236         return 0;
1237 
1238     assert(self->peek);  /* otherwise we did something wrong */
1239     /* This makes a useless copy... */
1240     r = PyObject_CallFunction(self->read, "n", consumed);
1241     if (r == NULL)
1242         return -1;
1243     Py_DECREF(r);
1244 
1245     self->prefetched_idx = self->next_read_idx;
1246     return 0;
1247 }
1248 
1249 static const Py_ssize_t READ_WHOLE_LINE = -1;
1250 
1251 /* If reading from a file, we need to only pull the bytes we need, since there
1252    may be multiple pickle objects arranged contiguously in the same input
1253    buffer.
1254 
1255    If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1256    bytes from the input stream/buffer.
1257 
1258    Update the unpickler's input buffer with the newly-read data. Returns -1 on
1259    failure; on success, returns the number of bytes read from the file.
1260 
1261    On success, self->input_len will be 0; this is intentional so that when
1262    unpickling from a file, the "we've run out of data" code paths will trigger,
1263    causing the Unpickler to go back to the file for more data. Use the returned
1264    size to tell you how much data you can process. */
1265 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1266 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1267 {
1268     PyObject *data;
1269     Py_ssize_t read_size;
1270 
1271     assert(self->read != NULL);
1272 
1273     if (_Unpickler_SkipConsumed(self) < 0)
1274         return -1;
1275 
1276     if (n == READ_WHOLE_LINE) {
1277         data = _PyObject_CallNoArg(self->readline);
1278     }
1279     else {
1280         PyObject *len;
1281         /* Prefetch some data without advancing the file pointer, if possible */
1282         if (self->peek && n < PREFETCH) {
1283             len = PyLong_FromSsize_t(PREFETCH);
1284             if (len == NULL)
1285                 return -1;
1286             data = _Pickle_FastCall(self->peek, len);
1287             if (data == NULL) {
1288                 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1289                     return -1;
1290                 /* peek() is probably not supported by the given file object */
1291                 PyErr_Clear();
1292                 Py_CLEAR(self->peek);
1293             }
1294             else {
1295                 read_size = _Unpickler_SetStringInput(self, data);
1296                 Py_DECREF(data);
1297                 self->prefetched_idx = 0;
1298                 if (n <= read_size)
1299                     return n;
1300             }
1301         }
1302         len = PyLong_FromSsize_t(n);
1303         if (len == NULL)
1304             return -1;
1305         data = _Pickle_FastCall(self->read, len);
1306     }
1307     if (data == NULL)
1308         return -1;
1309 
1310     read_size = _Unpickler_SetStringInput(self, data);
1311     Py_DECREF(data);
1312     return read_size;
1313 }
1314 
1315 /* Don't call it directly: use _Unpickler_Read() */
1316 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1317 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1318 {
1319     Py_ssize_t num_read;
1320 
1321     *s = NULL;
1322     if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1323         PickleState *st = _Pickle_GetGlobalState();
1324         PyErr_SetString(st->UnpicklingError,
1325                         "read would overflow (invalid bytecode)");
1326         return -1;
1327     }
1328 
1329     /* This case is handled by the _Unpickler_Read() macro for efficiency */
1330     assert(self->next_read_idx + n > self->input_len);
1331 
1332     if (!self->read)
1333         return bad_readline();
1334 
1335     /* Extend the buffer to satisfy desired size */
1336     num_read = _Unpickler_ReadFromFile(self, n);
1337     if (num_read < 0)
1338         return -1;
1339     if (num_read < n)
1340         return bad_readline();
1341     *s = self->input_buffer;
1342     self->next_read_idx = n;
1343     return n;
1344 }
1345 
1346 /* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1347  *
1348  * This should only be used for non-small data reads where potentially
1349  * avoiding a copy is beneficial.  This method does not try to prefetch
1350  * more data into the input buffer.
1351  *
1352  * _Unpickler_Read() is recommended in most cases.
1353  */
1354 static Py_ssize_t
_Unpickler_ReadInto(UnpicklerObject * self,char * buf,Py_ssize_t n)1355 _Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1356 {
1357     assert(n != READ_WHOLE_LINE);
1358 
1359     /* Read from available buffer data, if any */
1360     Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1361     if (in_buffer > 0) {
1362         Py_ssize_t to_read = Py_MIN(in_buffer, n);
1363         memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1364         self->next_read_idx += to_read;
1365         buf += to_read;
1366         n -= to_read;
1367         if (n == 0) {
1368             /* Entire read was satisfied from buffer */
1369             return n;
1370         }
1371     }
1372 
1373     /* Read from file */
1374     if (!self->readinto) {
1375         return bad_readline();
1376     }
1377     if (_Unpickler_SkipConsumed(self) < 0) {
1378         return -1;
1379     }
1380 
1381     /* Call readinto() into user buffer */
1382     PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1383     if (buf_obj == NULL) {
1384         return -1;
1385     }
1386     PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1387     if (read_size_obj == NULL) {
1388         return -1;
1389     }
1390     Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1391     Py_DECREF(read_size_obj);
1392 
1393     if (read_size < 0) {
1394         if (!PyErr_Occurred()) {
1395             PyErr_SetString(PyExc_ValueError,
1396                             "readinto() returned negative size");
1397         }
1398         return -1;
1399     }
1400     if (read_size < n) {
1401         return bad_readline();
1402     }
1403     return n;
1404 }
1405 
/* Read `n` bytes from the unpickler's data source, storing a pointer to
   them in `*s`.

   This should be used for all data reads, rather than accessing the
   unpickler's input buffer directly.  When the input buffer still holds
   `n` bytes, `*s` is pointed at the current read position and the position
   is advanced by `n`.  Otherwise the request is passed on to
   _Unpickler_ReadImpl(), which deals with reading from input streams.

   Returns -1 (with an exception set) on failure.  On success, returns the
   number of chars read.

   This is a macro: `self` and `n` are evaluated more than once, so they
   must not have side effects. */
#define _Unpickler_Read(self, s, n)                                  \
    (((self)->input_len - (self)->next_read_idx < (n))               \
        ? _Unpickler_ReadImpl(self, (s), (n))                        \
        : (*(s) = (self)->input_buffer + (self)->next_read_idx,      \
           (self)->next_read_idx += (n),                             \
           (n)))
1425 
1426 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1427 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1428                     char **result)
1429 {
1430     char *input_line = PyMem_Realloc(self->input_line, len + 1);
1431     if (input_line == NULL) {
1432         PyErr_NoMemory();
1433         return -1;
1434     }
1435 
1436     memcpy(input_line, line, len);
1437     input_line[len] = '\0';
1438     self->input_line = input_line;
1439     *result = self->input_line;
1440     return len;
1441 }
1442 
1443 /* Read a line from the input stream/buffer. If we run off the end of the input
1444    before hitting \n, raise an error.
1445 
1446    Returns the number of chars read, or -1 on failure. */
1447 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1448 _Unpickler_Readline(UnpicklerObject *self, char **result)
1449 {
1450     Py_ssize_t i, num_read;
1451 
1452     for (i = self->next_read_idx; i < self->input_len; i++) {
1453         if (self->input_buffer[i] == '\n') {
1454             char *line_start = self->input_buffer + self->next_read_idx;
1455             num_read = i - self->next_read_idx + 1;
1456             self->next_read_idx = i + 1;
1457             return _Unpickler_CopyLine(self, line_start, num_read, result);
1458         }
1459     }
1460     if (!self->read)
1461         return bad_readline();
1462 
1463     num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1464     if (num_read < 0)
1465         return -1;
1466     if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1467         return bad_readline();
1468     self->next_read_idx = num_read;
1469     return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1470 }
1471 
1472 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1473    will be modified in place. */
1474 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1475 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1476 {
1477     size_t i;
1478 
1479     assert(new_size > self->memo_size);
1480 
1481     PyObject **memo_new = self->memo;
1482     PyMem_RESIZE(memo_new, PyObject *, new_size);
1483     if (memo_new == NULL) {
1484         PyErr_NoMemory();
1485         return -1;
1486     }
1487     self->memo = memo_new;
1488     for (i = self->memo_size; i < new_size; i++)
1489         self->memo[i] = NULL;
1490     self->memo_size = new_size;
1491     return 0;
1492 }
1493 
1494 /* Returns NULL if idx is out of bounds. */
1495 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1496 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1497 {
1498     if (idx >= self->memo_size)
1499         return NULL;
1500 
1501     return self->memo[idx];
1502 }
1503 
1504 /* Returns -1 (with an exception set) on failure, 0 on success.
1505    This takes its own reference to `value`. */
1506 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1507 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1508 {
1509     PyObject *old_item;
1510 
1511     if (idx >= self->memo_size) {
1512         if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1513             return -1;
1514         assert(idx < self->memo_size);
1515     }
1516     Py_INCREF(value);
1517     old_item = self->memo[idx];
1518     self->memo[idx] = value;
1519     if (old_item != NULL) {
1520         Py_DECREF(old_item);
1521     }
1522     else {
1523         self->memo_len++;
1524     }
1525     return 0;
1526 }
1527 
1528 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1529 _Unpickler_NewMemo(Py_ssize_t new_size)
1530 {
1531     PyObject **memo = PyMem_NEW(PyObject *, new_size);
1532     if (memo == NULL) {
1533         PyErr_NoMemory();
1534         return NULL;
1535     }
1536     memset(memo, 0, new_size * sizeof(PyObject *));
1537     return memo;
1538 }
1539 
1540 /* Free the unpickler's memo, taking care to decref any items left in it. */
1541 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1542 _Unpickler_MemoCleanup(UnpicklerObject *self)
1543 {
1544     Py_ssize_t i;
1545     PyObject **memo = self->memo;
1546 
1547     if (self->memo == NULL)
1548         return;
1549     self->memo = NULL;
1550     i = self->memo_size;
1551     while (--i >= 0) {
1552         Py_XDECREF(memo[i]);
1553     }
1554     PyMem_FREE(memo);
1555 }
1556 
1557 static UnpicklerObject *
_Unpickler_New(void)1558 _Unpickler_New(void)
1559 {
1560     UnpicklerObject *self;
1561 
1562     self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1563     if (self == NULL)
1564         return NULL;
1565 
1566     self->pers_func = NULL;
1567     self->input_buffer = NULL;
1568     self->input_line = NULL;
1569     self->input_len = 0;
1570     self->next_read_idx = 0;
1571     self->prefetched_idx = 0;
1572     self->read = NULL;
1573     self->readinto = NULL;
1574     self->readline = NULL;
1575     self->peek = NULL;
1576     self->buffers = NULL;
1577     self->encoding = NULL;
1578     self->errors = NULL;
1579     self->marks = NULL;
1580     self->num_marks = 0;
1581     self->marks_size = 0;
1582     self->proto = 0;
1583     self->fix_imports = 0;
1584     memset(&self->buffer, 0, sizeof(Py_buffer));
1585     self->memo_size = 32;
1586     self->memo_len = 0;
1587     self->memo = _Unpickler_NewMemo(self->memo_size);
1588     self->stack = (Pdata *)Pdata_New();
1589 
1590     if (self->memo == NULL || self->stack == NULL) {
1591         Py_DECREF(self);
1592         return NULL;
1593     }
1594 
1595     PyObject_GC_Track(self);
1596     return self;
1597 }
1598 
1599 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1600    be called once on a freshly created Unpickler. */
1601 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1602 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1603 {
1604     _Py_IDENTIFIER(peek);
1605     _Py_IDENTIFIER(read);
1606     _Py_IDENTIFIER(readinto);
1607     _Py_IDENTIFIER(readline);
1608 
1609     if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1610         return -1;
1611     }
1612     (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1613     (void)_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto);
1614     (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
1615     if (!self->readline || !self->readinto || !self->read) {
1616         if (!PyErr_Occurred()) {
1617             PyErr_SetString(PyExc_TypeError,
1618                             "file must have 'read', 'readinto' and "
1619                             "'readline' attributes");
1620         }
1621         Py_CLEAR(self->read);
1622         Py_CLEAR(self->readinto);
1623         Py_CLEAR(self->readline);
1624         Py_CLEAR(self->peek);
1625         return -1;
1626     }
1627     return 0;
1628 }
1629 
1630 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1631    be called once on a freshly created Unpickler. */
1632 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1633 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1634                             const char *encoding,
1635                             const char *errors)
1636 {
1637     if (encoding == NULL)
1638         encoding = "ASCII";
1639     if (errors == NULL)
1640         errors = "strict";
1641 
1642     self->encoding = _PyMem_Strdup(encoding);
1643     self->errors = _PyMem_Strdup(errors);
1644     if (self->encoding == NULL || self->errors == NULL) {
1645         PyErr_NoMemory();
1646         return -1;
1647     }
1648     return 0;
1649 }
1650 
1651 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1652    be called once on a freshly created Unpickler. */
1653 static int
_Unpickler_SetBuffers(UnpicklerObject * self,PyObject * buffers)1654 _Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1655 {
1656     if (buffers == NULL || buffers == Py_None) {
1657         self->buffers = NULL;
1658     }
1659     else {
1660         self->buffers = PyObject_GetIter(buffers);
1661         if (self->buffers == NULL) {
1662             return -1;
1663         }
1664     }
1665     return 0;
1666 }
1667 
1668 /* Generate a GET opcode for an object stored in the memo. */
1669 static int
memo_get(PicklerObject * self,PyObject * key)1670 memo_get(PicklerObject *self, PyObject *key)
1671 {
1672     Py_ssize_t *value;
1673     char pdata[30];
1674     Py_ssize_t len;
1675 
1676     value = PyMemoTable_Get(self->memo, key);
1677     if (value == NULL)  {
1678         PyErr_SetObject(PyExc_KeyError, key);
1679         return -1;
1680     }
1681 
1682     if (!self->bin) {
1683         pdata[0] = GET;
1684         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1685                       "%" PY_FORMAT_SIZE_T "d\n", *value);
1686         len = strlen(pdata);
1687     }
1688     else {
1689         if (*value < 256) {
1690             pdata[0] = BINGET;
1691             pdata[1] = (unsigned char)(*value & 0xff);
1692             len = 2;
1693         }
1694         else if ((size_t)*value <= 0xffffffffUL) {
1695             pdata[0] = LONG_BINGET;
1696             pdata[1] = (unsigned char)(*value & 0xff);
1697             pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1698             pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1699             pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1700             len = 5;
1701         }
1702         else { /* unlikely */
1703             PickleState *st = _Pickle_GetGlobalState();
1704             PyErr_SetString(st->PicklingError,
1705                             "memo id too large for LONG_BINGET");
1706             return -1;
1707         }
1708     }
1709 
1710     if (_Pickler_Write(self, pdata, len) < 0)
1711         return -1;
1712 
1713     return 0;
1714 }
1715 
1716 /* Store an object in the memo, assign it a new unique ID based on the number
1717    of objects currently stored in the memo and generate a PUT opcode. */
1718 static int
memo_put(PicklerObject * self,PyObject * obj)1719 memo_put(PicklerObject *self, PyObject *obj)
1720 {
1721     char pdata[30];
1722     Py_ssize_t len;
1723     Py_ssize_t idx;
1724 
1725     const char memoize_op = MEMOIZE;
1726 
1727     if (self->fast)
1728         return 0;
1729 
1730     idx = PyMemoTable_Size(self->memo);
1731     if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1732         return -1;
1733 
1734     if (self->proto >= 4) {
1735         if (_Pickler_Write(self, &memoize_op, 1) < 0)
1736             return -1;
1737         return 0;
1738     }
1739     else if (!self->bin) {
1740         pdata[0] = PUT;
1741         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1742                       "%" PY_FORMAT_SIZE_T "d\n", idx);
1743         len = strlen(pdata);
1744     }
1745     else {
1746         if (idx < 256) {
1747             pdata[0] = BINPUT;
1748             pdata[1] = (unsigned char)idx;
1749             len = 2;
1750         }
1751         else if ((size_t)idx <= 0xffffffffUL) {
1752             pdata[0] = LONG_BINPUT;
1753             pdata[1] = (unsigned char)(idx & 0xff);
1754             pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1755             pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1756             pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1757             len = 5;
1758         }
1759         else { /* unlikely */
1760             PickleState *st = _Pickle_GetGlobalState();
1761             PyErr_SetString(st->PicklingError,
1762                             "memo id too large for LONG_BINPUT");
1763             return -1;
1764         }
1765     }
1766     if (_Pickler_Write(self, pdata, len) < 0)
1767         return -1;
1768 
1769     return 0;
1770 }
1771 
1772 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1773 get_dotted_path(PyObject *obj, PyObject *name)
1774 {
1775     _Py_static_string(PyId_dot, ".");
1776     PyObject *dotted_path;
1777     Py_ssize_t i, n;
1778 
1779     dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1780     if (dotted_path == NULL)
1781         return NULL;
1782     n = PyList_GET_SIZE(dotted_path);
1783     assert(n >= 1);
1784     for (i = 0; i < n; i++) {
1785         PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1786         if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1787             if (obj == NULL)
1788                 PyErr_Format(PyExc_AttributeError,
1789                              "Can't pickle local object %R", name);
1790             else
1791                 PyErr_Format(PyExc_AttributeError,
1792                              "Can't pickle local attribute %R on %R", name, obj);
1793             Py_DECREF(dotted_path);
1794             return NULL;
1795         }
1796     }
1797     return dotted_path;
1798 }
1799 
1800 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1801 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1802 {
1803     Py_ssize_t i, n;
1804     PyObject *parent = NULL;
1805 
1806     assert(PyList_CheckExact(names));
1807     Py_INCREF(obj);
1808     n = PyList_GET_SIZE(names);
1809     for (i = 0; i < n; i++) {
1810         PyObject *name = PyList_GET_ITEM(names, i);
1811         Py_XDECREF(parent);
1812         parent = obj;
1813         (void)_PyObject_LookupAttr(parent, name, &obj);
1814         if (obj == NULL) {
1815             Py_DECREF(parent);
1816             return NULL;
1817         }
1818     }
1819     if (pparent != NULL)
1820         *pparent = parent;
1821     else
1822         Py_XDECREF(parent);
1823     return obj;
1824 }
1825 
1826 
1827 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1828 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1829 {
1830     PyObject *dotted_path, *attr;
1831 
1832     if (allow_qualname) {
1833         dotted_path = get_dotted_path(obj, name);
1834         if (dotted_path == NULL)
1835             return NULL;
1836         attr = get_deep_attribute(obj, dotted_path, NULL);
1837         Py_DECREF(dotted_path);
1838     }
1839     else {
1840         (void)_PyObject_LookupAttr(obj, name, &attr);
1841     }
1842     if (attr == NULL && !PyErr_Occurred()) {
1843         PyErr_Format(PyExc_AttributeError,
1844                      "Can't get attribute %R on %R", name, obj);
1845     }
1846     return attr;
1847 }
1848 
1849 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1850 _checkmodule(PyObject *module_name, PyObject *module,
1851              PyObject *global, PyObject *dotted_path)
1852 {
1853     if (module == Py_None) {
1854         return -1;
1855     }
1856     if (PyUnicode_Check(module_name) &&
1857             _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1858         return -1;
1859     }
1860 
1861     PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1862     if (candidate == NULL) {
1863         return -1;
1864     }
1865     if (candidate != global) {
1866         Py_DECREF(candidate);
1867         return -1;
1868     }
1869     Py_DECREF(candidate);
1870     return 0;
1871 }
1872 
1873 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1874 whichmodule(PyObject *global, PyObject *dotted_path)
1875 {
1876     PyObject *module_name;
1877     PyObject *module = NULL;
1878     Py_ssize_t i;
1879     PyObject *modules;
1880     _Py_IDENTIFIER(__module__);
1881     _Py_IDENTIFIER(modules);
1882     _Py_IDENTIFIER(__main__);
1883 
1884     if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1885         return NULL;
1886     }
1887     if (module_name) {
1888         /* In some rare cases (e.g., bound methods of extension types),
1889            __module__ can be None. If it is so, then search sys.modules for
1890            the module of global. */
1891         if (module_name != Py_None)
1892             return module_name;
1893         Py_CLEAR(module_name);
1894     }
1895     assert(module_name == NULL);
1896 
1897     /* Fallback on walking sys.modules */
1898     modules = _PySys_GetObjectId(&PyId_modules);
1899     if (modules == NULL) {
1900         PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1901         return NULL;
1902     }
1903     if (PyDict_CheckExact(modules)) {
1904         i = 0;
1905         while (PyDict_Next(modules, &i, &module_name, &module)) {
1906             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1907                 Py_INCREF(module_name);
1908                 return module_name;
1909             }
1910             if (PyErr_Occurred()) {
1911                 return NULL;
1912             }
1913         }
1914     }
1915     else {
1916         PyObject *iterator = PyObject_GetIter(modules);
1917         if (iterator == NULL) {
1918             return NULL;
1919         }
1920         while ((module_name = PyIter_Next(iterator))) {
1921             module = PyObject_GetItem(modules, module_name);
1922             if (module == NULL) {
1923                 Py_DECREF(module_name);
1924                 Py_DECREF(iterator);
1925                 return NULL;
1926             }
1927             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1928                 Py_DECREF(module);
1929                 Py_DECREF(iterator);
1930                 return module_name;
1931             }
1932             Py_DECREF(module);
1933             Py_DECREF(module_name);
1934             if (PyErr_Occurred()) {
1935                 Py_DECREF(iterator);
1936                 return NULL;
1937             }
1938         }
1939         Py_DECREF(iterator);
1940     }
1941 
1942     /* If no module is found, use __main__. */
1943     module_name = _PyUnicode_FromId(&PyId___main__);
1944     Py_XINCREF(module_name);
1945     return module_name;
1946 }
1947 
1948 /* fast_save_enter() and fast_save_leave() are guards against recursive
1949    objects when Pickler is used with the "fast mode" (i.e., with object
1950    memoization disabled). If the nesting of a list or dict object exceed
1951    FAST_NESTING_LIMIT, these guards will start keeping an internal
1952    reference to the seen list or dict objects and check whether these objects
1953    are recursive. These are not strictly necessary, since save() has a
1954    hard-coded recursion limit, but they give a nicer error message than the
1955    typical RuntimeError. */
1956 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1957 fast_save_enter(PicklerObject *self, PyObject *obj)
1958 {
1959     /* if fast_nesting < 0, we're doing an error exit. */
1960     if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1961         PyObject *key = NULL;
1962         if (self->fast_memo == NULL) {
1963             self->fast_memo = PyDict_New();
1964             if (self->fast_memo == NULL) {
1965                 self->fast_nesting = -1;
1966                 return 0;
1967             }
1968         }
1969         key = PyLong_FromVoidPtr(obj);
1970         if (key == NULL) {
1971             self->fast_nesting = -1;
1972             return 0;
1973         }
1974         if (PyDict_GetItemWithError(self->fast_memo, key)) {
1975             Py_DECREF(key);
1976             PyErr_Format(PyExc_ValueError,
1977                          "fast mode: can't pickle cyclic objects "
1978                          "including object type %.200s at %p",
1979                          obj->ob_type->tp_name, obj);
1980             self->fast_nesting = -1;
1981             return 0;
1982         }
1983         if (PyErr_Occurred()) {
1984             Py_DECREF(key);
1985             self->fast_nesting = -1;
1986             return 0;
1987         }
1988         if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1989             Py_DECREF(key);
1990             self->fast_nesting = -1;
1991             return 0;
1992         }
1993         Py_DECREF(key);
1994     }
1995     return 1;
1996 }
1997 
1998 static int
fast_save_leave(PicklerObject * self,PyObject * obj)1999 fast_save_leave(PicklerObject *self, PyObject *obj)
2000 {
2001     if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2002         PyObject *key = PyLong_FromVoidPtr(obj);
2003         if (key == NULL)
2004             return 0;
2005         if (PyDict_DelItem(self->fast_memo, key) < 0) {
2006             Py_DECREF(key);
2007             return 0;
2008         }
2009         Py_DECREF(key);
2010     }
2011     return 1;
2012 }
2013 
2014 static int
save_none(PicklerObject * self,PyObject * obj)2015 save_none(PicklerObject *self, PyObject *obj)
2016 {
2017     const char none_op = NONE;
2018     if (_Pickler_Write(self, &none_op, 1) < 0)
2019         return -1;
2020 
2021     return 0;
2022 }
2023 
2024 static int
save_bool(PicklerObject * self,PyObject * obj)2025 save_bool(PicklerObject *self, PyObject *obj)
2026 {
2027     if (self->proto >= 2) {
2028         const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
2029         if (_Pickler_Write(self, &bool_op, 1) < 0)
2030             return -1;
2031     }
2032     else {
2033         /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2034          * so that unpicklers written before bools were introduced unpickle them
2035          * as ints, but unpicklers after can recognize that bools were intended.
2036          * Note that protocol 2 added direct ways to pickle bools.
2037          */
2038         const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2039         if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2040             return -1;
2041     }
2042     return 0;
2043 }
2044 
2045 static int
save_long(PicklerObject * self,PyObject * obj)2046 save_long(PicklerObject *self, PyObject *obj)
2047 {
2048     PyObject *repr = NULL;
2049     Py_ssize_t size;
2050     long val;
2051     int overflow;
2052     int status = 0;
2053 
2054     val= PyLong_AsLongAndOverflow(obj, &overflow);
2055     if (!overflow && (sizeof(long) <= 4 ||
2056             (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2057     {
2058         /* result fits in a signed 4-byte integer.
2059 
2060            Note: we can't use -0x80000000L in the above condition because some
2061            compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2062            before applying the unary minus when sizeof(long) <= 4. The
2063            resulting value stays unsigned which is commonly not what we want,
2064            so MSVC happily warns us about it.  However, that result would have
2065            been fine because we guard for sizeof(long) <= 4 which turns the
2066            condition true in that particular case. */
2067         char pdata[32];
2068         Py_ssize_t len = 0;
2069 
2070         if (self->bin) {
2071             pdata[1] = (unsigned char)(val & 0xff);
2072             pdata[2] = (unsigned char)((val >> 8) & 0xff);
2073             pdata[3] = (unsigned char)((val >> 16) & 0xff);
2074             pdata[4] = (unsigned char)((val >> 24) & 0xff);
2075 
2076             if ((pdata[4] != 0) || (pdata[3] != 0)) {
2077                 pdata[0] = BININT;
2078                 len = 5;
2079             }
2080             else if (pdata[2] != 0) {
2081                 pdata[0] = BININT2;
2082                 len = 3;
2083             }
2084             else {
2085                 pdata[0] = BININT1;
2086                 len = 2;
2087             }
2088         }
2089         else {
2090             sprintf(pdata, "%c%ld\n", INT,  val);
2091             len = strlen(pdata);
2092         }
2093         if (_Pickler_Write(self, pdata, len) < 0)
2094             return -1;
2095 
2096         return 0;
2097     }
2098     assert(!PyErr_Occurred());
2099 
2100     if (self->proto >= 2) {
2101         /* Linear-time pickling. */
2102         size_t nbits;
2103         size_t nbytes;
2104         unsigned char *pdata;
2105         char header[5];
2106         int i;
2107         int sign = _PyLong_Sign(obj);
2108 
2109         if (sign == 0) {
2110             header[0] = LONG1;
2111             header[1] = 0;      /* It's 0 -- an empty bytestring. */
2112             if (_Pickler_Write(self, header, 2) < 0)
2113                 goto error;
2114             return 0;
2115         }
2116         nbits = _PyLong_NumBits(obj);
2117         if (nbits == (size_t)-1 && PyErr_Occurred())
2118             goto error;
2119         /* How many bytes do we need?  There are nbits >> 3 full
2120          * bytes of data, and nbits & 7 leftover bits.  If there
2121          * are any leftover bits, then we clearly need another
2122          * byte.  What's not so obvious is that we *probably*
2123          * need another byte even if there aren't any leftovers:
2124          * the most-significant bit of the most-significant byte
2125          * acts like a sign bit, and it's usually got a sense
2126          * opposite of the one we need.  The exception is ints
2127          * of the form -(2**(8*j-1)) for j > 0.  Such an int is
2128          * its own 256's-complement, so has the right sign bit
2129          * even without the extra byte.  That's a pain to check
2130          * for in advance, though, so we always grab an extra
2131          * byte at the start, and cut it back later if possible.
2132          */
2133         nbytes = (nbits >> 3) + 1;
2134         if (nbytes > 0x7fffffffL) {
2135             PyErr_SetString(PyExc_OverflowError,
2136                             "int too large to pickle");
2137             goto error;
2138         }
2139         repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2140         if (repr == NULL)
2141             goto error;
2142         pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2143         i = _PyLong_AsByteArray((PyLongObject *)obj,
2144                                 pdata, nbytes,
2145                                 1 /* little endian */ , 1 /* signed */ );
2146         if (i < 0)
2147             goto error;
2148         /* If the int is negative, this may be a byte more than
2149          * needed.  This is so iff the MSB is all redundant sign
2150          * bits.
2151          */
2152         if (sign < 0 &&
2153             nbytes > 1 &&
2154             pdata[nbytes - 1] == 0xff &&
2155             (pdata[nbytes - 2] & 0x80) != 0) {
2156             nbytes--;
2157         }
2158 
2159         if (nbytes < 256) {
2160             header[0] = LONG1;
2161             header[1] = (unsigned char)nbytes;
2162             size = 2;
2163         }
2164         else {
2165             header[0] = LONG4;
2166             size = (Py_ssize_t) nbytes;
2167             for (i = 1; i < 5; i++) {
2168                 header[i] = (unsigned char)(size & 0xff);
2169                 size >>= 8;
2170             }
2171             size = 5;
2172         }
2173         if (_Pickler_Write(self, header, size) < 0 ||
2174             _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2175             goto error;
2176     }
2177     else {
2178         const char long_op = LONG;
2179         const char *string;
2180 
2181         /* proto < 2: write the repr and newline.  This is quadratic-time (in
2182            the number of digits), in both directions.  We add a trailing 'L'
2183            to the repr, for compatibility with Python 2.x. */
2184 
2185         repr = PyObject_Repr(obj);
2186         if (repr == NULL)
2187             goto error;
2188 
2189         string = PyUnicode_AsUTF8AndSize(repr, &size);
2190         if (string == NULL)
2191             goto error;
2192 
2193         if (_Pickler_Write(self, &long_op, 1) < 0 ||
2194             _Pickler_Write(self, string, size) < 0 ||
2195             _Pickler_Write(self, "L\n", 2) < 0)
2196             goto error;
2197     }
2198 
2199     if (0) {
2200   error:
2201       status = -1;
2202     }
2203     Py_XDECREF(repr);
2204 
2205     return status;
2206 }
2207 
2208 static int
save_float(PicklerObject * self,PyObject * obj)2209 save_float(PicklerObject *self, PyObject *obj)
2210 {
2211     double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2212 
2213     if (self->bin) {
2214         char pdata[9];
2215         pdata[0] = BINFLOAT;
2216         if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2217             return -1;
2218         if (_Pickler_Write(self, pdata, 9) < 0)
2219             return -1;
2220    }
2221     else {
2222         int result = -1;
2223         char *buf = NULL;
2224         char op = FLOAT;
2225 
2226         if (_Pickler_Write(self, &op, 1) < 0)
2227             goto done;
2228 
2229         buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2230         if (!buf) {
2231             PyErr_NoMemory();
2232             goto done;
2233         }
2234 
2235         if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2236             goto done;
2237 
2238         if (_Pickler_Write(self, "\n", 1) < 0)
2239             goto done;
2240 
2241         result = 0;
2242 done:
2243         PyMem_Free(buf);
2244         return result;
2245     }
2246 
2247     return 0;
2248 }
2249 
2250 /* Perform direct write of the header and payload of the binary object.
2251 
2252    The large contiguous data is written directly into the underlying file
2253    object, bypassing the output_buffer of the Pickler.  We intentionally
2254    do not insert a protocol 4 frame opcode to make it possible to optimize
2255    file.read calls in the loader.
2256  */
2257 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2258 _Pickler_write_bytes(PicklerObject *self,
2259                      const char *header, Py_ssize_t header_size,
2260                      const char *data, Py_ssize_t data_size,
2261                      PyObject *payload)
2262 {
2263     int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2264     int framing = self->framing;
2265 
2266     if (bypass_buffer) {
2267         assert(self->output_buffer != NULL);
2268         /* Commit the previous frame. */
2269         if (_Pickler_CommitFrame(self)) {
2270             return -1;
2271         }
2272         /* Disable framing temporarily */
2273         self->framing = 0;
2274     }
2275 
2276     if (_Pickler_Write(self, header, header_size) < 0) {
2277         return -1;
2278     }
2279 
2280     if (bypass_buffer && self->write != NULL) {
2281         /* Bypass the in-memory buffer to directly stream large data
2282            into the underlying file object. */
2283         PyObject *result, *mem = NULL;
2284         /* Dump the output buffer to the file. */
2285         if (_Pickler_FlushToFile(self) < 0) {
2286             return -1;
2287         }
2288 
2289         /* Stream write the payload into the file without going through the
2290            output buffer. */
2291         if (payload == NULL) {
2292             /* TODO: It would be better to use a memoryview with a linked
2293                original string if this is possible. */
2294             payload = mem = PyBytes_FromStringAndSize(data, data_size);
2295             if (payload == NULL) {
2296                 return -1;
2297             }
2298         }
2299         result = PyObject_CallFunctionObjArgs(self->write, payload, NULL);
2300         Py_XDECREF(mem);
2301         if (result == NULL) {
2302             return -1;
2303         }
2304         Py_DECREF(result);
2305 
2306         /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2307         if (_Pickler_ClearBuffer(self) < 0) {
2308             return -1;
2309         }
2310     }
2311     else {
2312         if (_Pickler_Write(self, data, data_size) < 0) {
2313             return -1;
2314         }
2315     }
2316 
2317     /* Re-enable framing for subsequent calls to _Pickler_Write. */
2318     self->framing = framing;
2319 
2320     return 0;
2321 }
2322 
2323 static int
_save_bytes_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2324 _save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2325                  Py_ssize_t size)
2326 {
2327     assert(self->proto >= 3);
2328 
2329     char header[9];
2330     Py_ssize_t len;
2331 
2332     if (size < 0)
2333         return -1;
2334 
2335     if (size <= 0xff) {
2336         header[0] = SHORT_BINBYTES;
2337         header[1] = (unsigned char)size;
2338         len = 2;
2339     }
2340     else if ((size_t)size <= 0xffffffffUL) {
2341         header[0] = BINBYTES;
2342         header[1] = (unsigned char)(size & 0xff);
2343         header[2] = (unsigned char)((size >> 8) & 0xff);
2344         header[3] = (unsigned char)((size >> 16) & 0xff);
2345         header[4] = (unsigned char)((size >> 24) & 0xff);
2346         len = 5;
2347     }
2348     else if (self->proto >= 4) {
2349         header[0] = BINBYTES8;
2350         _write_size64(header + 1, size);
2351         len = 9;
2352     }
2353     else {
2354         PyErr_SetString(PyExc_OverflowError,
2355                         "serializing a bytes object larger than 4 GiB "
2356                         "requires pickle protocol 4 or higher");
2357         return -1;
2358     }
2359 
2360     if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2361         return -1;
2362     }
2363 
2364     if (memo_put(self, obj) < 0) {
2365         return -1;
2366     }
2367 
2368     return 0;
2369 }
2370 
2371 static int
save_bytes(PicklerObject * self,PyObject * obj)2372 save_bytes(PicklerObject *self, PyObject *obj)
2373 {
2374     if (self->proto < 3) {
2375         /* Older pickle protocols do not have an opcode for pickling bytes
2376            objects. Therefore, we need to fake the copy protocol (i.e.,
2377            the __reduce__ method) to permit bytes object unpickling.
2378 
2379            Here we use a hack to be compatible with Python 2. Since in Python
2380            2 'bytes' is just an alias for 'str' (which has different
2381            parameters than the actual bytes object), we use codecs.encode
2382            to create the appropriate 'str' object when unpickled using
2383            Python 2 *and* the appropriate 'bytes' object when unpickled
2384            using Python 3. Again this is a hack and we don't need to do this
2385            with newer protocols. */
2386         PyObject *reduce_value;
2387         int status;
2388 
2389         if (PyBytes_GET_SIZE(obj) == 0) {
2390             reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2391         }
2392         else {
2393             PickleState *st = _Pickle_GetGlobalState();
2394             PyObject *unicode_str =
2395                 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2396                                        PyBytes_GET_SIZE(obj),
2397                                        "strict");
2398             _Py_IDENTIFIER(latin1);
2399 
2400             if (unicode_str == NULL)
2401                 return -1;
2402             reduce_value = Py_BuildValue("(O(OO))",
2403                                          st->codecs_encode, unicode_str,
2404                                          _PyUnicode_FromId(&PyId_latin1));
2405             Py_DECREF(unicode_str);
2406         }
2407 
2408         if (reduce_value == NULL)
2409             return -1;
2410 
2411         /* save_reduce() will memoize the object automatically. */
2412         status = save_reduce(self, reduce_value, obj);
2413         Py_DECREF(reduce_value);
2414         return status;
2415     }
2416     else {
2417         return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2418                                 PyBytes_GET_SIZE(obj));
2419     }
2420 }
2421 
2422 static int
_save_bytearray_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2423 _save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2424                      Py_ssize_t size)
2425 {
2426     assert(self->proto >= 5);
2427 
2428     char header[9];
2429     Py_ssize_t len;
2430 
2431     if (size < 0)
2432         return -1;
2433 
2434     header[0] = BYTEARRAY8;
2435     _write_size64(header + 1, size);
2436     len = 9;
2437 
2438     if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2439         return -1;
2440     }
2441 
2442     if (memo_put(self, obj) < 0) {
2443         return -1;
2444     }
2445 
2446     return 0;
2447 }
2448 
2449 static int
save_bytearray(PicklerObject * self,PyObject * obj)2450 save_bytearray(PicklerObject *self, PyObject *obj)
2451 {
2452     if (self->proto < 5) {
2453         /* Older pickle protocols do not have an opcode for pickling
2454          * bytearrays. */
2455         PyObject *reduce_value = NULL;
2456         int status;
2457 
2458         if (PyByteArray_GET_SIZE(obj) == 0) {
2459             reduce_value = Py_BuildValue("(O())",
2460                                          (PyObject *) &PyByteArray_Type);
2461         }
2462         else {
2463             PyObject *bytes_obj = PyBytes_FromObject(obj);
2464             if (bytes_obj != NULL) {
2465                 reduce_value = Py_BuildValue("(O(O))",
2466                                              (PyObject *) &PyByteArray_Type,
2467                                              bytes_obj);
2468                 Py_DECREF(bytes_obj);
2469             }
2470         }
2471         if (reduce_value == NULL)
2472             return -1;
2473 
2474         /* save_reduce() will memoize the object automatically. */
2475         status = save_reduce(self, reduce_value, obj);
2476         Py_DECREF(reduce_value);
2477         return status;
2478     }
2479     else {
2480         return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2481                                     PyByteArray_GET_SIZE(obj));
2482     }
2483 }
2484 
2485 static int
save_picklebuffer(PicklerObject * self,PyObject * obj)2486 save_picklebuffer(PicklerObject *self, PyObject *obj)
2487 {
2488     if (self->proto < 5) {
2489         PickleState *st = _Pickle_GetGlobalState();
2490         PyErr_SetString(st->PicklingError,
2491                         "PickleBuffer can only pickled with protocol >= 5");
2492         return -1;
2493     }
2494     const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2495     if (view == NULL) {
2496         return -1;
2497     }
2498     if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2499         PickleState *st = _Pickle_GetGlobalState();
2500         PyErr_SetString(st->PicklingError,
2501                         "PickleBuffer can not be pickled when "
2502                         "pointing to a non-contiguous buffer");
2503         return -1;
2504     }
2505     int in_band = 1;
2506     if (self->buffer_callback != NULL) {
2507         PyObject *ret = PyObject_CallFunctionObjArgs(self->buffer_callback,
2508                                                      obj, NULL);
2509         if (ret == NULL) {
2510             return -1;
2511         }
2512         in_band = PyObject_IsTrue(ret);
2513         Py_DECREF(ret);
2514         if (in_band == -1) {
2515             return -1;
2516         }
2517     }
2518     if (in_band) {
2519         /* Write data in-band */
2520         if (view->readonly) {
2521             return _save_bytes_data(self, obj, (const char*) view->buf,
2522                                     view->len);
2523         }
2524         else {
2525             return _save_bytearray_data(self, obj, (const char*) view->buf,
2526                                         view->len);
2527         }
2528     }
2529     else {
2530         /* Write data out-of-band */
2531         const char next_buffer_op = NEXT_BUFFER;
2532         if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2533             return -1;
2534         }
2535         if (view->readonly) {
2536             const char readonly_buffer_op = READONLY_BUFFER;
2537             if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2538                 return -1;
2539             }
2540         }
2541     }
2542     return 0;
2543 }
2544 
2545 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2546    backslash and newline characters to \uXXXX escapes. */
2547 static PyObject *
raw_unicode_escape(PyObject * obj)2548 raw_unicode_escape(PyObject *obj)
2549 {
2550     char *p;
2551     Py_ssize_t i, size;
2552     void *data;
2553     unsigned int kind;
2554     _PyBytesWriter writer;
2555 
2556     if (PyUnicode_READY(obj))
2557         return NULL;
2558 
2559     _PyBytesWriter_Init(&writer);
2560 
2561     size = PyUnicode_GET_LENGTH(obj);
2562     data = PyUnicode_DATA(obj);
2563     kind = PyUnicode_KIND(obj);
2564 
2565     p = _PyBytesWriter_Alloc(&writer, size);
2566     if (p == NULL)
2567         goto error;
2568     writer.overallocate = 1;
2569 
2570     for (i=0; i < size; i++) {
2571         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2572         /* Map 32-bit characters to '\Uxxxxxxxx' */
2573         if (ch >= 0x10000) {
2574             /* -1: subtract 1 preallocated byte */
2575             p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2576             if (p == NULL)
2577                 goto error;
2578 
2579             *p++ = '\\';
2580             *p++ = 'U';
2581             *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2582             *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2583             *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2584             *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2585             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2586             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2587             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2588             *p++ = Py_hexdigits[ch & 15];
2589         }
2590         /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2591         else if (ch >= 256 ||
2592                  ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2593                  ch == 0x1a)
2594         {
2595             /* -1: subtract 1 preallocated byte */
2596             p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2597             if (p == NULL)
2598                 goto error;
2599 
2600             *p++ = '\\';
2601             *p++ = 'u';
2602             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2603             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2604             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2605             *p++ = Py_hexdigits[ch & 15];
2606         }
2607         /* Copy everything else as-is */
2608         else
2609             *p++ = (char) ch;
2610     }
2611 
2612     return _PyBytesWriter_Finish(&writer, p);
2613 
2614 error:
2615     _PyBytesWriter_Dealloc(&writer);
2616     return NULL;
2617 }
2618 
2619 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2620 write_unicode_binary(PicklerObject *self, PyObject *obj)
2621 {
2622     char header[9];
2623     Py_ssize_t len;
2624     PyObject *encoded = NULL;
2625     Py_ssize_t size;
2626     const char *data;
2627 
2628     if (PyUnicode_READY(obj))
2629         return -1;
2630 
2631     data = PyUnicode_AsUTF8AndSize(obj, &size);
2632     if (data == NULL) {
2633         /* Issue #8383: for strings with lone surrogates, fallback on the
2634            "surrogatepass" error handler. */
2635         PyErr_Clear();
2636         encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2637         if (encoded == NULL)
2638             return -1;
2639 
2640         data = PyBytes_AS_STRING(encoded);
2641         size = PyBytes_GET_SIZE(encoded);
2642     }
2643 
2644     assert(size >= 0);
2645     if (size <= 0xff && self->proto >= 4) {
2646         header[0] = SHORT_BINUNICODE;
2647         header[1] = (unsigned char)(size & 0xff);
2648         len = 2;
2649     }
2650     else if ((size_t)size <= 0xffffffffUL) {
2651         header[0] = BINUNICODE;
2652         header[1] = (unsigned char)(size & 0xff);
2653         header[2] = (unsigned char)((size >> 8) & 0xff);
2654         header[3] = (unsigned char)((size >> 16) & 0xff);
2655         header[4] = (unsigned char)((size >> 24) & 0xff);
2656         len = 5;
2657     }
2658     else if (self->proto >= 4) {
2659         header[0] = BINUNICODE8;
2660         _write_size64(header + 1, size);
2661         len = 9;
2662     }
2663     else {
2664         PyErr_SetString(PyExc_OverflowError,
2665                         "serializing a string larger than 4 GiB "
2666                         "requires pickle protocol 4 or higher");
2667         Py_XDECREF(encoded);
2668         return -1;
2669     }
2670 
2671     if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2672         Py_XDECREF(encoded);
2673         return -1;
2674     }
2675     Py_XDECREF(encoded);
2676     return 0;
2677 }
2678 
2679 static int
save_unicode(PicklerObject * self,PyObject * obj)2680 save_unicode(PicklerObject *self, PyObject *obj)
2681 {
2682     if (self->bin) {
2683         if (write_unicode_binary(self, obj) < 0)
2684             return -1;
2685     }
2686     else {
2687         PyObject *encoded;
2688         Py_ssize_t size;
2689         const char unicode_op = UNICODE;
2690 
2691         encoded = raw_unicode_escape(obj);
2692         if (encoded == NULL)
2693             return -1;
2694 
2695         if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2696             Py_DECREF(encoded);
2697             return -1;
2698         }
2699 
2700         size = PyBytes_GET_SIZE(encoded);
2701         if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2702             Py_DECREF(encoded);
2703             return -1;
2704         }
2705         Py_DECREF(encoded);
2706 
2707         if (_Pickler_Write(self, "\n", 1) < 0)
2708             return -1;
2709     }
2710     if (memo_put(self, obj) < 0)
2711         return -1;
2712 
2713     return 0;
2714 }
2715 
2716 /* A helper for save_tuple.  Push the len elements in tuple t on the stack. */
2717 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2718 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2719 {
2720     Py_ssize_t i;
2721 
2722     assert(PyTuple_Size(t) == len);
2723 
2724     for (i = 0; i < len; i++) {
2725         PyObject *element = PyTuple_GET_ITEM(t, i);
2726 
2727         if (element == NULL)
2728             return -1;
2729         if (save(self, element, 0) < 0)
2730             return -1;
2731     }
2732 
2733     return 0;
2734 }
2735 
2736 /* Tuples are ubiquitous in the pickle protocols, so many techniques are
2737  * used across protocols to minimize the space needed to pickle them.
2738  * Tuples are also the only builtin immutable type that can be recursive
2739  * (a tuple can be reached from itself), and that requires some subtle
2740  * magic so that it works in all cases.  IOW, this is a long routine.
2741  */
2742 static int
save_tuple(PicklerObject * self,PyObject * obj)2743 save_tuple(PicklerObject *self, PyObject *obj)
2744 {
2745     Py_ssize_t len, i;
2746 
2747     const char mark_op = MARK;
2748     const char tuple_op = TUPLE;
2749     const char pop_op = POP;
2750     const char pop_mark_op = POP_MARK;
2751     const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2752 
2753     if ((len = PyTuple_Size(obj)) < 0)
2754         return -1;
2755 
2756     if (len == 0) {
2757         char pdata[2];
2758 
2759         if (self->proto) {
2760             pdata[0] = EMPTY_TUPLE;
2761             len = 1;
2762         }
2763         else {
2764             pdata[0] = MARK;
2765             pdata[1] = TUPLE;
2766             len = 2;
2767         }
2768         if (_Pickler_Write(self, pdata, len) < 0)
2769             return -1;
2770         return 0;
2771     }
2772 
2773     /* The tuple isn't in the memo now.  If it shows up there after
2774      * saving the tuple elements, the tuple must be recursive, in
2775      * which case we'll pop everything we put on the stack, and fetch
2776      * its value from the memo.
2777      */
2778     if (len <= 3 && self->proto >= 2) {
2779         /* Use TUPLE{1,2,3} opcodes. */
2780         if (store_tuple_elements(self, obj, len) < 0)
2781             return -1;
2782 
2783         if (PyMemoTable_Get(self->memo, obj)) {
2784             /* pop the len elements */
2785             for (i = 0; i < len; i++)
2786                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2787                     return -1;
2788             /* fetch from memo */
2789             if (memo_get(self, obj) < 0)
2790                 return -1;
2791 
2792             return 0;
2793         }
2794         else { /* Not recursive. */
2795             if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2796                 return -1;
2797         }
2798         goto memoize;
2799     }
2800 
2801     /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2802      * Generate MARK e1 e2 ... TUPLE
2803      */
2804     if (_Pickler_Write(self, &mark_op, 1) < 0)
2805         return -1;
2806 
2807     if (store_tuple_elements(self, obj, len) < 0)
2808         return -1;
2809 
2810     if (PyMemoTable_Get(self->memo, obj)) {
2811         /* pop the stack stuff we pushed */
2812         if (self->bin) {
2813             if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2814                 return -1;
2815         }
2816         else {
2817             /* Note that we pop one more than len, to remove
2818              * the MARK too.
2819              */
2820             for (i = 0; i <= len; i++)
2821                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2822                     return -1;
2823         }
2824         /* fetch from memo */
2825         if (memo_get(self, obj) < 0)
2826             return -1;
2827 
2828         return 0;
2829     }
2830     else { /* Not recursive. */
2831         if (_Pickler_Write(self, &tuple_op, 1) < 0)
2832             return -1;
2833     }
2834 
2835   memoize:
2836     if (memo_put(self, obj) < 0)
2837         return -1;
2838 
2839     return 0;
2840 }
2841 
2842 /* iter is an iterator giving items, and we batch up chunks of
2843  *     MARK item item ... item APPENDS
2844  * opcode sequences.  Calling code should have arranged to first create an
2845  * empty list, or list-like object, for the APPENDS to operate on.
2846  * Returns 0 on success, <0 on error.
2847  */
2848 static int
batch_list(PicklerObject * self,PyObject * iter)2849 batch_list(PicklerObject *self, PyObject *iter)
2850 {
2851     PyObject *obj = NULL;
2852     PyObject *firstitem = NULL;
2853     int i, n;
2854 
2855     const char mark_op = MARK;
2856     const char append_op = APPEND;
2857     const char appends_op = APPENDS;
2858 
2859     assert(iter != NULL);
2860 
2861     /* XXX: I think this function could be made faster by avoiding the
2862        iterator interface and fetching objects directly from list using
2863        PyList_GET_ITEM.
2864     */
2865 
2866     if (self->proto == 0) {
2867         /* APPENDS isn't available; do one at a time. */
2868         for (;;) {
2869             obj = PyIter_Next(iter);
2870             if (obj == NULL) {
2871                 if (PyErr_Occurred())
2872                     return -1;
2873                 break;
2874             }
2875             i = save(self, obj, 0);
2876             Py_DECREF(obj);
2877             if (i < 0)
2878                 return -1;
2879             if (_Pickler_Write(self, &append_op, 1) < 0)
2880                 return -1;
2881         }
2882         return 0;
2883     }
2884 
2885     /* proto > 0:  write in batches of BATCHSIZE. */
2886     do {
2887         /* Get first item */
2888         firstitem = PyIter_Next(iter);
2889         if (firstitem == NULL) {
2890             if (PyErr_Occurred())
2891                 goto error;
2892 
2893             /* nothing more to add */
2894             break;
2895         }
2896 
2897         /* Try to get a second item */
2898         obj = PyIter_Next(iter);
2899         if (obj == NULL) {
2900             if (PyErr_Occurred())
2901                 goto error;
2902 
2903             /* Only one item to write */
2904             if (save(self, firstitem, 0) < 0)
2905                 goto error;
2906             if (_Pickler_Write(self, &append_op, 1) < 0)
2907                 goto error;
2908             Py_CLEAR(firstitem);
2909             break;
2910         }
2911 
2912         /* More than one item to write */
2913 
2914         /* Pump out MARK, items, APPENDS. */
2915         if (_Pickler_Write(self, &mark_op, 1) < 0)
2916             goto error;
2917 
2918         if (save(self, firstitem, 0) < 0)
2919             goto error;
2920         Py_CLEAR(firstitem);
2921         n = 1;
2922 
2923         /* Fetch and save up to BATCHSIZE items */
2924         while (obj) {
2925             if (save(self, obj, 0) < 0)
2926                 goto error;
2927             Py_CLEAR(obj);
2928             n += 1;
2929 
2930             if (n == BATCHSIZE)
2931                 break;
2932 
2933             obj = PyIter_Next(iter);
2934             if (obj == NULL) {
2935                 if (PyErr_Occurred())
2936                     goto error;
2937                 break;
2938             }
2939         }
2940 
2941         if (_Pickler_Write(self, &appends_op, 1) < 0)
2942             goto error;
2943 
2944     } while (n == BATCHSIZE);
2945     return 0;
2946 
2947   error:
2948     Py_XDECREF(firstitem);
2949     Py_XDECREF(obj);
2950     return -1;
2951 }
2952 
2953 /* This is a variant of batch_list() above, specialized for lists (with no
2954  * support for list subclasses). Like batch_list(), we batch up chunks of
2955  *     MARK item item ... item APPENDS
2956  * opcode sequences.  Calling code should have arranged to first create an
2957  * empty list, or list-like object, for the APPENDS to operate on.
2958  * Returns 0 on success, -1 on error.
2959  *
2960  * This version is considerably faster than batch_list(), if less general.
2961  *
2962  * Note that this only works for protocols > 0.
2963  */
2964 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2965 batch_list_exact(PicklerObject *self, PyObject *obj)
2966 {
2967     PyObject *item = NULL;
2968     Py_ssize_t this_batch, total;
2969 
2970     const char append_op = APPEND;
2971     const char appends_op = APPENDS;
2972     const char mark_op = MARK;
2973 
2974     assert(obj != NULL);
2975     assert(self->proto > 0);
2976     assert(PyList_CheckExact(obj));
2977 
2978     if (PyList_GET_SIZE(obj) == 1) {
2979         item = PyList_GET_ITEM(obj, 0);
2980         if (save(self, item, 0) < 0)
2981             return -1;
2982         if (_Pickler_Write(self, &append_op, 1) < 0)
2983             return -1;
2984         return 0;
2985     }
2986 
2987     /* Write in batches of BATCHSIZE. */
2988     total = 0;
2989     do {
2990         this_batch = 0;
2991         if (_Pickler_Write(self, &mark_op, 1) < 0)
2992             return -1;
2993         while (total < PyList_GET_SIZE(obj)) {
2994             item = PyList_GET_ITEM(obj, total);
2995             if (save(self, item, 0) < 0)
2996                 return -1;
2997             total++;
2998             if (++this_batch == BATCHSIZE)
2999                 break;
3000         }
3001         if (_Pickler_Write(self, &appends_op, 1) < 0)
3002             return -1;
3003 
3004     } while (total < PyList_GET_SIZE(obj));
3005 
3006     return 0;
3007 }
3008 
3009 static int
save_list(PicklerObject * self,PyObject * obj)3010 save_list(PicklerObject *self, PyObject *obj)
3011 {
3012     char header[3];
3013     Py_ssize_t len;
3014     int status = 0;
3015 
3016     if (self->fast && !fast_save_enter(self, obj))
3017         goto error;
3018 
3019     /* Create an empty list. */
3020     if (self->bin) {
3021         header[0] = EMPTY_LIST;
3022         len = 1;
3023     }
3024     else {
3025         header[0] = MARK;
3026         header[1] = LIST;
3027         len = 2;
3028     }
3029 
3030     if (_Pickler_Write(self, header, len) < 0)
3031         goto error;
3032 
3033     /* Get list length, and bow out early if empty. */
3034     if ((len = PyList_Size(obj)) < 0)
3035         goto error;
3036 
3037     if (memo_put(self, obj) < 0)
3038         goto error;
3039 
3040     if (len != 0) {
3041         /* Materialize the list elements. */
3042         if (PyList_CheckExact(obj) && self->proto > 0) {
3043             if (Py_EnterRecursiveCall(" while pickling an object"))
3044                 goto error;
3045             status = batch_list_exact(self, obj);
3046             Py_LeaveRecursiveCall();
3047         } else {
3048             PyObject *iter = PyObject_GetIter(obj);
3049             if (iter == NULL)
3050                 goto error;
3051 
3052             if (Py_EnterRecursiveCall(" while pickling an object")) {
3053                 Py_DECREF(iter);
3054                 goto error;
3055             }
3056             status = batch_list(self, iter);
3057             Py_LeaveRecursiveCall();
3058             Py_DECREF(iter);
3059         }
3060     }
3061     if (0) {
3062   error:
3063         status = -1;
3064     }
3065 
3066     if (self->fast && !fast_save_leave(self, obj))
3067         status = -1;
3068 
3069     return status;
3070 }
3071 
3072 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3073  *     MARK key value ... key value SETITEMS
3074  * opcode sequences.  Calling code should have arranged to first create an
3075  * empty dict, or dict-like object, for the SETITEMS to operate on.
3076  * Returns 0 on success, <0 on error.
3077  *
3078  * This is very much like batch_list().  The difference between saving
3079  * elements directly, and picking apart two-tuples, is so long-winded at
3080  * the C level, though, that attempts to combine these routines were too
3081  * ugly to bear.
3082  */
3083 static int
batch_dict(PicklerObject * self,PyObject * iter)3084 batch_dict(PicklerObject *self, PyObject *iter)
3085 {
3086     PyObject *obj = NULL;
3087     PyObject *firstitem = NULL;
3088     int i, n;
3089 
3090     const char mark_op = MARK;
3091     const char setitem_op = SETITEM;
3092     const char setitems_op = SETITEMS;
3093 
3094     assert(iter != NULL);
3095 
3096     if (self->proto == 0) {
3097         /* SETITEMS isn't available; do one at a time. */
3098         for (;;) {
3099             obj = PyIter_Next(iter);
3100             if (obj == NULL) {
3101                 if (PyErr_Occurred())
3102                     return -1;
3103                 break;
3104             }
3105             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3106                 PyErr_SetString(PyExc_TypeError, "dict items "
3107                                 "iterator must return 2-tuples");
3108                 return -1;
3109             }
3110             i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3111             if (i >= 0)
3112                 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3113             Py_DECREF(obj);
3114             if (i < 0)
3115                 return -1;
3116             if (_Pickler_Write(self, &setitem_op, 1) < 0)
3117                 return -1;
3118         }
3119         return 0;
3120     }
3121 
3122     /* proto > 0:  write in batches of BATCHSIZE. */
3123     do {
3124         /* Get first item */
3125         firstitem = PyIter_Next(iter);
3126         if (firstitem == NULL) {
3127             if (PyErr_Occurred())
3128                 goto error;
3129 
3130             /* nothing more to add */
3131             break;
3132         }
3133         if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3134             PyErr_SetString(PyExc_TypeError, "dict items "
3135                                 "iterator must return 2-tuples");
3136             goto error;
3137         }
3138 
3139         /* Try to get a second item */
3140         obj = PyIter_Next(iter);
3141         if (obj == NULL) {
3142             if (PyErr_Occurred())
3143                 goto error;
3144 
3145             /* Only one item to write */
3146             if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3147                 goto error;
3148             if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3149                 goto error;
3150             if (_Pickler_Write(self, &setitem_op, 1) < 0)
3151                 goto error;
3152             Py_CLEAR(firstitem);
3153             break;
3154         }
3155 
3156         /* More than one item to write */
3157 
3158         /* Pump out MARK, items, SETITEMS. */
3159         if (_Pickler_Write(self, &mark_op, 1) < 0)
3160             goto error;
3161 
3162         if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3163             goto error;
3164         if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3165             goto error;
3166         Py_CLEAR(firstitem);
3167         n = 1;
3168 
3169         /* Fetch and save up to BATCHSIZE items */
3170         while (obj) {
3171             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3172                 PyErr_SetString(PyExc_TypeError, "dict items "
3173                     "iterator must return 2-tuples");
3174                 goto error;
3175             }
3176             if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3177                 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3178                 goto error;
3179             Py_CLEAR(obj);
3180             n += 1;
3181 
3182             if (n == BATCHSIZE)
3183                 break;
3184 
3185             obj = PyIter_Next(iter);
3186             if (obj == NULL) {
3187                 if (PyErr_Occurred())
3188                     goto error;
3189                 break;
3190             }
3191         }
3192 
3193         if (_Pickler_Write(self, &setitems_op, 1) < 0)
3194             goto error;
3195 
3196     } while (n == BATCHSIZE);
3197     return 0;
3198 
3199   error:
3200     Py_XDECREF(firstitem);
3201     Py_XDECREF(obj);
3202     return -1;
3203 }
3204 
3205 /* This is a variant of batch_dict() above that specializes for dicts, with no
3206  * support for dict subclasses. Like batch_dict(), we batch up chunks of
3207  *     MARK key value ... key value SETITEMS
3208  * opcode sequences.  Calling code should have arranged to first create an
3209  * empty dict, or dict-like object, for the SETITEMS to operate on.
3210  * Returns 0 on success, -1 on error.
3211  *
3212  * Note that this currently doesn't work for protocol 0.
3213  */
3214 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)3215 batch_dict_exact(PicklerObject *self, PyObject *obj)
3216 {
3217     PyObject *key = NULL, *value = NULL;
3218     int i;
3219     Py_ssize_t dict_size, ppos = 0;
3220 
3221     const char mark_op = MARK;
3222     const char setitem_op = SETITEM;
3223     const char setitems_op = SETITEMS;
3224 
3225     assert(obj != NULL && PyDict_CheckExact(obj));
3226     assert(self->proto > 0);
3227 
3228     dict_size = PyDict_GET_SIZE(obj);
3229 
3230     /* Special-case len(d) == 1 to save space. */
3231     if (dict_size == 1) {
3232         PyDict_Next(obj, &ppos, &key, &value);
3233         if (save(self, key, 0) < 0)
3234             return -1;
3235         if (save(self, value, 0) < 0)
3236             return -1;
3237         if (_Pickler_Write(self, &setitem_op, 1) < 0)
3238             return -1;
3239         return 0;
3240     }
3241 
3242     /* Write in batches of BATCHSIZE. */
3243     do {
3244         i = 0;
3245         if (_Pickler_Write(self, &mark_op, 1) < 0)
3246             return -1;
3247         while (PyDict_Next(obj, &ppos, &key, &value)) {
3248             if (save(self, key, 0) < 0)
3249                 return -1;
3250             if (save(self, value, 0) < 0)
3251                 return -1;
3252             if (++i == BATCHSIZE)
3253                 break;
3254         }
3255         if (_Pickler_Write(self, &setitems_op, 1) < 0)
3256             return -1;
3257         if (PyDict_GET_SIZE(obj) != dict_size) {
3258             PyErr_Format(
3259                 PyExc_RuntimeError,
3260                 "dictionary changed size during iteration");
3261             return -1;
3262         }
3263 
3264     } while (i == BATCHSIZE);
3265     return 0;
3266 }
3267 
3268 static int
save_dict(PicklerObject * self,PyObject * obj)3269 save_dict(PicklerObject *self, PyObject *obj)
3270 {
3271     PyObject *items, *iter;
3272     char header[3];
3273     Py_ssize_t len;
3274     int status = 0;
3275     assert(PyDict_Check(obj));
3276 
3277     if (self->fast && !fast_save_enter(self, obj))
3278         goto error;
3279 
3280     /* Create an empty dict. */
3281     if (self->bin) {
3282         header[0] = EMPTY_DICT;
3283         len = 1;
3284     }
3285     else {
3286         header[0] = MARK;
3287         header[1] = DICT;
3288         len = 2;
3289     }
3290 
3291     if (_Pickler_Write(self, header, len) < 0)
3292         goto error;
3293 
3294     if (memo_put(self, obj) < 0)
3295         goto error;
3296 
3297     if (PyDict_GET_SIZE(obj)) {
3298         /* Save the dict items. */
3299         if (PyDict_CheckExact(obj) && self->proto > 0) {
3300             /* We can take certain shortcuts if we know this is a dict and
3301                not a dict subclass. */
3302             if (Py_EnterRecursiveCall(" while pickling an object"))
3303                 goto error;
3304             status = batch_dict_exact(self, obj);
3305             Py_LeaveRecursiveCall();
3306         } else {
3307             _Py_IDENTIFIER(items);
3308 
3309             items = _PyObject_CallMethodId(obj, &PyId_items, NULL);
3310             if (items == NULL)
3311                 goto error;
3312             iter = PyObject_GetIter(items);
3313             Py_DECREF(items);
3314             if (iter == NULL)
3315                 goto error;
3316             if (Py_EnterRecursiveCall(" while pickling an object")) {
3317                 Py_DECREF(iter);
3318                 goto error;
3319             }
3320             status = batch_dict(self, iter);
3321             Py_LeaveRecursiveCall();
3322             Py_DECREF(iter);
3323         }
3324     }
3325 
3326     if (0) {
3327   error:
3328         status = -1;
3329     }
3330 
3331     if (self->fast && !fast_save_leave(self, obj))
3332         status = -1;
3333 
3334     return status;
3335 }
3336 
3337 static int
save_set(PicklerObject * self,PyObject * obj)3338 save_set(PicklerObject *self, PyObject *obj)
3339 {
3340     PyObject *item;
3341     int i;
3342     Py_ssize_t set_size, ppos = 0;
3343     Py_hash_t hash;
3344 
3345     const char empty_set_op = EMPTY_SET;
3346     const char mark_op = MARK;
3347     const char additems_op = ADDITEMS;
3348 
3349     if (self->proto < 4) {
3350         PyObject *items;
3351         PyObject *reduce_value;
3352         int status;
3353 
3354         items = PySequence_List(obj);
3355         if (items == NULL) {
3356             return -1;
3357         }
3358         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3359         Py_DECREF(items);
3360         if (reduce_value == NULL) {
3361             return -1;
3362         }
3363         /* save_reduce() will memoize the object automatically. */
3364         status = save_reduce(self, reduce_value, obj);
3365         Py_DECREF(reduce_value);
3366         return status;
3367     }
3368 
3369     if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3370         return -1;
3371 
3372     if (memo_put(self, obj) < 0)
3373         return -1;
3374 
3375     set_size = PySet_GET_SIZE(obj);
3376     if (set_size == 0)
3377         return 0;  /* nothing to do */
3378 
3379     /* Write in batches of BATCHSIZE. */
3380     do {
3381         i = 0;
3382         if (_Pickler_Write(self, &mark_op, 1) < 0)
3383             return -1;
3384         while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3385             if (save(self, item, 0) < 0)
3386                 return -1;
3387             if (++i == BATCHSIZE)
3388                 break;
3389         }
3390         if (_Pickler_Write(self, &additems_op, 1) < 0)
3391             return -1;
3392         if (PySet_GET_SIZE(obj) != set_size) {
3393             PyErr_Format(
3394                 PyExc_RuntimeError,
3395                 "set changed size during iteration");
3396             return -1;
3397         }
3398     } while (i == BATCHSIZE);
3399 
3400     return 0;
3401 }
3402 
3403 static int
save_frozenset(PicklerObject * self,PyObject * obj)3404 save_frozenset(PicklerObject *self, PyObject *obj)
3405 {
3406     PyObject *iter;
3407 
3408     const char mark_op = MARK;
3409     const char frozenset_op = FROZENSET;
3410 
3411     if (self->fast && !fast_save_enter(self, obj))
3412         return -1;
3413 
3414     if (self->proto < 4) {
3415         PyObject *items;
3416         PyObject *reduce_value;
3417         int status;
3418 
3419         items = PySequence_List(obj);
3420         if (items == NULL) {
3421             return -1;
3422         }
3423         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3424                                      items);
3425         Py_DECREF(items);
3426         if (reduce_value == NULL) {
3427             return -1;
3428         }
3429         /* save_reduce() will memoize the object automatically. */
3430         status = save_reduce(self, reduce_value, obj);
3431         Py_DECREF(reduce_value);
3432         return status;
3433     }
3434 
3435     if (_Pickler_Write(self, &mark_op, 1) < 0)
3436         return -1;
3437 
3438     iter = PyObject_GetIter(obj);
3439     if (iter == NULL) {
3440         return -1;
3441     }
3442     for (;;) {
3443         PyObject *item;
3444 
3445         item = PyIter_Next(iter);
3446         if (item == NULL) {
3447             if (PyErr_Occurred()) {
3448                 Py_DECREF(iter);
3449                 return -1;
3450             }
3451             break;
3452         }
3453         if (save(self, item, 0) < 0) {
3454             Py_DECREF(item);
3455             Py_DECREF(iter);
3456             return -1;
3457         }
3458         Py_DECREF(item);
3459     }
3460     Py_DECREF(iter);
3461 
3462     /* If the object is already in the memo, this means it is
3463        recursive. In this case, throw away everything we put on the
3464        stack, and fetch the object back from the memo. */
3465     if (PyMemoTable_Get(self->memo, obj)) {
3466         const char pop_mark_op = POP_MARK;
3467 
3468         if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3469             return -1;
3470         if (memo_get(self, obj) < 0)
3471             return -1;
3472         return 0;
3473     }
3474 
3475     if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3476         return -1;
3477     if (memo_put(self, obj) < 0)
3478         return -1;
3479 
3480     return 0;
3481 }
3482 
3483 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3484 fix_imports(PyObject **module_name, PyObject **global_name)
3485 {
3486     PyObject *key;
3487     PyObject *item;
3488     PickleState *st = _Pickle_GetGlobalState();
3489 
3490     key = PyTuple_Pack(2, *module_name, *global_name);
3491     if (key == NULL)
3492         return -1;
3493     item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3494     Py_DECREF(key);
3495     if (item) {
3496         PyObject *fixed_module_name;
3497         PyObject *fixed_global_name;
3498 
3499         if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3500             PyErr_Format(PyExc_RuntimeError,
3501                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3502                          "should be 2-tuples, not %.200s",
3503                          Py_TYPE(item)->tp_name);
3504             return -1;
3505         }
3506         fixed_module_name = PyTuple_GET_ITEM(item, 0);
3507         fixed_global_name = PyTuple_GET_ITEM(item, 1);
3508         if (!PyUnicode_Check(fixed_module_name) ||
3509             !PyUnicode_Check(fixed_global_name)) {
3510             PyErr_Format(PyExc_RuntimeError,
3511                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3512                          "should be pairs of str, not (%.200s, %.200s)",
3513                          Py_TYPE(fixed_module_name)->tp_name,
3514                          Py_TYPE(fixed_global_name)->tp_name);
3515             return -1;
3516         }
3517 
3518         Py_CLEAR(*module_name);
3519         Py_CLEAR(*global_name);
3520         Py_INCREF(fixed_module_name);
3521         Py_INCREF(fixed_global_name);
3522         *module_name = fixed_module_name;
3523         *global_name = fixed_global_name;
3524         return 0;
3525     }
3526     else if (PyErr_Occurred()) {
3527         return -1;
3528     }
3529 
3530     item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3531     if (item) {
3532         if (!PyUnicode_Check(item)) {
3533             PyErr_Format(PyExc_RuntimeError,
3534                          "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3535                          "should be strings, not %.200s",
3536                          Py_TYPE(item)->tp_name);
3537             return -1;
3538         }
3539         Py_INCREF(item);
3540         Py_XSETREF(*module_name, item);
3541     }
3542     else if (PyErr_Occurred()) {
3543         return -1;
3544     }
3545 
3546     return 0;
3547 }
3548 
3549 static int
save_global(PicklerObject * self,PyObject * obj,PyObject * name)3550 save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3551 {
3552     PyObject *global_name = NULL;
3553     PyObject *module_name = NULL;
3554     PyObject *module = NULL;
3555     PyObject *parent = NULL;
3556     PyObject *dotted_path = NULL;
3557     PyObject *lastname = NULL;
3558     PyObject *cls;
3559     PickleState *st = _Pickle_GetGlobalState();
3560     int status = 0;
3561     _Py_IDENTIFIER(__name__);
3562     _Py_IDENTIFIER(__qualname__);
3563 
3564     const char global_op = GLOBAL;
3565 
3566     if (name) {
3567         Py_INCREF(name);
3568         global_name = name;
3569     }
3570     else {
3571         if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
3572             goto error;
3573         if (global_name == NULL) {
3574             global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3575             if (global_name == NULL)
3576                 goto error;
3577         }
3578     }
3579 
3580     dotted_path = get_dotted_path(module, global_name);
3581     if (dotted_path == NULL)
3582         goto error;
3583     module_name = whichmodule(obj, dotted_path);
3584     if (module_name == NULL)
3585         goto error;
3586 
3587     /* XXX: Change to use the import C API directly with level=0 to disallow
3588        relative imports.
3589 
3590        XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3591        builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3592        custom import functions (IMHO, this would be a nice security
3593        feature). The import C API would need to be extended to support the
3594        extra parameters of __import__ to fix that. */
3595     module = PyImport_Import(module_name);
3596     if (module == NULL) {
3597         PyErr_Format(st->PicklingError,
3598                      "Can't pickle %R: import of module %R failed",
3599                      obj, module_name);
3600         goto error;
3601     }
3602     lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3603     Py_INCREF(lastname);
3604     cls = get_deep_attribute(module, dotted_path, &parent);
3605     Py_CLEAR(dotted_path);
3606     if (cls == NULL) {
3607         PyErr_Format(st->PicklingError,
3608                      "Can't pickle %R: attribute lookup %S on %S failed",
3609                      obj, global_name, module_name);
3610         goto error;
3611     }
3612     if (cls != obj) {
3613         Py_DECREF(cls);
3614         PyErr_Format(st->PicklingError,
3615                      "Can't pickle %R: it's not the same object as %S.%S",
3616                      obj, module_name, global_name);
3617         goto error;
3618     }
3619     Py_DECREF(cls);
3620 
3621     if (self->proto >= 2) {
3622         /* See whether this is in the extension registry, and if
3623          * so generate an EXT opcode.
3624          */
3625         PyObject *extension_key;
3626         PyObject *code_obj;      /* extension code as Python object */
3627         long code;               /* extension code as C value */
3628         char pdata[5];
3629         Py_ssize_t n;
3630 
3631         extension_key = PyTuple_Pack(2, module_name, global_name);
3632         if (extension_key == NULL) {
3633             goto error;
3634         }
3635         code_obj = PyDict_GetItemWithError(st->extension_registry,
3636                                            extension_key);
3637         Py_DECREF(extension_key);
3638         /* The object is not registered in the extension registry.
3639            This is the most likely code path. */
3640         if (code_obj == NULL) {
3641             if (PyErr_Occurred()) {
3642                 goto error;
3643             }
3644             goto gen_global;
3645         }
3646 
3647         /* XXX: pickle.py doesn't check neither the type, nor the range
3648            of the value returned by the extension_registry. It should for
3649            consistency. */
3650 
3651         /* Verify code_obj has the right type and value. */
3652         if (!PyLong_Check(code_obj)) {
3653             PyErr_Format(st->PicklingError,
3654                          "Can't pickle %R: extension code %R isn't an integer",
3655                          obj, code_obj);
3656             goto error;
3657         }
3658         code = PyLong_AS_LONG(code_obj);
3659         if (code <= 0 || code > 0x7fffffffL) {
3660             if (!PyErr_Occurred())
3661                 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3662                              "code %ld is out of range", obj, code);
3663             goto error;
3664         }
3665 
3666         /* Generate an EXT opcode. */
3667         if (code <= 0xff) {
3668             pdata[0] = EXT1;
3669             pdata[1] = (unsigned char)code;
3670             n = 2;
3671         }
3672         else if (code <= 0xffff) {
3673             pdata[0] = EXT2;
3674             pdata[1] = (unsigned char)(code & 0xff);
3675             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3676             n = 3;
3677         }
3678         else {
3679             pdata[0] = EXT4;
3680             pdata[1] = (unsigned char)(code & 0xff);
3681             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3682             pdata[3] = (unsigned char)((code >> 16) & 0xff);
3683             pdata[4] = (unsigned char)((code >> 24) & 0xff);
3684             n = 5;
3685         }
3686 
3687         if (_Pickler_Write(self, pdata, n) < 0)
3688             goto error;
3689     }
3690     else {
3691   gen_global:
3692         if (parent == module) {
3693             Py_INCREF(lastname);
3694             Py_DECREF(global_name);
3695             global_name = lastname;
3696         }
3697         if (self->proto >= 4) {
3698             const char stack_global_op = STACK_GLOBAL;
3699 
3700             if (save(self, module_name, 0) < 0)
3701                 goto error;
3702             if (save(self, global_name, 0) < 0)
3703                 goto error;
3704 
3705             if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3706                 goto error;
3707         }
3708         else if (parent != module) {
3709             PickleState *st = _Pickle_GetGlobalState();
3710             PyObject *reduce_value = Py_BuildValue("(O(OO))",
3711                                         st->getattr, parent, lastname);
3712             if (reduce_value == NULL)
3713                 goto error;
3714             status = save_reduce(self, reduce_value, NULL);
3715             Py_DECREF(reduce_value);
3716             if (status < 0)
3717                 goto error;
3718         }
3719         else {
3720             /* Generate a normal global opcode if we are using a pickle
3721                protocol < 4, or if the object is not registered in the
3722                extension registry. */
3723             PyObject *encoded;
3724             PyObject *(*unicode_encoder)(PyObject *);
3725 
3726             if (_Pickler_Write(self, &global_op, 1) < 0)
3727                 goto error;
3728 
3729             /* For protocol < 3 and if the user didn't request against doing
3730                so, we convert module names to the old 2.x module names. */
3731             if (self->proto < 3 && self->fix_imports) {
3732                 if (fix_imports(&module_name, &global_name) < 0) {
3733                     goto error;
3734                 }
3735             }
3736 
3737             /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3738                both the module name and the global name using UTF-8. We do so
3739                only when we are using the pickle protocol newer than version
3740                3. This is to ensure compatibility with older Unpickler running
3741                on Python 2.x. */
3742             if (self->proto == 3) {
3743                 unicode_encoder = PyUnicode_AsUTF8String;
3744             }
3745             else {
3746                 unicode_encoder = PyUnicode_AsASCIIString;
3747             }
3748             encoded = unicode_encoder(module_name);
3749             if (encoded == NULL) {
3750                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3751                     PyErr_Format(st->PicklingError,
3752                                  "can't pickle module identifier '%S' using "
3753                                  "pickle protocol %i",
3754                                  module_name, self->proto);
3755                 goto error;
3756             }
3757             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3758                                PyBytes_GET_SIZE(encoded)) < 0) {
3759                 Py_DECREF(encoded);
3760                 goto error;
3761             }
3762             Py_DECREF(encoded);
3763             if(_Pickler_Write(self, "\n", 1) < 0)
3764                 goto error;
3765 
3766             /* Save the name of the module. */
3767             encoded = unicode_encoder(global_name);
3768             if (encoded == NULL) {
3769                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3770                     PyErr_Format(st->PicklingError,
3771                                  "can't pickle global identifier '%S' using "
3772                                  "pickle protocol %i",
3773                                  global_name, self->proto);
3774                 goto error;
3775             }
3776             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3777                                PyBytes_GET_SIZE(encoded)) < 0) {
3778                 Py_DECREF(encoded);
3779                 goto error;
3780             }
3781             Py_DECREF(encoded);
3782             if (_Pickler_Write(self, "\n", 1) < 0)
3783                 goto error;
3784         }
3785         /* Memoize the object. */
3786         if (memo_put(self, obj) < 0)
3787             goto error;
3788     }
3789 
3790     if (0) {
3791   error:
3792         status = -1;
3793     }
3794     Py_XDECREF(module_name);
3795     Py_XDECREF(global_name);
3796     Py_XDECREF(module);
3797     Py_XDECREF(parent);
3798     Py_XDECREF(dotted_path);
3799     Py_XDECREF(lastname);
3800 
3801     return status;
3802 }
3803 
3804 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3805 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3806 {
3807     PyObject *reduce_value;
3808     int status;
3809 
3810     reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3811     if (reduce_value == NULL) {
3812         return -1;
3813     }
3814     status = save_reduce(self, reduce_value, obj);
3815     Py_DECREF(reduce_value);
3816     return status;
3817 }
3818 
3819 static int
save_type(PicklerObject * self,PyObject * obj)3820 save_type(PicklerObject *self, PyObject *obj)
3821 {
3822     if (obj == (PyObject *)&_PyNone_Type) {
3823         return save_singleton_type(self, obj, Py_None);
3824     }
3825     else if (obj == (PyObject *)&PyEllipsis_Type) {
3826         return save_singleton_type(self, obj, Py_Ellipsis);
3827     }
3828     else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3829         return save_singleton_type(self, obj, Py_NotImplemented);
3830     }
3831     return save_global(self, obj, NULL);
3832 }
3833 
3834 static int
save_pers(PicklerObject * self,PyObject * obj)3835 save_pers(PicklerObject *self, PyObject *obj)
3836 {
3837     PyObject *pid = NULL;
3838     int status = 0;
3839 
3840     const char persid_op = PERSID;
3841     const char binpersid_op = BINPERSID;
3842 
3843     pid = call_method(self->pers_func, self->pers_func_self, obj);
3844     if (pid == NULL)
3845         return -1;
3846 
3847     if (pid != Py_None) {
3848         if (self->bin) {
3849             if (save(self, pid, 1) < 0 ||
3850                 _Pickler_Write(self, &binpersid_op, 1) < 0)
3851                 goto error;
3852         }
3853         else {
3854             PyObject *pid_str;
3855 
3856             pid_str = PyObject_Str(pid);
3857             if (pid_str == NULL)
3858                 goto error;
3859 
3860             /* XXX: Should it check whether the pid contains embedded
3861                newlines? */
3862             if (!PyUnicode_IS_ASCII(pid_str)) {
3863                 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3864                                 "persistent IDs in protocol 0 must be "
3865                                 "ASCII strings");
3866                 Py_DECREF(pid_str);
3867                 goto error;
3868             }
3869 
3870             if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3871                 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3872                                PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3873                 _Pickler_Write(self, "\n", 1) < 0) {
3874                 Py_DECREF(pid_str);
3875                 goto error;
3876             }
3877             Py_DECREF(pid_str);
3878         }
3879         status = 1;
3880     }
3881 
3882     if (0) {
3883   error:
3884         status = -1;
3885     }
3886     Py_XDECREF(pid);
3887 
3888     return status;
3889 }
3890 
3891 static PyObject *
get_class(PyObject * obj)3892 get_class(PyObject *obj)
3893 {
3894     PyObject *cls;
3895     _Py_IDENTIFIER(__class__);
3896 
3897     if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3898         cls = (PyObject *) Py_TYPE(obj);
3899         Py_INCREF(cls);
3900     }
3901     return cls;
3902 }
3903 
3904 /* We're saving obj, and args is the 2-thru-5 tuple returned by the
3905  * appropriate __reduce__ method for obj.
3906  */
3907 static int
save_reduce(PicklerObject * self,PyObject * args,PyObject * obj)3908 save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3909 {
3910     PyObject *callable;
3911     PyObject *argtup;
3912     PyObject *state = NULL;
3913     PyObject *listitems = Py_None;
3914     PyObject *dictitems = Py_None;
3915     PyObject *state_setter = Py_None;
3916     PickleState *st = _Pickle_GetGlobalState();
3917     Py_ssize_t size;
3918     int use_newobj = 0, use_newobj_ex = 0;
3919 
3920     const char reduce_op = REDUCE;
3921     const char build_op = BUILD;
3922     const char newobj_op = NEWOBJ;
3923     const char newobj_ex_op = NEWOBJ_EX;
3924 
3925     size = PyTuple_Size(args);
3926     if (size < 2 || size > 6) {
3927         PyErr_SetString(st->PicklingError, "tuple returned by "
3928                         "__reduce__ must contain 2 through 6 elements");
3929         return -1;
3930     }
3931 
3932     if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
3933                            &callable, &argtup, &state, &listitems, &dictitems,
3934                            &state_setter))
3935         return -1;
3936 
3937     if (!PyCallable_Check(callable)) {
3938         PyErr_SetString(st->PicklingError, "first item of the tuple "
3939                         "returned by __reduce__ must be callable");
3940         return -1;
3941     }
3942     if (!PyTuple_Check(argtup)) {
3943         PyErr_SetString(st->PicklingError, "second item of the tuple "
3944                         "returned by __reduce__ must be a tuple");
3945         return -1;
3946     }
3947 
3948     if (state == Py_None)
3949         state = NULL;
3950 
3951     if (listitems == Py_None)
3952         listitems = NULL;
3953     else if (!PyIter_Check(listitems)) {
3954         PyErr_Format(st->PicklingError, "fourth element of the tuple "
3955                      "returned by __reduce__ must be an iterator, not %s",
3956                      Py_TYPE(listitems)->tp_name);
3957         return -1;
3958     }
3959 
3960     if (dictitems == Py_None)
3961         dictitems = NULL;
3962     else if (!PyIter_Check(dictitems)) {
3963         PyErr_Format(st->PicklingError, "fifth element of the tuple "
3964                      "returned by __reduce__ must be an iterator, not %s",
3965                      Py_TYPE(dictitems)->tp_name);
3966         return -1;
3967     }
3968 
3969     if (state_setter == Py_None)
3970         state_setter = NULL;
3971     else if (!PyCallable_Check(state_setter)) {
3972         PyErr_Format(st->PicklingError, "sixth element of the tuple "
3973                      "returned by __reduce__ must be a function, not %s",
3974                      Py_TYPE(state_setter)->tp_name);
3975         return -1;
3976     }
3977 
3978     if (self->proto >= 2) {
3979         PyObject *name;
3980         _Py_IDENTIFIER(__name__);
3981 
3982         if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
3983             return -1;
3984         }
3985         if (name != NULL && PyUnicode_Check(name)) {
3986             _Py_IDENTIFIER(__newobj_ex__);
3987             use_newobj_ex = _PyUnicode_EqualToASCIIId(
3988                     name, &PyId___newobj_ex__);
3989             if (!use_newobj_ex) {
3990                 _Py_IDENTIFIER(__newobj__);
3991                 use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
3992             }
3993         }
3994         Py_XDECREF(name);
3995     }
3996 
3997     if (use_newobj_ex) {
3998         PyObject *cls;
3999         PyObject *args;
4000         PyObject *kwargs;
4001 
4002         if (PyTuple_GET_SIZE(argtup) != 3) {
4003             PyErr_Format(st->PicklingError,
4004                          "length of the NEWOBJ_EX argument tuple must be "
4005                          "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
4006             return -1;
4007         }
4008 
4009         cls = PyTuple_GET_ITEM(argtup, 0);
4010         if (!PyType_Check(cls)) {
4011             PyErr_Format(st->PicklingError,
4012                          "first item from NEWOBJ_EX argument tuple must "
4013                          "be a class, not %.200s", Py_TYPE(cls)->tp_name);
4014             return -1;
4015         }
4016         args = PyTuple_GET_ITEM(argtup, 1);
4017         if (!PyTuple_Check(args)) {
4018             PyErr_Format(st->PicklingError,
4019                          "second item from NEWOBJ_EX argument tuple must "
4020                          "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
4021             return -1;
4022         }
4023         kwargs = PyTuple_GET_ITEM(argtup, 2);
4024         if (!PyDict_Check(kwargs)) {
4025             PyErr_Format(st->PicklingError,
4026                          "third item from NEWOBJ_EX argument tuple must "
4027                          "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
4028             return -1;
4029         }
4030 
4031         if (self->proto >= 4) {
4032             if (save(self, cls, 0) < 0 ||
4033                 save(self, args, 0) < 0 ||
4034                 save(self, kwargs, 0) < 0 ||
4035                 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
4036                 return -1;
4037             }
4038         }
4039         else {
4040             PyObject *newargs;
4041             PyObject *cls_new;
4042             Py_ssize_t i;
4043             _Py_IDENTIFIER(__new__);
4044 
4045             newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
4046             if (newargs == NULL)
4047                 return -1;
4048 
4049             cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
4050             if (cls_new == NULL) {
4051                 Py_DECREF(newargs);
4052                 return -1;
4053             }
4054             PyTuple_SET_ITEM(newargs, 0, cls_new);
4055             Py_INCREF(cls);
4056             PyTuple_SET_ITEM(newargs, 1, cls);
4057             for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
4058                 PyObject *item = PyTuple_GET_ITEM(args, i);
4059                 Py_INCREF(item);
4060                 PyTuple_SET_ITEM(newargs, i + 2, item);
4061             }
4062 
4063             callable = PyObject_Call(st->partial, newargs, kwargs);
4064             Py_DECREF(newargs);
4065             if (callable == NULL)
4066                 return -1;
4067 
4068             newargs = PyTuple_New(0);
4069             if (newargs == NULL) {
4070                 Py_DECREF(callable);
4071                 return -1;
4072             }
4073 
4074             if (save(self, callable, 0) < 0 ||
4075                 save(self, newargs, 0) < 0 ||
4076                 _Pickler_Write(self, &reduce_op, 1) < 0) {
4077                 Py_DECREF(newargs);
4078                 Py_DECREF(callable);
4079                 return -1;
4080             }
4081             Py_DECREF(newargs);
4082             Py_DECREF(callable);
4083         }
4084     }
4085     else if (use_newobj) {
4086         PyObject *cls;
4087         PyObject *newargtup;
4088         PyObject *obj_class;
4089         int p;
4090 
4091         /* Sanity checks. */
4092         if (PyTuple_GET_SIZE(argtup) < 1) {
4093             PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
4094             return -1;
4095         }
4096 
4097         cls = PyTuple_GET_ITEM(argtup, 0);
4098         if (!PyType_Check(cls)) {
4099             PyErr_SetString(st->PicklingError, "args[0] from "
4100                             "__newobj__ args is not a type");
4101             return -1;
4102         }
4103 
4104         if (obj != NULL) {
4105             obj_class = get_class(obj);
4106             if (obj_class == NULL) {
4107                 return -1;
4108             }
4109             p = obj_class != cls;
4110             Py_DECREF(obj_class);
4111             if (p) {
4112                 PyErr_SetString(st->PicklingError, "args[0] from "
4113                                 "__newobj__ args has the wrong class");
4114                 return -1;
4115             }
4116         }
4117         /* XXX: These calls save() are prone to infinite recursion. Imagine
4118            what happen if the value returned by the __reduce__() method of
4119            some extension type contains another object of the same type. Ouch!
4120 
4121            Here is a quick example, that I ran into, to illustrate what I
4122            mean:
4123 
4124              >>> import pickle, copyreg
4125              >>> copyreg.dispatch_table.pop(complex)
4126              >>> pickle.dumps(1+2j)
4127              Traceback (most recent call last):
4128                ...
4129              RecursionError: maximum recursion depth exceeded
4130 
4131            Removing the complex class from copyreg.dispatch_table made the
4132            __reduce_ex__() method emit another complex object:
4133 
4134              >>> (1+1j).__reduce_ex__(2)
4135              (<function __newobj__ at 0xb7b71c3c>,
4136                (<class 'complex'>, (1+1j)), None, None, None)
4137 
4138            Thus when save() was called on newargstup (the 2nd item) recursion
4139            ensued. Of course, the bug was in the complex class which had a
4140            broken __getnewargs__() that emitted another complex object. But,
4141            the point, here, is it is quite easy to end up with a broken reduce
4142            function. */
4143 
4144         /* Save the class and its __new__ arguments. */
4145         if (save(self, cls, 0) < 0)
4146             return -1;
4147 
4148         newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
4149         if (newargtup == NULL)
4150             return -1;
4151 
4152         p = save(self, newargtup, 0);
4153         Py_DECREF(newargtup);
4154         if (p < 0)
4155             return -1;
4156 
4157         /* Add NEWOBJ opcode. */
4158         if (_Pickler_Write(self, &newobj_op, 1) < 0)
4159             return -1;
4160     }
4161     else { /* Not using NEWOBJ. */
4162         if (save(self, callable, 0) < 0 ||
4163             save(self, argtup, 0) < 0 ||
4164             _Pickler_Write(self, &reduce_op, 1) < 0)
4165             return -1;
4166     }
4167 
4168     /* obj can be NULL when save_reduce() is used directly. A NULL obj means
4169        the caller do not want to memoize the object. Not particularly useful,
4170        but that is to mimic the behavior save_reduce() in pickle.py when
4171        obj is None. */
4172     if (obj != NULL) {
4173         /* If the object is already in the memo, this means it is
4174            recursive. In this case, throw away everything we put on the
4175            stack, and fetch the object back from the memo. */
4176         if (PyMemoTable_Get(self->memo, obj)) {
4177             const char pop_op = POP;
4178 
4179             if (_Pickler_Write(self, &pop_op, 1) < 0)
4180                 return -1;
4181             if (memo_get(self, obj) < 0)
4182                 return -1;
4183 
4184             return 0;
4185         }
4186         else if (memo_put(self, obj) < 0)
4187             return -1;
4188     }
4189 
4190     if (listitems && batch_list(self, listitems) < 0)
4191         return -1;
4192 
4193     if (dictitems && batch_dict(self, dictitems) < 0)
4194         return -1;
4195 
4196     if (state) {
4197         if (state_setter == NULL) {
4198             if (save(self, state, 0) < 0 ||
4199                 _Pickler_Write(self, &build_op, 1) < 0)
4200                 return -1;
4201         }
4202         else {
4203 
4204             /* If a state_setter is specified, call it instead of load_build to
4205              * update obj's with its previous state.
4206              * The first 4 save/write instructions push state_setter and its
4207              * tuple of expected arguments (obj, state) onto the stack. The
4208              * REDUCE opcode triggers the state_setter(obj, state) function
4209              * call. Finally, because state-updating routines only do in-place
4210              * modification, the whole operation has to be stack-transparent.
4211              * Thus, we finally pop the call's output from the stack.*/
4212 
4213             const char tupletwo_op = TUPLE2;
4214             const char pop_op = POP;
4215             if (save(self, state_setter, 0) < 0 ||
4216                 save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
4217                 _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
4218                 _Pickler_Write(self, &reduce_op, 1) < 0 ||
4219                 _Pickler_Write(self, &pop_op, 1) < 0)
4220                 return -1;
4221         }
4222     }
4223     return 0;
4224 }
4225 
4226 static int
save(PicklerObject * self,PyObject * obj,int pers_save)4227 save(PicklerObject *self, PyObject *obj, int pers_save)
4228 {
4229     PyTypeObject *type;
4230     PyObject *reduce_func = NULL;
4231     PyObject *reduce_value = NULL;
4232     int status = 0;
4233 
4234     if (_Pickler_OpcodeBoundary(self) < 0)
4235         return -1;
4236 
4237     /* The extra pers_save argument is necessary to avoid calling save_pers()
4238        on its returned object. */
4239     if (!pers_save && self->pers_func) {
4240         /* save_pers() returns:
4241             -1   to signal an error;
4242              0   if it did nothing successfully;
4243              1   if a persistent id was saved.
4244          */
4245         if ((status = save_pers(self, obj)) != 0)
4246             return status;
4247     }
4248 
4249     type = Py_TYPE(obj);
4250 
4251     /* The old cPickle had an optimization that used switch-case statement
4252        dispatching on the first letter of the type name.  This has was removed
4253        since benchmarks shown that this optimization was actually slowing
4254        things down. */
4255 
4256     /* Atom types; these aren't memoized, so don't check the memo. */
4257 
4258     if (obj == Py_None) {
4259         return save_none(self, obj);
4260     }
4261     else if (obj == Py_False || obj == Py_True) {
4262         return save_bool(self, obj);
4263     }
4264     else if (type == &PyLong_Type) {
4265         return save_long(self, obj);
4266     }
4267     else if (type == &PyFloat_Type) {
4268         return save_float(self, obj);
4269     }
4270 
4271     /* Check the memo to see if it has the object. If so, generate
4272        a GET (or BINGET) opcode, instead of pickling the object
4273        once again. */
4274     if (PyMemoTable_Get(self->memo, obj)) {
4275         return memo_get(self, obj);
4276     }
4277 
4278     if (type == &PyBytes_Type) {
4279         return save_bytes(self, obj);
4280     }
4281     else if (type == &PyUnicode_Type) {
4282         return save_unicode(self, obj);
4283     }
4284 
4285     /* We're only calling Py_EnterRecursiveCall here so that atomic
4286        types above are pickled faster. */
4287     if (Py_EnterRecursiveCall(" while pickling an object")) {
4288         return -1;
4289     }
4290 
4291     if (type == &PyDict_Type) {
4292         status = save_dict(self, obj);
4293         goto done;
4294     }
4295     else if (type == &PySet_Type) {
4296         status = save_set(self, obj);
4297         goto done;
4298     }
4299     else if (type == &PyFrozenSet_Type) {
4300         status = save_frozenset(self, obj);
4301         goto done;
4302     }
4303     else if (type == &PyList_Type) {
4304         status = save_list(self, obj);
4305         goto done;
4306     }
4307     else if (type == &PyTuple_Type) {
4308         status = save_tuple(self, obj);
4309         goto done;
4310     }
4311     else if (type == &PyByteArray_Type) {
4312         status = save_bytearray(self, obj);
4313         goto done;
4314     }
4315     else if (type == &PyPickleBuffer_Type) {
4316         status = save_picklebuffer(self, obj);
4317         goto done;
4318     }
4319 
4320     /* Now, check reducer_override.  If it returns NotImplemented,
4321      * fallback to save_type or save_global, and then perhaps to the
4322      * regular reduction mechanism.
4323      */
4324     if (self->reducer_override != NULL) {
4325         reduce_value = PyObject_CallFunctionObjArgs(self->reducer_override,
4326                                                     obj, NULL);
4327         if (reduce_value == NULL) {
4328             goto error;
4329         }
4330         if (reduce_value != Py_NotImplemented) {
4331             goto reduce;
4332         }
4333         Py_DECREF(reduce_value);
4334         reduce_value = NULL;
4335     }
4336 
4337     if (type == &PyType_Type) {
4338         status = save_type(self, obj);
4339         goto done;
4340     }
4341     else if (type == &PyFunction_Type) {
4342         status = save_global(self, obj, NULL);
4343         goto done;
4344     }
4345 
4346     /* XXX: This part needs some unit tests. */
4347 
4348     /* Get a reduction callable, and call it.  This may come from
4349      * self.dispatch_table, copyreg.dispatch_table, the object's
4350      * __reduce_ex__ method, or the object's __reduce__ method.
4351      */
4352     if (self->dispatch_table == NULL) {
4353         PickleState *st = _Pickle_GetGlobalState();
4354         reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4355                                               (PyObject *)type);
4356         if (reduce_func == NULL) {
4357             if (PyErr_Occurred()) {
4358                 goto error;
4359             }
4360         } else {
4361             /* PyDict_GetItemWithError() returns a borrowed reference.
4362                Increase the reference count to be consistent with
4363                PyObject_GetItem and _PyObject_GetAttrId used below. */
4364             Py_INCREF(reduce_func);
4365         }
4366     } else {
4367         reduce_func = PyObject_GetItem(self->dispatch_table,
4368                                        (PyObject *)type);
4369         if (reduce_func == NULL) {
4370             if (PyErr_ExceptionMatches(PyExc_KeyError))
4371                 PyErr_Clear();
4372             else
4373                 goto error;
4374         }
4375     }
4376     if (reduce_func != NULL) {
4377         Py_INCREF(obj);
4378         reduce_value = _Pickle_FastCall(reduce_func, obj);
4379     }
4380     else if (PyType_IsSubtype(type, &PyType_Type)) {
4381         status = save_global(self, obj, NULL);
4382         goto done;
4383     }
4384     else {
4385         _Py_IDENTIFIER(__reduce__);
4386         _Py_IDENTIFIER(__reduce_ex__);
4387 
4388         /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4389            automatically defined as __reduce__. While this is convenient, this
4390            make it impossible to know which method was actually called. Of
4391            course, this is not a big deal. But still, it would be nice to let
4392            the user know which method was called when something go
4393            wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4394            don't actually have to check for a __reduce__ method. */
4395 
4396         /* Check for a __reduce_ex__ method. */
4397         if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
4398             goto error;
4399         }
4400         if (reduce_func != NULL) {
4401             PyObject *proto;
4402             proto = PyLong_FromLong(self->proto);
4403             if (proto != NULL) {
4404                 reduce_value = _Pickle_FastCall(reduce_func, proto);
4405             }
4406         }
4407         else {
4408             /* Check for a __reduce__ method. */
4409             if (_PyObject_LookupAttrId(obj, &PyId___reduce__, &reduce_func) < 0) {
4410                 goto error;
4411             }
4412             if (reduce_func != NULL) {
4413                 reduce_value = _PyObject_CallNoArg(reduce_func);
4414             }
4415             else {
4416                 PickleState *st = _Pickle_GetGlobalState();
4417                 PyErr_Format(st->PicklingError,
4418                              "can't pickle '%.200s' object: %R",
4419                              type->tp_name, obj);
4420                 goto error;
4421             }
4422         }
4423     }
4424 
4425     if (reduce_value == NULL)
4426         goto error;
4427 
4428   reduce:
4429     if (PyUnicode_Check(reduce_value)) {
4430         status = save_global(self, obj, reduce_value);
4431         goto done;
4432     }
4433 
4434     if (!PyTuple_Check(reduce_value)) {
4435         PickleState *st = _Pickle_GetGlobalState();
4436         PyErr_SetString(st->PicklingError,
4437                         "__reduce__ must return a string or tuple");
4438         goto error;
4439     }
4440 
4441     status = save_reduce(self, reduce_value, obj);
4442 
4443     if (0) {
4444   error:
4445         status = -1;
4446     }
4447   done:
4448 
4449     Py_LeaveRecursiveCall();
4450     Py_XDECREF(reduce_func);
4451     Py_XDECREF(reduce_value);
4452 
4453     return status;
4454 }
4455 
4456 static int
dump(PicklerObject * self,PyObject * obj)4457 dump(PicklerObject *self, PyObject *obj)
4458 {
4459     const char stop_op = STOP;
4460     PyObject *tmp;
4461     _Py_IDENTIFIER(reducer_override);
4462 
4463     if (_PyObject_LookupAttrId((PyObject *)self, &PyId_reducer_override,
4464                                &tmp) < 0) {
4465         return -1;
4466     }
4467     /* Cache the reducer_override method, if it exists. */
4468     if (tmp != NULL) {
4469         Py_XSETREF(self->reducer_override, tmp);
4470     }
4471     else {
4472         Py_CLEAR(self->reducer_override);
4473     }
4474 
4475     if (self->proto >= 2) {
4476         char header[2];
4477 
4478         header[0] = PROTO;
4479         assert(self->proto >= 0 && self->proto < 256);
4480         header[1] = (unsigned char)self->proto;
4481         if (_Pickler_Write(self, header, 2) < 0)
4482             return -1;
4483         if (self->proto >= 4)
4484             self->framing = 1;
4485     }
4486 
4487     if (save(self, obj, 0) < 0 ||
4488         _Pickler_Write(self, &stop_op, 1) < 0 ||
4489         _Pickler_CommitFrame(self) < 0)
4490         return -1;
4491     self->framing = 0;
4492     return 0;
4493 }
4494 
4495 /*[clinic input]
4496 
4497 _pickle.Pickler.clear_memo
4498 
4499 Clears the pickler's "memo".
4500 
4501 The memo is the data structure that remembers which objects the
4502 pickler has already seen, so that shared or recursive objects are
4503 pickled by reference and not by value.  This method is useful when
4504 re-using picklers.
4505 [clinic start generated code]*/
4506 
4507 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4508 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4509 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4510 {
4511     if (self->memo)
4512         PyMemoTable_Clear(self->memo);
4513 
4514     Py_RETURN_NONE;
4515 }
4516 
4517 /*[clinic input]
4518 
4519 _pickle.Pickler.dump
4520 
4521   obj: object
4522   /
4523 
4524 Write a pickled representation of the given object to the open file.
4525 [clinic start generated code]*/
4526 
4527 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4528 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4529 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4530 {
4531     /* Check whether the Pickler was initialized correctly (issue3664).
4532        Developers often forget to call __init__() in their subclasses, which
4533        would trigger a segfault without this check. */
4534     if (self->write == NULL) {
4535         PickleState *st = _Pickle_GetGlobalState();
4536         PyErr_Format(st->PicklingError,
4537                      "Pickler.__init__() was not called by %s.__init__()",
4538                      Py_TYPE(self)->tp_name);
4539         return NULL;
4540     }
4541 
4542     if (_Pickler_ClearBuffer(self) < 0)
4543         return NULL;
4544 
4545     if (dump(self, obj) < 0)
4546         return NULL;
4547 
4548     if (_Pickler_FlushToFile(self) < 0)
4549         return NULL;
4550 
4551     Py_RETURN_NONE;
4552 }
4553 
4554 /*[clinic input]
4555 
4556 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4557 
4558 Returns size in memory, in bytes.
4559 [clinic start generated code]*/
4560 
4561 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4562 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4563 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4564 {
4565     Py_ssize_t res, s;
4566 
4567     res = _PyObject_SIZE(Py_TYPE(self));
4568     if (self->memo != NULL) {
4569         res += sizeof(PyMemoTable);
4570         res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4571     }
4572     if (self->output_buffer != NULL) {
4573         s = _PySys_GetSizeOf(self->output_buffer);
4574         if (s == -1)
4575             return -1;
4576         res += s;
4577     }
4578     return res;
4579 }
4580 
4581 static struct PyMethodDef Pickler_methods[] = {
4582     _PICKLE_PICKLER_DUMP_METHODDEF
4583     _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
4584     _PICKLE_PICKLER___SIZEOF___METHODDEF
4585     {NULL, NULL}                /* sentinel */
4586 };
4587 
4588 static void
Pickler_dealloc(PicklerObject * self)4589 Pickler_dealloc(PicklerObject *self)
4590 {
4591     PyObject_GC_UnTrack(self);
4592 
4593     Py_XDECREF(self->output_buffer);
4594     Py_XDECREF(self->write);
4595     Py_XDECREF(self->pers_func);
4596     Py_XDECREF(self->dispatch_table);
4597     Py_XDECREF(self->fast_memo);
4598     Py_XDECREF(self->reducer_override);
4599     Py_XDECREF(self->buffer_callback);
4600 
4601     PyMemoTable_Del(self->memo);
4602 
4603     Py_TYPE(self)->tp_free((PyObject *)self);
4604 }
4605 
4606 static int
Pickler_traverse(PicklerObject * self,visitproc visit,void * arg)4607 Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
4608 {
4609     Py_VISIT(self->write);
4610     Py_VISIT(self->pers_func);
4611     Py_VISIT(self->dispatch_table);
4612     Py_VISIT(self->fast_memo);
4613     Py_VISIT(self->reducer_override);
4614     Py_VISIT(self->buffer_callback);
4615     return 0;
4616 }
4617 
4618 static int
Pickler_clear(PicklerObject * self)4619 Pickler_clear(PicklerObject *self)
4620 {
4621     Py_CLEAR(self->output_buffer);
4622     Py_CLEAR(self->write);
4623     Py_CLEAR(self->pers_func);
4624     Py_CLEAR(self->dispatch_table);
4625     Py_CLEAR(self->fast_memo);
4626     Py_CLEAR(self->reducer_override);
4627     Py_CLEAR(self->buffer_callback);
4628 
4629     if (self->memo != NULL) {
4630         PyMemoTable *memo = self->memo;
4631         self->memo = NULL;
4632         PyMemoTable_Del(memo);
4633     }
4634     return 0;
4635 }
4636 
4637 
4638 /*[clinic input]
4639 
4640 _pickle.Pickler.__init__
4641 
4642   file: object
4643   protocol: object = None
4644   fix_imports: bool = True
4645   buffer_callback: object = None
4646 
4647 This takes a binary file for writing a pickle data stream.
4648 
The optional *protocol* argument tells the pickler to use the given
protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
protocol is 4. It was introduced in Python 3.4, and is incompatible
with previous versions.
4652 
4653 Specifying a negative protocol version selects the highest protocol
4654 version supported.  The higher the protocol used, the more recent the
4655 version of Python needed to read the pickle produced.
4656 
4657 The *file* argument must have a write() method that accepts a single
4658 bytes argument. It can thus be a file object opened for binary
4659 writing, an io.BytesIO instance, or any other custom object that meets
4660 this interface.
4661 
4662 If *fix_imports* is True and protocol is less than 3, pickle will try
4663 to map the new Python 3 names to the old module names used in Python
4664 2, so that the pickle data stream is readable with Python 2.
4665 
4666 If *buffer_callback* is None (the default), buffer views are
4667 serialized into *file* as part of the pickle stream.
4668 
4669 If *buffer_callback* is not None, then it can be called any number
4670 of times with a buffer view.  If the callback returns a false value
4671 (such as None), the given buffer is out-of-band; otherwise the
4672 buffer is serialized in-band, i.e. inside the pickle stream.
4673 
4674 It is an error if *buffer_callback* is not None and *protocol*
4675 is None or smaller than 5.
4676 
4677 [clinic start generated code]*/
4678 
4679 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)4680 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4681                               PyObject *protocol, int fix_imports,
4682                               PyObject *buffer_callback)
4683 /*[clinic end generated code: output=0abedc50590d259b input=bb886e00443a7811]*/
4684 {
4685     _Py_IDENTIFIER(persistent_id);
4686     _Py_IDENTIFIER(dispatch_table);
4687 
4688     /* In case of multiple __init__() calls, clear previous content. */
4689     if (self->write != NULL)
4690         (void)Pickler_clear(self);
4691 
4692     if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4693         return -1;
4694 
4695     if (_Pickler_SetOutputStream(self, file) < 0)
4696         return -1;
4697 
4698     if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4699         return -1;
4700 
4701     /* memo and output_buffer may have already been created in _Pickler_New */
4702     if (self->memo == NULL) {
4703         self->memo = PyMemoTable_New();
4704         if (self->memo == NULL)
4705             return -1;
4706     }
4707     self->output_len = 0;
4708     if (self->output_buffer == NULL) {
4709         self->max_output_len = WRITE_BUF_SIZE;
4710         self->output_buffer = PyBytes_FromStringAndSize(NULL,
4711                                                         self->max_output_len);
4712         if (self->output_buffer == NULL)
4713             return -1;
4714     }
4715 
4716     self->fast = 0;
4717     self->fast_nesting = 0;
4718     self->fast_memo = NULL;
4719 
4720     if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4721                         &self->pers_func, &self->pers_func_self) < 0)
4722     {
4723         return -1;
4724     }
4725 
4726     if (_PyObject_LookupAttrId((PyObject *)self,
4727                                     &PyId_dispatch_table, &self->dispatch_table) < 0) {
4728         return -1;
4729     }
4730 
4731     return 0;
4732 }
4733 
4734 
4735 /* Define a proxy object for the Pickler's internal memo object. This is to
4736  * avoid breaking code like:
4737  *  pickler.memo.clear()
4738  * and
4739  *  pickler.memo = saved_memo
4740  * Is this a good idea? Not really, but we don't want to break code that uses
4741  * it. Note that we don't implement the entire mapping API here. This is
4742  * intentional, as these should be treated as black-box implementation details.
4743  */
4744 
4745 /*[clinic input]
4746 _pickle.PicklerMemoProxy.clear
4747 
4748 Remove all items from memo.
4749 [clinic start generated code]*/
4750 
4751 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4752 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4753 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4754 {
4755     if (self->pickler->memo)
4756         PyMemoTable_Clear(self->pickler->memo);
4757     Py_RETURN_NONE;
4758 }
4759 
4760 /*[clinic input]
4761 _pickle.PicklerMemoProxy.copy
4762 
4763 Copy the memo to a new object.
4764 [clinic start generated code]*/
4765 
4766 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4767 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4768 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4769 {
4770     PyMemoTable *memo;
4771     PyObject *new_memo = PyDict_New();
4772     if (new_memo == NULL)
4773         return NULL;
4774 
4775     memo = self->pickler->memo;
4776     for (size_t i = 0; i < memo->mt_allocated; ++i) {
4777         PyMemoEntry entry = memo->mt_table[i];
4778         if (entry.me_key != NULL) {
4779             int status;
4780             PyObject *key, *value;
4781 
4782             key = PyLong_FromVoidPtr(entry.me_key);
4783             value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4784 
4785             if (key == NULL || value == NULL) {
4786                 Py_XDECREF(key);
4787                 Py_XDECREF(value);
4788                 goto error;
4789             }
4790             status = PyDict_SetItem(new_memo, key, value);
4791             Py_DECREF(key);
4792             Py_DECREF(value);
4793             if (status < 0)
4794                 goto error;
4795         }
4796     }
4797     return new_memo;
4798 
4799   error:
4800     Py_XDECREF(new_memo);
4801     return NULL;
4802 }
4803 
4804 /*[clinic input]
4805 _pickle.PicklerMemoProxy.__reduce__
4806 
4807 Implement pickle support.
4808 [clinic start generated code]*/
4809 
4810 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4811 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4812 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4813 {
4814     PyObject *reduce_value, *dict_args;
4815     PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4816     if (contents == NULL)
4817         return NULL;
4818 
4819     reduce_value = PyTuple_New(2);
4820     if (reduce_value == NULL) {
4821         Py_DECREF(contents);
4822         return NULL;
4823     }
4824     dict_args = PyTuple_New(1);
4825     if (dict_args == NULL) {
4826         Py_DECREF(contents);
4827         Py_DECREF(reduce_value);
4828         return NULL;
4829     }
4830     PyTuple_SET_ITEM(dict_args, 0, contents);
4831     Py_INCREF((PyObject *)&PyDict_Type);
4832     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4833     PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4834     return reduce_value;
4835 }
4836 
4837 static PyMethodDef picklerproxy_methods[] = {
4838     _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
4839     _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
4840     _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
4841     {NULL, NULL} /* sentinel */
4842 };
4843 
4844 static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject * self)4845 PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4846 {
4847     PyObject_GC_UnTrack(self);
4848     Py_XDECREF(self->pickler);
4849     PyObject_GC_Del((PyObject *)self);
4850 }
4851 
4852 static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject * self,visitproc visit,void * arg)4853 PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4854                           visitproc visit, void *arg)
4855 {
4856     Py_VISIT(self->pickler);
4857     return 0;
4858 }
4859 
4860 static int
PicklerMemoProxy_clear(PicklerMemoProxyObject * self)4861 PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4862 {
4863     Py_CLEAR(self->pickler);
4864     return 0;
4865 }
4866 
4867 static PyTypeObject PicklerMemoProxyType = {
4868     PyVarObject_HEAD_INIT(NULL, 0)
4869     "_pickle.PicklerMemoProxy",                 /*tp_name*/
4870     sizeof(PicklerMemoProxyObject),             /*tp_basicsize*/
4871     0,
4872     (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
4873     0,                                          /* tp_vectorcall_offset */
4874     0,                                          /* tp_getattr */
4875     0,                                          /* tp_setattr */
4876     0,                                          /* tp_as_async */
4877     0,                                          /* tp_repr */
4878     0,                                          /* tp_as_number */
4879     0,                                          /* tp_as_sequence */
4880     0,                                          /* tp_as_mapping */
4881     PyObject_HashNotImplemented,                /* tp_hash */
4882     0,                                          /* tp_call */
4883     0,                                          /* tp_str */
4884     PyObject_GenericGetAttr,                    /* tp_getattro */
4885     PyObject_GenericSetAttr,                    /* tp_setattro */
4886     0,                                          /* tp_as_buffer */
4887     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4888     0,                                          /* tp_doc */
4889     (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
4890     (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
4891     0,                                          /* tp_richcompare */
4892     0,                                          /* tp_weaklistoffset */
4893     0,                                          /* tp_iter */
4894     0,                                          /* tp_iternext */
4895     picklerproxy_methods,                       /* tp_methods */
4896 };
4897 
4898 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4899 PicklerMemoProxy_New(PicklerObject *pickler)
4900 {
4901     PicklerMemoProxyObject *self;
4902 
4903     self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4904     if (self == NULL)
4905         return NULL;
4906     Py_INCREF(pickler);
4907     self->pickler = pickler;
4908     PyObject_GC_Track(self);
4909     return (PyObject *)self;
4910 }
4911 
4912 /*****************************************************************************/
4913 
4914 static PyObject *
Pickler_get_memo(PicklerObject * self,void * Py_UNUSED (ignored))4915 Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
4916 {
4917     return PicklerMemoProxy_New(self);
4918 }
4919 
4920 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4921 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4922 {
4923     PyMemoTable *new_memo = NULL;
4924 
4925     if (obj == NULL) {
4926         PyErr_SetString(PyExc_TypeError,
4927                         "attribute deletion is not supported");
4928         return -1;
4929     }
4930 
4931     if (Py_TYPE(obj) == &PicklerMemoProxyType) {
4932         PicklerObject *pickler =
4933             ((PicklerMemoProxyObject *)obj)->pickler;
4934 
4935         new_memo = PyMemoTable_Copy(pickler->memo);
4936         if (new_memo == NULL)
4937             return -1;
4938     }
4939     else if (PyDict_Check(obj)) {
4940         Py_ssize_t i = 0;
4941         PyObject *key, *value;
4942 
4943         new_memo = PyMemoTable_New();
4944         if (new_memo == NULL)
4945             return -1;
4946 
4947         while (PyDict_Next(obj, &i, &key, &value)) {
4948             Py_ssize_t memo_id;
4949             PyObject *memo_obj;
4950 
4951             if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
4952                 PyErr_SetString(PyExc_TypeError,
4953                                 "'memo' values must be 2-item tuples");
4954                 goto error;
4955             }
4956             memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
4957             if (memo_id == -1 && PyErr_Occurred())
4958                 goto error;
4959             memo_obj = PyTuple_GET_ITEM(value, 1);
4960             if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
4961                 goto error;
4962         }
4963     }
4964     else {
4965         PyErr_Format(PyExc_TypeError,
4966                      "'memo' attribute must be a PicklerMemoProxy object "
4967                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
4968         return -1;
4969     }
4970 
4971     PyMemoTable_Del(self->memo);
4972     self->memo = new_memo;
4973 
4974     return 0;
4975 
4976   error:
4977     if (new_memo)
4978         PyMemoTable_Del(new_memo);
4979     return -1;
4980 }
4981 
4982 static PyObject *
Pickler_get_persid(PicklerObject * self,void * Py_UNUSED (ignored))4983 Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
4984 {
4985     if (self->pers_func == NULL) {
4986         PyErr_SetString(PyExc_AttributeError, "persistent_id");
4987         return NULL;
4988     }
4989     return reconstruct_method(self->pers_func, self->pers_func_self);
4990 }
4991 
4992 static int
Pickler_set_persid(PicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))4993 Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
4994 {
4995     if (value == NULL) {
4996         PyErr_SetString(PyExc_TypeError,
4997                         "attribute deletion is not supported");
4998         return -1;
4999     }
5000     if (!PyCallable_Check(value)) {
5001         PyErr_SetString(PyExc_TypeError,
5002                         "persistent_id must be a callable taking one argument");
5003         return -1;
5004     }
5005 
5006     self->pers_func_self = NULL;
5007     Py_INCREF(value);
5008     Py_XSETREF(self->pers_func, value);
5009 
5010     return 0;
5011 }
5012 
5013 static PyMemberDef Pickler_members[] = {
5014     {"bin", T_INT, offsetof(PicklerObject, bin)},
5015     {"fast", T_INT, offsetof(PicklerObject, fast)},
5016     {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
5017     {NULL}
5018 };
5019 
5020 static PyGetSetDef Pickler_getsets[] = {
5021     {"memo",          (getter)Pickler_get_memo,
5022                       (setter)Pickler_set_memo},
5023     {"persistent_id", (getter)Pickler_get_persid,
5024                       (setter)Pickler_set_persid},
5025     {NULL}
5026 };
5027 
5028 static PyTypeObject Pickler_Type = {
5029     PyVarObject_HEAD_INIT(NULL, 0)
5030     "_pickle.Pickler"  ,                /*tp_name*/
5031     sizeof(PicklerObject),              /*tp_basicsize*/
5032     0,                                  /*tp_itemsize*/
5033     (destructor)Pickler_dealloc,        /*tp_dealloc*/
5034     0,                                  /*tp_vectorcall_offset*/
5035     0,                                  /*tp_getattr*/
5036     0,                                  /*tp_setattr*/
5037     0,                                  /*tp_as_async*/
5038     0,                                  /*tp_repr*/
5039     0,                                  /*tp_as_number*/
5040     0,                                  /*tp_as_sequence*/
5041     0,                                  /*tp_as_mapping*/
5042     0,                                  /*tp_hash*/
5043     0,                                  /*tp_call*/
5044     0,                                  /*tp_str*/
5045     0,                                  /*tp_getattro*/
5046     0,                                  /*tp_setattro*/
5047     0,                                  /*tp_as_buffer*/
5048     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5049     _pickle_Pickler___init____doc__,    /*tp_doc*/
5050     (traverseproc)Pickler_traverse,     /*tp_traverse*/
5051     (inquiry)Pickler_clear,             /*tp_clear*/
5052     0,                                  /*tp_richcompare*/
5053     0,                                  /*tp_weaklistoffset*/
5054     0,                                  /*tp_iter*/
5055     0,                                  /*tp_iternext*/
5056     Pickler_methods,                    /*tp_methods*/
5057     Pickler_members,                    /*tp_members*/
5058     Pickler_getsets,                    /*tp_getset*/
5059     0,                                  /*tp_base*/
5060     0,                                  /*tp_dict*/
5061     0,                                  /*tp_descr_get*/
5062     0,                                  /*tp_descr_set*/
5063     0,                                  /*tp_dictoffset*/
5064     _pickle_Pickler___init__,           /*tp_init*/
5065     PyType_GenericAlloc,                /*tp_alloc*/
5066     PyType_GenericNew,                  /*tp_new*/
5067     PyObject_GC_Del,                    /*tp_free*/
5068     0,                                  /*tp_is_gc*/
5069 };
5070 
5071 /* Temporary helper for calling self.find_class().
5072 
5073    XXX: It would be nice to able to avoid Python function call overhead, by
5074    using directly the C version of find_class(), when find_class() is not
5075    overridden by a subclass. Although, this could become rather hackish. A
5076    simpler optimization would be to call the C function when self is not a
5077    subclass instance. */
5078 static PyObject *
find_class(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)5079 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
5080 {
5081     _Py_IDENTIFIER(find_class);
5082 
5083     return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
5084                                          module_name, global_name, NULL);
5085 }
5086 
5087 static Py_ssize_t
marker(UnpicklerObject * self)5088 marker(UnpicklerObject *self)
5089 {
5090     Py_ssize_t mark;
5091 
5092     if (self->num_marks < 1) {
5093         PickleState *st = _Pickle_GetGlobalState();
5094         PyErr_SetString(st->UnpicklingError, "could not find MARK");
5095         return -1;
5096     }
5097 
5098     mark = self->marks[--self->num_marks];
5099     self->stack->mark_set = self->num_marks != 0;
5100     self->stack->fence = self->num_marks ?
5101             self->marks[self->num_marks - 1] : 0;
5102     return mark;
5103 }
5104 
5105 static int
load_none(UnpicklerObject * self)5106 load_none(UnpicklerObject *self)
5107 {
5108     PDATA_APPEND(self->stack, Py_None, -1);
5109     return 0;
5110 }
5111 
5112 static int
load_int(UnpicklerObject * self)5113 load_int(UnpicklerObject *self)
5114 {
5115     PyObject *value;
5116     char *endptr, *s;
5117     Py_ssize_t len;
5118     long x;
5119 
5120     if ((len = _Unpickler_Readline(self, &s)) < 0)
5121         return -1;
5122     if (len < 2)
5123         return bad_readline();
5124 
5125     errno = 0;
5126     /* XXX: Should the base argument of strtol() be explicitly set to 10?
5127        XXX(avassalotti): Should this uses PyOS_strtol()? */
5128     x = strtol(s, &endptr, 0);
5129 
5130     if (errno || (*endptr != '\n' && *endptr != '\0')) {
5131         /* Hm, maybe we've got something long.  Let's try reading
5132          * it as a Python int object. */
5133         errno = 0;
5134         /* XXX: Same thing about the base here. */
5135         value = PyLong_FromString(s, NULL, 0);
5136         if (value == NULL) {
5137             PyErr_SetString(PyExc_ValueError,
5138                             "could not convert string to int");
5139             return -1;
5140         }
5141     }
5142     else {
5143         if (len == 3 && (x == 0 || x == 1)) {
5144             if ((value = PyBool_FromLong(x)) == NULL)
5145                 return -1;
5146         }
5147         else {
5148             if ((value = PyLong_FromLong(x)) == NULL)
5149                 return -1;
5150         }
5151     }
5152 
5153     PDATA_PUSH(self->stack, value, -1);
5154     return 0;
5155 }
5156 
5157 static int
load_bool(UnpicklerObject * self,PyObject * boolean)5158 load_bool(UnpicklerObject *self, PyObject *boolean)
5159 {
5160     assert(boolean == Py_True || boolean == Py_False);
5161     PDATA_APPEND(self->stack, boolean, -1);
5162     return 0;
5163 }
5164 
5165 /* s contains x bytes of an unsigned little-endian integer.  Return its value
5166  * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5167  */
5168 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)5169 calc_binsize(char *bytes, int nbytes)
5170 {
5171     unsigned char *s = (unsigned char *)bytes;
5172     int i;
5173     size_t x = 0;
5174 
5175     if (nbytes > (int)sizeof(size_t)) {
5176         /* Check for integer overflow.  BINBYTES8 and BINUNICODE8 opcodes
5177          * have 64-bit size that can't be represented on 32-bit platform.
5178          */
5179         for (i = (int)sizeof(size_t); i < nbytes; i++) {
5180             if (s[i])
5181                 return -1;
5182         }
5183         nbytes = (int)sizeof(size_t);
5184     }
5185     for (i = 0; i < nbytes; i++) {
5186         x |= (size_t) s[i] << (8 * i);
5187     }
5188 
5189     if (x > PY_SSIZE_T_MAX)
5190         return -1;
5191     else
5192         return (Py_ssize_t) x;
5193 }
5194 
/* `bytes` holds `nbytes` bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure:  when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is a
 * historical source of x-platform bugs.
 *
 * The value is assembled in an unsigned long so that no bit is ever shifted
 * into the sign bit of a signed type, which would be undefined behaviour
 * when long is only 32 bits wide.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < nbytes; i++) {
        x |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed, so on a box with longs bigger than 4 bytes we need
     * to extend a BININT's sign bit to the full width.  The mask is
     * zero when long is exactly 32 bits, making this a no-op there.
     */
    if (nbytes == 4 && (x & 0x80000000UL) != 0) {
        x |= ~0xFFFFFFFFUL;
    }

    /* Map the two's-complement bit pattern back to a signed long without
     * relying on an out-of-range unsigned-to-signed conversion. */
    if (x <= (unsigned long)LONG_MAX) {
        return (long)x;
    }
    return -(long)(~x) - 1;
}
5221 
5222 static int
load_binintx(UnpicklerObject * self,char * s,int size)5223 load_binintx(UnpicklerObject *self, char *s, int size)
5224 {
5225     PyObject *value;
5226     long x;
5227 
5228     x = calc_binint(s, size);
5229 
5230     if ((value = PyLong_FromLong(x)) == NULL)
5231         return -1;
5232 
5233     PDATA_PUSH(self->stack, value, -1);
5234     return 0;
5235 }
5236 
5237 static int
load_binint(UnpicklerObject * self)5238 load_binint(UnpicklerObject *self)
5239 {
5240     char *s;
5241 
5242     if (_Unpickler_Read(self, &s, 4) < 0)
5243         return -1;
5244 
5245     return load_binintx(self, s, 4);
5246 }
5247 
5248 static int
load_binint1(UnpicklerObject * self)5249 load_binint1(UnpicklerObject *self)
5250 {
5251     char *s;
5252 
5253     if (_Unpickler_Read(self, &s, 1) < 0)
5254         return -1;
5255 
5256     return load_binintx(self, s, 1);
5257 }
5258 
5259 static int
load_binint2(UnpicklerObject * self)5260 load_binint2(UnpicklerObject *self)
5261 {
5262     char *s;
5263 
5264     if (_Unpickler_Read(self, &s, 2) < 0)
5265         return -1;
5266 
5267     return load_binintx(self, s, 2);
5268 }
5269 
5270 static int
load_long(UnpicklerObject * self)5271 load_long(UnpicklerObject *self)
5272 {
5273     PyObject *value;
5274     char *s = NULL;
5275     Py_ssize_t len;
5276 
5277     if ((len = _Unpickler_Readline(self, &s)) < 0)
5278         return -1;
5279     if (len < 2)
5280         return bad_readline();
5281 
5282     /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5283        the 'L' before calling PyLong_FromString.  In order to maintain
5284        compatibility with Python 3.0.0, we don't actually *require*
5285        the 'L' to be present. */
5286     if (s[len-2] == 'L')
5287         s[len-2] = '\0';
5288     /* XXX: Should the base argument explicitly set to 10? */
5289     value = PyLong_FromString(s, NULL, 0);
5290     if (value == NULL)
5291         return -1;
5292 
5293     PDATA_PUSH(self->stack, value, -1);
5294     return 0;
5295 }
5296 
5297 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
5298  * data following.
5299  */
5300 static int
load_counted_long(UnpicklerObject * self,int size)5301 load_counted_long(UnpicklerObject *self, int size)
5302 {
5303     PyObject *value;
5304     char *nbytes;
5305     char *pdata;
5306 
5307     assert(size == 1 || size == 4);
5308     if (_Unpickler_Read(self, &nbytes, size) < 0)
5309         return -1;
5310 
5311     size = calc_binint(nbytes, size);
5312     if (size < 0) {
5313         PickleState *st = _Pickle_GetGlobalState();
5314         /* Corrupt or hostile pickle -- we never write one like this */
5315         PyErr_SetString(st->UnpicklingError,
5316                         "LONG pickle has negative byte count");
5317         return -1;
5318     }
5319 
5320     if (size == 0)
5321         value = PyLong_FromLong(0L);
5322     else {
5323         /* Read the raw little-endian bytes and convert. */
5324         if (_Unpickler_Read(self, &pdata, size) < 0)
5325             return -1;
5326         value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5327                                       1 /* little endian */ , 1 /* signed */ );
5328     }
5329     if (value == NULL)
5330         return -1;
5331     PDATA_PUSH(self->stack, value, -1);
5332     return 0;
5333 }
5334 
5335 static int
load_float(UnpicklerObject * self)5336 load_float(UnpicklerObject *self)
5337 {
5338     PyObject *value;
5339     char *endptr, *s;
5340     Py_ssize_t len;
5341     double d;
5342 
5343     if ((len = _Unpickler_Readline(self, &s)) < 0)
5344         return -1;
5345     if (len < 2)
5346         return bad_readline();
5347 
5348     errno = 0;
5349     d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5350     if (d == -1.0 && PyErr_Occurred())
5351         return -1;
5352     if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5353         PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5354         return -1;
5355     }
5356     value = PyFloat_FromDouble(d);
5357     if (value == NULL)
5358         return -1;
5359 
5360     PDATA_PUSH(self->stack, value, -1);
5361     return 0;
5362 }
5363 
5364 static int
load_binfloat(UnpicklerObject * self)5365 load_binfloat(UnpicklerObject *self)
5366 {
5367     PyObject *value;
5368     double x;
5369     char *s;
5370 
5371     if (_Unpickler_Read(self, &s, 8) < 0)
5372         return -1;
5373 
5374     x = _PyFloat_Unpack8((unsigned char *)s, 0);
5375     if (x == -1.0 && PyErr_Occurred())
5376         return -1;
5377 
5378     if ((value = PyFloat_FromDouble(x)) == NULL)
5379         return -1;
5380 
5381     PDATA_PUSH(self->stack, value, -1);
5382     return 0;
5383 }
5384 
5385 static int
load_string(UnpicklerObject * self)5386 load_string(UnpicklerObject *self)
5387 {
5388     PyObject *bytes;
5389     PyObject *obj;
5390     Py_ssize_t len;
5391     char *s, *p;
5392 
5393     if ((len = _Unpickler_Readline(self, &s)) < 0)
5394         return -1;
5395     /* Strip the newline */
5396     len--;
5397     /* Strip outermost quotes */
5398     if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5399         p = s + 1;
5400         len -= 2;
5401     }
5402     else {
5403         PickleState *st = _Pickle_GetGlobalState();
5404         PyErr_SetString(st->UnpicklingError,
5405                         "the STRING opcode argument must be quoted");
5406         return -1;
5407     }
5408     assert(len >= 0);
5409 
5410     /* Use the PyBytes API to decode the string, since that is what is used
5411        to encode, and then coerce the result to Unicode. */
5412     bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5413     if (bytes == NULL)
5414         return -1;
5415 
5416     /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5417        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5418     if (strcmp(self->encoding, "bytes") == 0) {
5419         obj = bytes;
5420     }
5421     else {
5422         obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5423         Py_DECREF(bytes);
5424         if (obj == NULL) {
5425             return -1;
5426         }
5427     }
5428 
5429     PDATA_PUSH(self->stack, obj, -1);
5430     return 0;
5431 }
5432 
5433 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)5434 load_counted_binstring(UnpicklerObject *self, int nbytes)
5435 {
5436     PyObject *obj;
5437     Py_ssize_t size;
5438     char *s;
5439 
5440     if (_Unpickler_Read(self, &s, nbytes) < 0)
5441         return -1;
5442 
5443     size = calc_binsize(s, nbytes);
5444     if (size < 0) {
5445         PickleState *st = _Pickle_GetGlobalState();
5446         PyErr_Format(st->UnpicklingError,
5447                      "BINSTRING exceeds system's maximum size of %zd bytes",
5448                      PY_SSIZE_T_MAX);
5449         return -1;
5450     }
5451 
5452     if (_Unpickler_Read(self, &s, size) < 0)
5453         return -1;
5454 
5455     /* Convert Python 2.x strings to bytes if the *encoding* given to the
5456        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5457     if (strcmp(self->encoding, "bytes") == 0) {
5458         obj = PyBytes_FromStringAndSize(s, size);
5459     }
5460     else {
5461         obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5462     }
5463     if (obj == NULL) {
5464         return -1;
5465     }
5466 
5467     PDATA_PUSH(self->stack, obj, -1);
5468     return 0;
5469 }
5470 
5471 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)5472 load_counted_binbytes(UnpicklerObject *self, int nbytes)
5473 {
5474     PyObject *bytes;
5475     Py_ssize_t size;
5476     char *s;
5477 
5478     if (_Unpickler_Read(self, &s, nbytes) < 0)
5479         return -1;
5480 
5481     size = calc_binsize(s, nbytes);
5482     if (size < 0) {
5483         PyErr_Format(PyExc_OverflowError,
5484                      "BINBYTES exceeds system's maximum size of %zd bytes",
5485                      PY_SSIZE_T_MAX);
5486         return -1;
5487     }
5488 
5489     bytes = PyBytes_FromStringAndSize(NULL, size);
5490     if (bytes == NULL)
5491         return -1;
5492     if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5493         Py_DECREF(bytes);
5494         return -1;
5495     }
5496 
5497     PDATA_PUSH(self->stack, bytes, -1);
5498     return 0;
5499 }
5500 
5501 static int
load_counted_bytearray(UnpicklerObject * self)5502 load_counted_bytearray(UnpicklerObject *self)
5503 {
5504     PyObject *bytearray;
5505     Py_ssize_t size;
5506     char *s;
5507 
5508     if (_Unpickler_Read(self, &s, 8) < 0) {
5509         return -1;
5510     }
5511 
5512     size = calc_binsize(s, 8);
5513     if (size < 0) {
5514         PyErr_Format(PyExc_OverflowError,
5515                      "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5516                      PY_SSIZE_T_MAX);
5517         return -1;
5518     }
5519 
5520     bytearray = PyByteArray_FromStringAndSize(NULL, size);
5521     if (bytearray == NULL) {
5522         return -1;
5523     }
5524     if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5525         Py_DECREF(bytearray);
5526         return -1;
5527     }
5528 
5529     PDATA_PUSH(self->stack, bytearray, -1);
5530     return 0;
5531 }
5532 
5533 static int
load_next_buffer(UnpicklerObject * self)5534 load_next_buffer(UnpicklerObject *self)
5535 {
5536     if (self->buffers == NULL) {
5537         PickleState *st = _Pickle_GetGlobalState();
5538         PyErr_SetString(st->UnpicklingError,
5539                         "pickle stream refers to out-of-band data "
5540                         "but no *buffers* argument was given");
5541         return -1;
5542     }
5543     PyObject *buf = PyIter_Next(self->buffers);
5544     if (buf == NULL) {
5545         if (!PyErr_Occurred()) {
5546             PickleState *st = _Pickle_GetGlobalState();
5547             PyErr_SetString(st->UnpicklingError,
5548                             "not enough out-of-band buffers");
5549         }
5550         return -1;
5551     }
5552 
5553     PDATA_PUSH(self->stack, buf, -1);
5554     return 0;
5555 }
5556 
5557 static int
load_readonly_buffer(UnpicklerObject * self)5558 load_readonly_buffer(UnpicklerObject *self)
5559 {
5560     Py_ssize_t len = Py_SIZE(self->stack);
5561     if (len <= self->stack->fence) {
5562         return Pdata_stack_underflow(self->stack);
5563     }
5564 
5565     PyObject *obj = self->stack->data[len - 1];
5566     PyObject *view = PyMemoryView_FromObject(obj);
5567     if (view == NULL) {
5568         return -1;
5569     }
5570     if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5571         /* Original object is writable */
5572         PyMemoryView_GET_BUFFER(view)->readonly = 1;
5573         self->stack->data[len - 1] = view;
5574         Py_DECREF(obj);
5575     }
5576     else {
5577         /* Original object is read-only, no need to replace it */
5578         Py_DECREF(view);
5579     }
5580     return 0;
5581 }
5582 
5583 static int
load_unicode(UnpicklerObject * self)5584 load_unicode(UnpicklerObject *self)
5585 {
5586     PyObject *str;
5587     Py_ssize_t len;
5588     char *s = NULL;
5589 
5590     if ((len = _Unpickler_Readline(self, &s)) < 0)
5591         return -1;
5592     if (len < 1)
5593         return bad_readline();
5594 
5595     str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5596     if (str == NULL)
5597         return -1;
5598 
5599     PDATA_PUSH(self->stack, str, -1);
5600     return 0;
5601 }
5602 
5603 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5604 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5605 {
5606     PyObject *str;
5607     Py_ssize_t size;
5608     char *s;
5609 
5610     if (_Unpickler_Read(self, &s, nbytes) < 0)
5611         return -1;
5612 
5613     size = calc_binsize(s, nbytes);
5614     if (size < 0) {
5615         PyErr_Format(PyExc_OverflowError,
5616                      "BINUNICODE exceeds system's maximum size of %zd bytes",
5617                      PY_SSIZE_T_MAX);
5618         return -1;
5619     }
5620 
5621     if (_Unpickler_Read(self, &s, size) < 0)
5622         return -1;
5623 
5624     str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5625     if (str == NULL)
5626         return -1;
5627 
5628     PDATA_PUSH(self->stack, str, -1);
5629     return 0;
5630 }
5631 
5632 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5633 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5634 {
5635     PyObject *tuple;
5636 
5637     if (Py_SIZE(self->stack) < len)
5638         return Pdata_stack_underflow(self->stack);
5639 
5640     tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5641     if (tuple == NULL)
5642         return -1;
5643     PDATA_PUSH(self->stack, tuple, -1);
5644     return 0;
5645 }
5646 
5647 static int
load_tuple(UnpicklerObject * self)5648 load_tuple(UnpicklerObject *self)
5649 {
5650     Py_ssize_t i;
5651 
5652     if ((i = marker(self)) < 0)
5653         return -1;
5654 
5655     return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5656 }
5657 
5658 static int
load_empty_list(UnpicklerObject * self)5659 load_empty_list(UnpicklerObject *self)
5660 {
5661     PyObject *list;
5662 
5663     if ((list = PyList_New(0)) == NULL)
5664         return -1;
5665     PDATA_PUSH(self->stack, list, -1);
5666     return 0;
5667 }
5668 
5669 static int
load_empty_dict(UnpicklerObject * self)5670 load_empty_dict(UnpicklerObject *self)
5671 {
5672     PyObject *dict;
5673 
5674     if ((dict = PyDict_New()) == NULL)
5675         return -1;
5676     PDATA_PUSH(self->stack, dict, -1);
5677     return 0;
5678 }
5679 
5680 static int
load_empty_set(UnpicklerObject * self)5681 load_empty_set(UnpicklerObject *self)
5682 {
5683     PyObject *set;
5684 
5685     if ((set = PySet_New(NULL)) == NULL)
5686         return -1;
5687     PDATA_PUSH(self->stack, set, -1);
5688     return 0;
5689 }
5690 
5691 static int
load_list(UnpicklerObject * self)5692 load_list(UnpicklerObject *self)
5693 {
5694     PyObject *list;
5695     Py_ssize_t i;
5696 
5697     if ((i = marker(self)) < 0)
5698         return -1;
5699 
5700     list = Pdata_poplist(self->stack, i);
5701     if (list == NULL)
5702         return -1;
5703     PDATA_PUSH(self->stack, list, -1);
5704     return 0;
5705 }
5706 
5707 static int
load_dict(UnpicklerObject * self)5708 load_dict(UnpicklerObject *self)
5709 {
5710     PyObject *dict, *key, *value;
5711     Py_ssize_t i, j, k;
5712 
5713     if ((i = marker(self)) < 0)
5714         return -1;
5715     j = Py_SIZE(self->stack);
5716 
5717     if ((dict = PyDict_New()) == NULL)
5718         return -1;
5719 
5720     if ((j - i) % 2 != 0) {
5721         PickleState *st = _Pickle_GetGlobalState();
5722         PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5723         Py_DECREF(dict);
5724         return -1;
5725     }
5726 
5727     for (k = i + 1; k < j; k += 2) {
5728         key = self->stack->data[k - 1];
5729         value = self->stack->data[k];
5730         if (PyDict_SetItem(dict, key, value) < 0) {
5731             Py_DECREF(dict);
5732             return -1;
5733         }
5734     }
5735     Pdata_clear(self->stack, i);
5736     PDATA_PUSH(self->stack, dict, -1);
5737     return 0;
5738 }
5739 
5740 static int
load_frozenset(UnpicklerObject * self)5741 load_frozenset(UnpicklerObject *self)
5742 {
5743     PyObject *items;
5744     PyObject *frozenset;
5745     Py_ssize_t i;
5746 
5747     if ((i = marker(self)) < 0)
5748         return -1;
5749 
5750     items = Pdata_poptuple(self->stack, i);
5751     if (items == NULL)
5752         return -1;
5753 
5754     frozenset = PyFrozenSet_New(items);
5755     Py_DECREF(items);
5756     if (frozenset == NULL)
5757         return -1;
5758 
5759     PDATA_PUSH(self->stack, frozenset, -1);
5760     return 0;
5761 }
5762 
5763 static PyObject *
instantiate(PyObject * cls,PyObject * args)5764 instantiate(PyObject *cls, PyObject *args)
5765 {
5766     /* Caller must assure args are a tuple.  Normally, args come from
5767        Pdata_poptuple which packs objects from the top of the stack
5768        into a newly created tuple. */
5769     assert(PyTuple_Check(args));
5770     if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5771         _Py_IDENTIFIER(__getinitargs__);
5772         _Py_IDENTIFIER(__new__);
5773         PyObject *func;
5774         if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5775             return NULL;
5776         }
5777         if (func == NULL) {
5778             return _PyObject_CallMethodIdObjArgs(cls, &PyId___new__, cls, NULL);
5779         }
5780         Py_DECREF(func);
5781     }
5782     return PyObject_CallObject(cls, args);
5783 }
5784 
5785 static int
load_obj(UnpicklerObject * self)5786 load_obj(UnpicklerObject *self)
5787 {
5788     PyObject *cls, *args, *obj = NULL;
5789     Py_ssize_t i;
5790 
5791     if ((i = marker(self)) < 0)
5792         return -1;
5793 
5794     if (Py_SIZE(self->stack) - i < 1)
5795         return Pdata_stack_underflow(self->stack);
5796 
5797     args = Pdata_poptuple(self->stack, i + 1);
5798     if (args == NULL)
5799         return -1;
5800 
5801     PDATA_POP(self->stack, cls);
5802     if (cls) {
5803         obj = instantiate(cls, args);
5804         Py_DECREF(cls);
5805     }
5806     Py_DECREF(args);
5807     if (obj == NULL)
5808         return -1;
5809 
5810     PDATA_PUSH(self->stack, obj, -1);
5811     return 0;
5812 }
5813 
5814 static int
load_inst(UnpicklerObject * self)5815 load_inst(UnpicklerObject *self)
5816 {
5817     PyObject *cls = NULL;
5818     PyObject *args = NULL;
5819     PyObject *obj = NULL;
5820     PyObject *module_name;
5821     PyObject *class_name;
5822     Py_ssize_t len;
5823     Py_ssize_t i;
5824     char *s;
5825 
5826     if ((i = marker(self)) < 0)
5827         return -1;
5828     if ((len = _Unpickler_Readline(self, &s)) < 0)
5829         return -1;
5830     if (len < 2)
5831         return bad_readline();
5832 
5833     /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5834        identifiers are permitted in Python 3.0, since the INST opcode is only
5835        supported by older protocols on Python 2.x. */
5836     module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5837     if (module_name == NULL)
5838         return -1;
5839 
5840     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5841         if (len < 2) {
5842             Py_DECREF(module_name);
5843             return bad_readline();
5844         }
5845         class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5846         if (class_name != NULL) {
5847             cls = find_class(self, module_name, class_name);
5848             Py_DECREF(class_name);
5849         }
5850     }
5851     Py_DECREF(module_name);
5852 
5853     if (cls == NULL)
5854         return -1;
5855 
5856     if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5857         obj = instantiate(cls, args);
5858         Py_DECREF(args);
5859     }
5860     Py_DECREF(cls);
5861 
5862     if (obj == NULL)
5863         return -1;
5864 
5865     PDATA_PUSH(self->stack, obj, -1);
5866     return 0;
5867 }
5868 
5869 static int
load_newobj(UnpicklerObject * self)5870 load_newobj(UnpicklerObject *self)
5871 {
5872     PyObject *args = NULL;
5873     PyObject *clsraw = NULL;
5874     PyTypeObject *cls;          /* clsraw cast to its true type */
5875     PyObject *obj;
5876     PickleState *st = _Pickle_GetGlobalState();
5877 
5878     /* Stack is ... cls argtuple, and we want to call
5879      * cls.__new__(cls, *argtuple).
5880      */
5881     PDATA_POP(self->stack, args);
5882     if (args == NULL)
5883         goto error;
5884     if (!PyTuple_Check(args)) {
5885         PyErr_SetString(st->UnpicklingError,
5886                         "NEWOBJ expected an arg " "tuple.");
5887         goto error;
5888     }
5889 
5890     PDATA_POP(self->stack, clsraw);
5891     cls = (PyTypeObject *)clsraw;
5892     if (cls == NULL)
5893         goto error;
5894     if (!PyType_Check(cls)) {
5895         PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5896                         "isn't a type object");
5897         goto error;
5898     }
5899     if (cls->tp_new == NULL) {
5900         PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5901                         "has NULL tp_new");
5902         goto error;
5903     }
5904 
5905     /* Call __new__. */
5906     obj = cls->tp_new(cls, args, NULL);
5907     if (obj == NULL)
5908         goto error;
5909 
5910     Py_DECREF(args);
5911     Py_DECREF(clsraw);
5912     PDATA_PUSH(self->stack, obj, -1);
5913     return 0;
5914 
5915   error:
5916     Py_XDECREF(args);
5917     Py_XDECREF(clsraw);
5918     return -1;
5919 }
5920 
5921 static int
load_newobj_ex(UnpicklerObject * self)5922 load_newobj_ex(UnpicklerObject *self)
5923 {
5924     PyObject *cls, *args, *kwargs;
5925     PyObject *obj;
5926     PickleState *st = _Pickle_GetGlobalState();
5927 
5928     PDATA_POP(self->stack, kwargs);
5929     if (kwargs == NULL) {
5930         return -1;
5931     }
5932     PDATA_POP(self->stack, args);
5933     if (args == NULL) {
5934         Py_DECREF(kwargs);
5935         return -1;
5936     }
5937     PDATA_POP(self->stack, cls);
5938     if (cls == NULL) {
5939         Py_DECREF(kwargs);
5940         Py_DECREF(args);
5941         return -1;
5942     }
5943 
5944     if (!PyType_Check(cls)) {
5945         Py_DECREF(kwargs);
5946         Py_DECREF(args);
5947         PyErr_Format(st->UnpicklingError,
5948                      "NEWOBJ_EX class argument must be a type, not %.200s",
5949                      Py_TYPE(cls)->tp_name);
5950         Py_DECREF(cls);
5951         return -1;
5952     }
5953 
5954     if (((PyTypeObject *)cls)->tp_new == NULL) {
5955         Py_DECREF(kwargs);
5956         Py_DECREF(args);
5957         Py_DECREF(cls);
5958         PyErr_SetString(st->UnpicklingError,
5959                         "NEWOBJ_EX class argument doesn't have __new__");
5960         return -1;
5961     }
5962     obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5963     Py_DECREF(kwargs);
5964     Py_DECREF(args);
5965     Py_DECREF(cls);
5966     if (obj == NULL) {
5967         return -1;
5968     }
5969     PDATA_PUSH(self->stack, obj, -1);
5970     return 0;
5971 }
5972 
5973 static int
load_global(UnpicklerObject * self)5974 load_global(UnpicklerObject *self)
5975 {
5976     PyObject *global = NULL;
5977     PyObject *module_name;
5978     PyObject *global_name;
5979     Py_ssize_t len;
5980     char *s;
5981 
5982     if ((len = _Unpickler_Readline(self, &s)) < 0)
5983         return -1;
5984     if (len < 2)
5985         return bad_readline();
5986     module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5987     if (!module_name)
5988         return -1;
5989 
5990     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5991         if (len < 2) {
5992             Py_DECREF(module_name);
5993             return bad_readline();
5994         }
5995         global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5996         if (global_name) {
5997             global = find_class(self, module_name, global_name);
5998             Py_DECREF(global_name);
5999         }
6000     }
6001     Py_DECREF(module_name);
6002 
6003     if (global == NULL)
6004         return -1;
6005     PDATA_PUSH(self->stack, global, -1);
6006     return 0;
6007 }
6008 
6009 static int
load_stack_global(UnpicklerObject * self)6010 load_stack_global(UnpicklerObject *self)
6011 {
6012     PyObject *global;
6013     PyObject *module_name;
6014     PyObject *global_name;
6015 
6016     PDATA_POP(self->stack, global_name);
6017     PDATA_POP(self->stack, module_name);
6018     if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6019         global_name == NULL || !PyUnicode_CheckExact(global_name)) {
6020         PickleState *st = _Pickle_GetGlobalState();
6021         PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
6022         Py_XDECREF(global_name);
6023         Py_XDECREF(module_name);
6024         return -1;
6025     }
6026     global = find_class(self, module_name, global_name);
6027     Py_DECREF(global_name);
6028     Py_DECREF(module_name);
6029     if (global == NULL)
6030         return -1;
6031     PDATA_PUSH(self->stack, global, -1);
6032     return 0;
6033 }
6034 
6035 static int
load_persid(UnpicklerObject * self)6036 load_persid(UnpicklerObject *self)
6037 {
6038     PyObject *pid, *obj;
6039     Py_ssize_t len;
6040     char *s;
6041 
6042     if (self->pers_func) {
6043         if ((len = _Unpickler_Readline(self, &s)) < 0)
6044             return -1;
6045         if (len < 1)
6046             return bad_readline();
6047 
6048         pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6049         if (pid == NULL) {
6050             if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6051                 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6052                                 "persistent IDs in protocol 0 must be "
6053                                 "ASCII strings");
6054             }
6055             return -1;
6056         }
6057 
6058         obj = call_method(self->pers_func, self->pers_func_self, pid);
6059         Py_DECREF(pid);
6060         if (obj == NULL)
6061             return -1;
6062 
6063         PDATA_PUSH(self->stack, obj, -1);
6064         return 0;
6065     }
6066     else {
6067         PickleState *st = _Pickle_GetGlobalState();
6068         PyErr_SetString(st->UnpicklingError,
6069                         "A load persistent id instruction was encountered,\n"
6070                         "but no persistent_load function was specified.");
6071         return -1;
6072     }
6073 }
6074 
6075 static int
load_binpersid(UnpicklerObject * self)6076 load_binpersid(UnpicklerObject *self)
6077 {
6078     PyObject *pid, *obj;
6079 
6080     if (self->pers_func) {
6081         PDATA_POP(self->stack, pid);
6082         if (pid == NULL)
6083             return -1;
6084 
6085         obj = call_method(self->pers_func, self->pers_func_self, pid);
6086         Py_DECREF(pid);
6087         if (obj == NULL)
6088             return -1;
6089 
6090         PDATA_PUSH(self->stack, obj, -1);
6091         return 0;
6092     }
6093     else {
6094         PickleState *st = _Pickle_GetGlobalState();
6095         PyErr_SetString(st->UnpicklingError,
6096                         "A load persistent id instruction was encountered,\n"
6097                         "but no persistent_load function was specified.");
6098         return -1;
6099     }
6100 }
6101 
6102 static int
load_pop(UnpicklerObject * self)6103 load_pop(UnpicklerObject *self)
6104 {
6105     Py_ssize_t len = Py_SIZE(self->stack);
6106 
6107     /* Note that we split the (pickle.py) stack into two stacks,
6108      * an object stack and a mark stack. We have to be clever and
6109      * pop the right one. We do this by looking at the top of the
6110      * mark stack first, and only signalling a stack underflow if
6111      * the object stack is empty and the mark stack doesn't match
6112      * our expectations.
6113      */
6114     if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
6115         self->num_marks--;
6116         self->stack->mark_set = self->num_marks != 0;
6117         self->stack->fence = self->num_marks ?
6118                 self->marks[self->num_marks - 1] : 0;
6119     } else if (len <= self->stack->fence)
6120         return Pdata_stack_underflow(self->stack);
6121     else {
6122         len--;
6123         Py_DECREF(self->stack->data[len]);
6124         Py_SIZE(self->stack) = len;
6125     }
6126     return 0;
6127 }
6128 
6129 static int
load_pop_mark(UnpicklerObject * self)6130 load_pop_mark(UnpicklerObject *self)
6131 {
6132     Py_ssize_t i;
6133 
6134     if ((i = marker(self)) < 0)
6135         return -1;
6136 
6137     Pdata_clear(self->stack, i);
6138 
6139     return 0;
6140 }
6141 
6142 static int
load_dup(UnpicklerObject * self)6143 load_dup(UnpicklerObject *self)
6144 {
6145     PyObject *last;
6146     Py_ssize_t len = Py_SIZE(self->stack);
6147 
6148     if (len <= self->stack->fence)
6149         return Pdata_stack_underflow(self->stack);
6150     last = self->stack->data[len - 1];
6151     PDATA_APPEND(self->stack, last, -1);
6152     return 0;
6153 }
6154 
6155 static int
load_get(UnpicklerObject * self)6156 load_get(UnpicklerObject *self)
6157 {
6158     PyObject *key, *value;
6159     Py_ssize_t idx;
6160     Py_ssize_t len;
6161     char *s;
6162 
6163     if ((len = _Unpickler_Readline(self, &s)) < 0)
6164         return -1;
6165     if (len < 2)
6166         return bad_readline();
6167 
6168     key = PyLong_FromString(s, NULL, 10);
6169     if (key == NULL)
6170         return -1;
6171     idx = PyLong_AsSsize_t(key);
6172     if (idx == -1 && PyErr_Occurred()) {
6173         Py_DECREF(key);
6174         return -1;
6175     }
6176 
6177     value = _Unpickler_MemoGet(self, idx);
6178     if (value == NULL) {
6179         if (!PyErr_Occurred())
6180             PyErr_SetObject(PyExc_KeyError, key);
6181         Py_DECREF(key);
6182         return -1;
6183     }
6184     Py_DECREF(key);
6185 
6186     PDATA_APPEND(self->stack, value, -1);
6187     return 0;
6188 }
6189 
6190 static int
load_binget(UnpicklerObject * self)6191 load_binget(UnpicklerObject *self)
6192 {
6193     PyObject *value;
6194     Py_ssize_t idx;
6195     char *s;
6196 
6197     if (_Unpickler_Read(self, &s, 1) < 0)
6198         return -1;
6199 
6200     idx = Py_CHARMASK(s[0]);
6201 
6202     value = _Unpickler_MemoGet(self, idx);
6203     if (value == NULL) {
6204         PyObject *key = PyLong_FromSsize_t(idx);
6205         if (key != NULL) {
6206             PyErr_SetObject(PyExc_KeyError, key);
6207             Py_DECREF(key);
6208         }
6209         return -1;
6210     }
6211 
6212     PDATA_APPEND(self->stack, value, -1);
6213     return 0;
6214 }
6215 
6216 static int
load_long_binget(UnpicklerObject * self)6217 load_long_binget(UnpicklerObject *self)
6218 {
6219     PyObject *value;
6220     Py_ssize_t idx;
6221     char *s;
6222 
6223     if (_Unpickler_Read(self, &s, 4) < 0)
6224         return -1;
6225 
6226     idx = calc_binsize(s, 4);
6227 
6228     value = _Unpickler_MemoGet(self, idx);
6229     if (value == NULL) {
6230         PyObject *key = PyLong_FromSsize_t(idx);
6231         if (key != NULL) {
6232             PyErr_SetObject(PyExc_KeyError, key);
6233             Py_DECREF(key);
6234         }
6235         return -1;
6236     }
6237 
6238     PDATA_APPEND(self->stack, value, -1);
6239     return 0;
6240 }
6241 
6242 /* Push an object from the extension registry (EXT[124]).  nbytes is
6243  * the number of bytes following the opcode, holding the index (code) value.
6244  */
6245 static int
load_extension(UnpicklerObject * self,int nbytes)6246 load_extension(UnpicklerObject *self, int nbytes)
6247 {
6248     char *codebytes;            /* the nbytes bytes after the opcode */
6249     long code;                  /* calc_binint returns long */
6250     PyObject *py_code;          /* code as a Python int */
6251     PyObject *obj;              /* the object to push */
6252     PyObject *pair;             /* (module_name, class_name) */
6253     PyObject *module_name, *class_name;
6254     PickleState *st = _Pickle_GetGlobalState();
6255 
6256     assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
6257     if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
6258         return -1;
6259     code = calc_binint(codebytes, nbytes);
6260     if (code <= 0) {            /* note that 0 is forbidden */
6261         /* Corrupt or hostile pickle. */
6262         PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
6263         return -1;
6264     }
6265 
6266     /* Look for the code in the cache. */
6267     py_code = PyLong_FromLong(code);
6268     if (py_code == NULL)
6269         return -1;
6270     obj = PyDict_GetItemWithError(st->extension_cache, py_code);
6271     if (obj != NULL) {
6272         /* Bingo. */
6273         Py_DECREF(py_code);
6274         PDATA_APPEND(self->stack, obj, -1);
6275         return 0;
6276     }
6277     if (PyErr_Occurred()) {
6278         Py_DECREF(py_code);
6279         return -1;
6280     }
6281 
6282     /* Look up the (module_name, class_name) pair. */
6283     pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
6284     if (pair == NULL) {
6285         Py_DECREF(py_code);
6286         if (!PyErr_Occurred()) {
6287             PyErr_Format(PyExc_ValueError, "unregistered extension "
6288                          "code %ld", code);
6289         }
6290         return -1;
6291     }
6292     /* Since the extension registry is manipulable via Python code,
6293      * confirm that pair is really a 2-tuple of strings.
6294      */
6295     if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6296         goto error;
6297     }
6298 
6299     module_name = PyTuple_GET_ITEM(pair, 0);
6300     if (!PyUnicode_Check(module_name)) {
6301         goto error;
6302     }
6303 
6304     class_name = PyTuple_GET_ITEM(pair, 1);
6305     if (!PyUnicode_Check(class_name)) {
6306         goto error;
6307     }
6308 
6309     /* Load the object. */
6310     obj = find_class(self, module_name, class_name);
6311     if (obj == NULL) {
6312         Py_DECREF(py_code);
6313         return -1;
6314     }
6315     /* Cache code -> obj. */
6316     code = PyDict_SetItem(st->extension_cache, py_code, obj);
6317     Py_DECREF(py_code);
6318     if (code < 0) {
6319         Py_DECREF(obj);
6320         return -1;
6321     }
6322     PDATA_PUSH(self->stack, obj, -1);
6323     return 0;
6324 
6325 error:
6326     Py_DECREF(py_code);
6327     PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6328                  "isn't a 2-tuple of strings", code);
6329     return -1;
6330 }
6331 
6332 static int
load_put(UnpicklerObject * self)6333 load_put(UnpicklerObject *self)
6334 {
6335     PyObject *key, *value;
6336     Py_ssize_t idx;
6337     Py_ssize_t len;
6338     char *s = NULL;
6339 
6340     if ((len = _Unpickler_Readline(self, &s)) < 0)
6341         return -1;
6342     if (len < 2)
6343         return bad_readline();
6344     if (Py_SIZE(self->stack) <= self->stack->fence)
6345         return Pdata_stack_underflow(self->stack);
6346     value = self->stack->data[Py_SIZE(self->stack) - 1];
6347 
6348     key = PyLong_FromString(s, NULL, 10);
6349     if (key == NULL)
6350         return -1;
6351     idx = PyLong_AsSsize_t(key);
6352     Py_DECREF(key);
6353     if (idx < 0) {
6354         if (!PyErr_Occurred())
6355             PyErr_SetString(PyExc_ValueError,
6356                             "negative PUT argument");
6357         return -1;
6358     }
6359 
6360     return _Unpickler_MemoPut(self, idx, value);
6361 }
6362 
6363 static int
load_binput(UnpicklerObject * self)6364 load_binput(UnpicklerObject *self)
6365 {
6366     PyObject *value;
6367     Py_ssize_t idx;
6368     char *s;
6369 
6370     if (_Unpickler_Read(self, &s, 1) < 0)
6371         return -1;
6372 
6373     if (Py_SIZE(self->stack) <= self->stack->fence)
6374         return Pdata_stack_underflow(self->stack);
6375     value = self->stack->data[Py_SIZE(self->stack) - 1];
6376 
6377     idx = Py_CHARMASK(s[0]);
6378 
6379     return _Unpickler_MemoPut(self, idx, value);
6380 }
6381 
6382 static int
load_long_binput(UnpicklerObject * self)6383 load_long_binput(UnpicklerObject *self)
6384 {
6385     PyObject *value;
6386     Py_ssize_t idx;
6387     char *s;
6388 
6389     if (_Unpickler_Read(self, &s, 4) < 0)
6390         return -1;
6391 
6392     if (Py_SIZE(self->stack) <= self->stack->fence)
6393         return Pdata_stack_underflow(self->stack);
6394     value = self->stack->data[Py_SIZE(self->stack) - 1];
6395 
6396     idx = calc_binsize(s, 4);
6397     if (idx < 0) {
6398         PyErr_SetString(PyExc_ValueError,
6399                         "negative LONG_BINPUT argument");
6400         return -1;
6401     }
6402 
6403     return _Unpickler_MemoPut(self, idx, value);
6404 }
6405 
6406 static int
load_memoize(UnpicklerObject * self)6407 load_memoize(UnpicklerObject *self)
6408 {
6409     PyObject *value;
6410 
6411     if (Py_SIZE(self->stack) <= self->stack->fence)
6412         return Pdata_stack_underflow(self->stack);
6413     value = self->stack->data[Py_SIZE(self->stack) - 1];
6414 
6415     return _Unpickler_MemoPut(self, self->memo_len, value);
6416 }
6417 
6418 static int
do_append(UnpicklerObject * self,Py_ssize_t x)6419 do_append(UnpicklerObject *self, Py_ssize_t x)
6420 {
6421     PyObject *value;
6422     PyObject *slice;
6423     PyObject *list;
6424     PyObject *result;
6425     Py_ssize_t len, i;
6426 
6427     len = Py_SIZE(self->stack);
6428     if (x > len || x <= self->stack->fence)
6429         return Pdata_stack_underflow(self->stack);
6430     if (len == x)  /* nothing to do */
6431         return 0;
6432 
6433     list = self->stack->data[x - 1];
6434 
6435     if (PyList_CheckExact(list)) {
6436         Py_ssize_t list_len;
6437         int ret;
6438 
6439         slice = Pdata_poplist(self->stack, x);
6440         if (!slice)
6441             return -1;
6442         list_len = PyList_GET_SIZE(list);
6443         ret = PyList_SetSlice(list, list_len, list_len, slice);
6444         Py_DECREF(slice);
6445         return ret;
6446     }
6447     else {
6448         PyObject *extend_func;
6449         _Py_IDENTIFIER(extend);
6450 
6451         if (_PyObject_LookupAttrId(list, &PyId_extend, &extend_func) < 0) {
6452             return -1;
6453         }
6454         if (extend_func != NULL) {
6455             slice = Pdata_poplist(self->stack, x);
6456             if (!slice) {
6457                 Py_DECREF(extend_func);
6458                 return -1;
6459             }
6460             result = _Pickle_FastCall(extend_func, slice);
6461             Py_DECREF(extend_func);
6462             if (result == NULL)
6463                 return -1;
6464             Py_DECREF(result);
6465         }
6466         else {
6467             PyObject *append_func;
6468             _Py_IDENTIFIER(append);
6469 
6470             /* Even if the PEP 307 requires extend() and append() methods,
6471                fall back on append() if the object has no extend() method
6472                for backward compatibility. */
6473             append_func = _PyObject_GetAttrId(list, &PyId_append);
6474             if (append_func == NULL)
6475                 return -1;
6476             for (i = x; i < len; i++) {
6477                 value = self->stack->data[i];
6478                 result = _Pickle_FastCall(append_func, value);
6479                 if (result == NULL) {
6480                     Pdata_clear(self->stack, i + 1);
6481                     Py_SIZE(self->stack) = x;
6482                     Py_DECREF(append_func);
6483                     return -1;
6484                 }
6485                 Py_DECREF(result);
6486             }
6487             Py_SIZE(self->stack) = x;
6488             Py_DECREF(append_func);
6489         }
6490     }
6491 
6492     return 0;
6493 }
6494 
6495 static int
load_append(UnpicklerObject * self)6496 load_append(UnpicklerObject *self)
6497 {
6498     if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6499         return Pdata_stack_underflow(self->stack);
6500     return do_append(self, Py_SIZE(self->stack) - 1);
6501 }
6502 
6503 static int
load_appends(UnpicklerObject * self)6504 load_appends(UnpicklerObject *self)
6505 {
6506     Py_ssize_t i = marker(self);
6507     if (i < 0)
6508         return -1;
6509     return do_append(self, i);
6510 }
6511 
6512 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)6513 do_setitems(UnpicklerObject *self, Py_ssize_t x)
6514 {
6515     PyObject *value, *key;
6516     PyObject *dict;
6517     Py_ssize_t len, i;
6518     int status = 0;
6519 
6520     len = Py_SIZE(self->stack);
6521     if (x > len || x <= self->stack->fence)
6522         return Pdata_stack_underflow(self->stack);
6523     if (len == x)  /* nothing to do */
6524         return 0;
6525     if ((len - x) % 2 != 0) {
6526         PickleState *st = _Pickle_GetGlobalState();
6527         /* Currupt or hostile pickle -- we never write one like this. */
6528         PyErr_SetString(st->UnpicklingError,
6529                         "odd number of items for SETITEMS");
6530         return -1;
6531     }
6532 
6533     /* Here, dict does not actually need to be a PyDict; it could be anything
6534        that supports the __setitem__ attribute. */
6535     dict = self->stack->data[x - 1];
6536 
6537     for (i = x + 1; i < len; i += 2) {
6538         key = self->stack->data[i - 1];
6539         value = self->stack->data[i];
6540         if (PyObject_SetItem(dict, key, value) < 0) {
6541             status = -1;
6542             break;
6543         }
6544     }
6545 
6546     Pdata_clear(self->stack, x);
6547     return status;
6548 }
6549 
6550 static int
load_setitem(UnpicklerObject * self)6551 load_setitem(UnpicklerObject *self)
6552 {
6553     return do_setitems(self, Py_SIZE(self->stack) - 2);
6554 }
6555 
6556 static int
load_setitems(UnpicklerObject * self)6557 load_setitems(UnpicklerObject *self)
6558 {
6559     Py_ssize_t i = marker(self);
6560     if (i < 0)
6561         return -1;
6562     return do_setitems(self, i);
6563 }
6564 
6565 static int
load_additems(UnpicklerObject * self)6566 load_additems(UnpicklerObject *self)
6567 {
6568     PyObject *set;
6569     Py_ssize_t mark, len, i;
6570 
6571     mark =  marker(self);
6572     if (mark < 0)
6573         return -1;
6574     len = Py_SIZE(self->stack);
6575     if (mark > len || mark <= self->stack->fence)
6576         return Pdata_stack_underflow(self->stack);
6577     if (len == mark)  /* nothing to do */
6578         return 0;
6579 
6580     set = self->stack->data[mark - 1];
6581 
6582     if (PySet_Check(set)) {
6583         PyObject *items;
6584         int status;
6585 
6586         items = Pdata_poptuple(self->stack, mark);
6587         if (items == NULL)
6588             return -1;
6589 
6590         status = _PySet_Update(set, items);
6591         Py_DECREF(items);
6592         return status;
6593     }
6594     else {
6595         PyObject *add_func;
6596         _Py_IDENTIFIER(add);
6597 
6598         add_func = _PyObject_GetAttrId(set, &PyId_add);
6599         if (add_func == NULL)
6600             return -1;
6601         for (i = mark; i < len; i++) {
6602             PyObject *result;
6603             PyObject *item;
6604 
6605             item = self->stack->data[i];
6606             result = _Pickle_FastCall(add_func, item);
6607             if (result == NULL) {
6608                 Pdata_clear(self->stack, i + 1);
6609                 Py_SIZE(self->stack) = mark;
6610                 return -1;
6611             }
6612             Py_DECREF(result);
6613         }
6614         Py_SIZE(self->stack) = mark;
6615     }
6616 
6617     return 0;
6618 }
6619 
6620 static int
load_build(UnpicklerObject * self)6621 load_build(UnpicklerObject *self)
6622 {
6623     PyObject *state, *inst, *slotstate;
6624     PyObject *setstate;
6625     int status = 0;
6626     _Py_IDENTIFIER(__setstate__);
6627 
6628     /* Stack is ... instance, state.  We want to leave instance at
6629      * the stack top, possibly mutated via instance.__setstate__(state).
6630      */
6631     if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6632         return Pdata_stack_underflow(self->stack);
6633 
6634     PDATA_POP(self->stack, state);
6635     if (state == NULL)
6636         return -1;
6637 
6638     inst = self->stack->data[Py_SIZE(self->stack) - 1];
6639 
6640     if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6641         Py_DECREF(state);
6642         return -1;
6643     }
6644     if (setstate != NULL) {
6645         PyObject *result;
6646 
6647         /* The explicit __setstate__ is responsible for everything. */
6648         result = _Pickle_FastCall(setstate, state);
6649         Py_DECREF(setstate);
6650         if (result == NULL)
6651             return -1;
6652         Py_DECREF(result);
6653         return 0;
6654     }
6655 
6656     /* A default __setstate__.  First see whether state embeds a
6657      * slot state dict too (a proto 2 addition).
6658      */
6659     if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6660         PyObject *tmp = state;
6661 
6662         state = PyTuple_GET_ITEM(tmp, 0);
6663         slotstate = PyTuple_GET_ITEM(tmp, 1);
6664         Py_INCREF(state);
6665         Py_INCREF(slotstate);
6666         Py_DECREF(tmp);
6667     }
6668     else
6669         slotstate = NULL;
6670 
6671     /* Set inst.__dict__ from the state dict (if any). */
6672     if (state != Py_None) {
6673         PyObject *dict;
6674         PyObject *d_key, *d_value;
6675         Py_ssize_t i;
6676         _Py_IDENTIFIER(__dict__);
6677 
6678         if (!PyDict_Check(state)) {
6679             PickleState *st = _Pickle_GetGlobalState();
6680             PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6681             goto error;
6682         }
6683         dict = _PyObject_GetAttrId(inst, &PyId___dict__);
6684         if (dict == NULL)
6685             goto error;
6686 
6687         i = 0;
6688         while (PyDict_Next(state, &i, &d_key, &d_value)) {
6689             /* normally the keys for instance attributes are
6690                interned.  we should try to do that here. */
6691             Py_INCREF(d_key);
6692             if (PyUnicode_CheckExact(d_key))
6693                 PyUnicode_InternInPlace(&d_key);
6694             if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6695                 Py_DECREF(d_key);
6696                 goto error;
6697             }
6698             Py_DECREF(d_key);
6699         }
6700         Py_DECREF(dict);
6701     }
6702 
6703     /* Also set instance attributes from the slotstate dict (if any). */
6704     if (slotstate != NULL) {
6705         PyObject *d_key, *d_value;
6706         Py_ssize_t i;
6707 
6708         if (!PyDict_Check(slotstate)) {
6709             PickleState *st = _Pickle_GetGlobalState();
6710             PyErr_SetString(st->UnpicklingError,
6711                             "slot state is not a dictionary");
6712             goto error;
6713         }
6714         i = 0;
6715         while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6716             if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6717                 goto error;
6718         }
6719     }
6720 
6721     if (0) {
6722   error:
6723         status = -1;
6724     }
6725 
6726     Py_DECREF(state);
6727     Py_XDECREF(slotstate);
6728     return status;
6729 }
6730 
6731 static int
load_mark(UnpicklerObject * self)6732 load_mark(UnpicklerObject *self)
6733 {
6734 
6735     /* Note that we split the (pickle.py) stack into two stacks, an
6736      * object stack and a mark stack. Here we push a mark onto the
6737      * mark stack.
6738      */
6739 
6740     if (self->num_marks >= self->marks_size) {
6741         size_t alloc = ((size_t)self->num_marks << 1) + 20;
6742         Py_ssize_t *marks_new = self->marks;
6743         PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6744         if (marks_new == NULL) {
6745             PyErr_NoMemory();
6746             return -1;
6747         }
6748         self->marks = marks_new;
6749         self->marks_size = (Py_ssize_t)alloc;
6750     }
6751 
6752     self->stack->mark_set = 1;
6753     self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6754 
6755     return 0;
6756 }
6757 
6758 static int
load_reduce(UnpicklerObject * self)6759 load_reduce(UnpicklerObject *self)
6760 {
6761     PyObject *callable = NULL;
6762     PyObject *argtup = NULL;
6763     PyObject *obj = NULL;
6764 
6765     PDATA_POP(self->stack, argtup);
6766     if (argtup == NULL)
6767         return -1;
6768     PDATA_POP(self->stack, callable);
6769     if (callable) {
6770         obj = PyObject_CallObject(callable, argtup);
6771         Py_DECREF(callable);
6772     }
6773     Py_DECREF(argtup);
6774 
6775     if (obj == NULL)
6776         return -1;
6777 
6778     PDATA_PUSH(self->stack, obj, -1);
6779     return 0;
6780 }
6781 
6782 /* Just raises an error if we don't know the protocol specified.  PROTO
6783  * is the first opcode for protocols >= 2.
6784  */
6785 static int
load_proto(UnpicklerObject * self)6786 load_proto(UnpicklerObject *self)
6787 {
6788     char *s;
6789     int i;
6790 
6791     if (_Unpickler_Read(self, &s, 1) < 0)
6792         return -1;
6793 
6794     i = (unsigned char)s[0];
6795     if (i <= HIGHEST_PROTOCOL) {
6796         self->proto = i;
6797         return 0;
6798     }
6799 
6800     PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6801     return -1;
6802 }
6803 
6804 static int
load_frame(UnpicklerObject * self)6805 load_frame(UnpicklerObject *self)
6806 {
6807     char *s;
6808     Py_ssize_t frame_len;
6809 
6810     if (_Unpickler_Read(self, &s, 8) < 0)
6811         return -1;
6812 
6813     frame_len = calc_binsize(s, 8);
6814     if (frame_len < 0) {
6815         PyErr_Format(PyExc_OverflowError,
6816                      "FRAME length exceeds system's maximum of %zd bytes",
6817                      PY_SSIZE_T_MAX);
6818         return -1;
6819     }
6820 
6821     if (_Unpickler_Read(self, &s, frame_len) < 0)
6822         return -1;
6823 
6824     /* Rewind to start of frame */
6825     self->next_read_idx -= frame_len;
6826     return 0;
6827 }
6828 
6829 static PyObject *
load(UnpicklerObject * self)6830 load(UnpicklerObject *self)
6831 {
6832     PyObject *value = NULL;
6833     char *s = NULL;
6834 
6835     self->num_marks = 0;
6836     self->stack->mark_set = 0;
6837     self->stack->fence = 0;
6838     self->proto = 0;
6839     if (Py_SIZE(self->stack))
6840         Pdata_clear(self->stack, 0);
6841 
6842     /* Convenient macros for the dispatch while-switch loop just below. */
6843 #define OP(opcode, load_func) \
6844     case opcode: if (load_func(self) < 0) break; continue;
6845 
6846 #define OP_ARG(opcode, load_func, arg) \
6847     case opcode: if (load_func(self, (arg)) < 0) break; continue;
6848 
6849     while (1) {
6850         if (_Unpickler_Read(self, &s, 1) < 0) {
6851             PickleState *st = _Pickle_GetGlobalState();
6852             if (PyErr_ExceptionMatches(st->UnpicklingError)) {
6853                 PyErr_Format(PyExc_EOFError, "Ran out of input");
6854             }
6855             return NULL;
6856         }
6857 
6858         switch ((enum opcode)s[0]) {
6859         OP(NONE, load_none)
6860         OP(BININT, load_binint)
6861         OP(BININT1, load_binint1)
6862         OP(BININT2, load_binint2)
6863         OP(INT, load_int)
6864         OP(LONG, load_long)
6865         OP_ARG(LONG1, load_counted_long, 1)
6866         OP_ARG(LONG4, load_counted_long, 4)
6867         OP(FLOAT, load_float)
6868         OP(BINFLOAT, load_binfloat)
6869         OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6870         OP_ARG(BINBYTES, load_counted_binbytes, 4)
6871         OP_ARG(BINBYTES8, load_counted_binbytes, 8)
6872         OP(BYTEARRAY8, load_counted_bytearray)
6873         OP(NEXT_BUFFER, load_next_buffer)
6874         OP(READONLY_BUFFER, load_readonly_buffer)
6875         OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6876         OP_ARG(BINSTRING, load_counted_binstring, 4)
6877         OP(STRING, load_string)
6878         OP(UNICODE, load_unicode)
6879         OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6880         OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6881         OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
6882         OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6883         OP_ARG(TUPLE1, load_counted_tuple, 1)
6884         OP_ARG(TUPLE2, load_counted_tuple, 2)
6885         OP_ARG(TUPLE3, load_counted_tuple, 3)
6886         OP(TUPLE, load_tuple)
6887         OP(EMPTY_LIST, load_empty_list)
6888         OP(LIST, load_list)
6889         OP(EMPTY_DICT, load_empty_dict)
6890         OP(DICT, load_dict)
6891         OP(EMPTY_SET, load_empty_set)
6892         OP(ADDITEMS, load_additems)
6893         OP(FROZENSET, load_frozenset)
6894         OP(OBJ, load_obj)
6895         OP(INST, load_inst)
6896         OP(NEWOBJ, load_newobj)
6897         OP(NEWOBJ_EX, load_newobj_ex)
6898         OP(GLOBAL, load_global)
6899         OP(STACK_GLOBAL, load_stack_global)
6900         OP(APPEND, load_append)
6901         OP(APPENDS, load_appends)
6902         OP(BUILD, load_build)
6903         OP(DUP, load_dup)
6904         OP(BINGET, load_binget)
6905         OP(LONG_BINGET, load_long_binget)
6906         OP(GET, load_get)
6907         OP(MARK, load_mark)
6908         OP(BINPUT, load_binput)
6909         OP(LONG_BINPUT, load_long_binput)
6910         OP(PUT, load_put)
6911         OP(MEMOIZE, load_memoize)
6912         OP(POP, load_pop)
6913         OP(POP_MARK, load_pop_mark)
6914         OP(SETITEM, load_setitem)
6915         OP(SETITEMS, load_setitems)
6916         OP(PERSID, load_persid)
6917         OP(BINPERSID, load_binpersid)
6918         OP(REDUCE, load_reduce)
6919         OP(PROTO, load_proto)
6920         OP(FRAME, load_frame)
6921         OP_ARG(EXT1, load_extension, 1)
6922         OP_ARG(EXT2, load_extension, 2)
6923         OP_ARG(EXT4, load_extension, 4)
6924         OP_ARG(NEWTRUE, load_bool, Py_True)
6925         OP_ARG(NEWFALSE, load_bool, Py_False)
6926 
6927         case STOP:
6928             break;
6929 
6930         default:
6931             {
6932                 PickleState *st = _Pickle_GetGlobalState();
6933                 unsigned char c = (unsigned char) *s;
6934                 if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
6935                     PyErr_Format(st->UnpicklingError,
6936                                  "invalid load key, '%c'.", c);
6937                 }
6938                 else {
6939                     PyErr_Format(st->UnpicklingError,
6940                                  "invalid load key, '\\x%02x'.", c);
6941                 }
6942                 return NULL;
6943             }
6944         }
6945 
6946         break;                  /* and we are done! */
6947     }
6948 
6949     if (PyErr_Occurred()) {
6950         return NULL;
6951     }
6952 
6953     if (_Unpickler_SkipConsumed(self) < 0)
6954         return NULL;
6955 
6956     PDATA_POP(self->stack, value);
6957     return value;
6958 }
6959 
6960 /*[clinic input]
6961 
6962 _pickle.Unpickler.load
6963 
6964 Load a pickle.
6965 
6966 Read a pickled object representation from the open file object given
6967 in the constructor, and return the reconstituted object hierarchy
6968 specified therein.
6969 [clinic start generated code]*/
6970 
6971 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)6972 _pickle_Unpickler_load_impl(UnpicklerObject *self)
6973 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
6974 {
6975     UnpicklerObject *unpickler = (UnpicklerObject*)self;
6976 
6977     /* Check whether the Unpickler was initialized correctly. This prevents
6978        segfaulting if a subclass overridden __init__ with a function that does
6979        not call Unpickler.__init__(). Here, we simply ensure that self->read
6980        is not NULL. */
6981     if (unpickler->read == NULL) {
6982         PickleState *st = _Pickle_GetGlobalState();
6983         PyErr_Format(st->UnpicklingError,
6984                      "Unpickler.__init__() was not called by %s.__init__()",
6985                      Py_TYPE(unpickler)->tp_name);
6986         return NULL;
6987     }
6988 
6989     return load(unpickler);
6990 }
6991 
6992 /* The name of find_class() is misleading. In newer pickle protocols, this
6993    function is used for loading any global (i.e., functions), not just
6994    classes. The name is kept only for backward compatibility. */
6995 
6996 /*[clinic input]
6997 
6998 _pickle.Unpickler.find_class
6999 
7000   module_name: object
7001   global_name: object
7002   /
7003 
7004 Return an object from a specified module.
7005 
7006 If necessary, the module will be imported. Subclasses may override
7007 this method (e.g. to restrict unpickling of arbitrary classes and
7008 functions).
7009 
7010 This method is called whenever a class or a function object is
7011 needed.  Both arguments passed are str objects.
7012 [clinic start generated code]*/
7013 
7014 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)7015 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7016                                   PyObject *module_name,
7017                                   PyObject *global_name)
7018 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
7019 {
7020     PyObject *global;
7021     PyObject *module;
7022 
7023     if (PySys_Audit("pickle.find_class", "OO",
7024                     module_name, global_name) < 0) {
7025         return NULL;
7026     }
7027 
7028     /* Try to map the old names used in Python 2.x to the new ones used in
7029        Python 3.x.  We do this only with old pickle protocols and when the
7030        user has not disabled the feature. */
7031     if (self->proto < 3 && self->fix_imports) {
7032         PyObject *key;
7033         PyObject *item;
7034         PickleState *st = _Pickle_GetGlobalState();
7035 
7036         /* Check if the global (i.e., a function or a class) was renamed
7037            or moved to another module. */
7038         key = PyTuple_Pack(2, module_name, global_name);
7039         if (key == NULL)
7040             return NULL;
7041         item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
7042         Py_DECREF(key);
7043         if (item) {
7044             if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7045                 PyErr_Format(PyExc_RuntimeError,
7046                              "_compat_pickle.NAME_MAPPING values should be "
7047                              "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7048                 return NULL;
7049             }
7050             module_name = PyTuple_GET_ITEM(item, 0);
7051             global_name = PyTuple_GET_ITEM(item, 1);
7052             if (!PyUnicode_Check(module_name) ||
7053                 !PyUnicode_Check(global_name)) {
7054                 PyErr_Format(PyExc_RuntimeError,
7055                              "_compat_pickle.NAME_MAPPING values should be "
7056                              "pairs of str, not (%.200s, %.200s)",
7057                              Py_TYPE(module_name)->tp_name,
7058                              Py_TYPE(global_name)->tp_name);
7059                 return NULL;
7060             }
7061         }
7062         else if (PyErr_Occurred()) {
7063             return NULL;
7064         }
7065         else {
7066             /* Check if the module was renamed. */
7067             item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7068             if (item) {
7069                 if (!PyUnicode_Check(item)) {
7070                     PyErr_Format(PyExc_RuntimeError,
7071                                 "_compat_pickle.IMPORT_MAPPING values should be "
7072                                 "strings, not %.200s", Py_TYPE(item)->tp_name);
7073                     return NULL;
7074                 }
7075                 module_name = item;
7076             }
7077             else if (PyErr_Occurred()) {
7078                 return NULL;
7079             }
7080         }
7081     }
7082 
7083     /*
7084      * we don't use PyImport_GetModule here, because it can return partially-
7085      * initialised modules, which then cause the getattribute to fail.
7086      */
7087     module = PyImport_Import(module_name);
7088     if (module == NULL) {
7089         return NULL;
7090     }
7091     global = getattribute(module, global_name, self->proto >= 4);
7092     Py_DECREF(module);
7093     return global;
7094 }
7095 
7096 /*[clinic input]
7097 
7098 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
7099 
7100 Returns size in memory, in bytes.
7101 [clinic start generated code]*/
7102 
7103 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)7104 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7105 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7106 {
7107     Py_ssize_t res;
7108 
7109     res = _PyObject_SIZE(Py_TYPE(self));
7110     if (self->memo != NULL)
7111         res += self->memo_size * sizeof(PyObject *);
7112     if (self->marks != NULL)
7113         res += self->marks_size * sizeof(Py_ssize_t);
7114     if (self->input_line != NULL)
7115         res += strlen(self->input_line) + 1;
7116     if (self->encoding != NULL)
7117         res += strlen(self->encoding) + 1;
7118     if (self->errors != NULL)
7119         res += strlen(self->errors) + 1;
7120     return res;
7121 }
7122 
7123 static struct PyMethodDef Unpickler_methods[] = {
7124     _PICKLE_UNPICKLER_LOAD_METHODDEF
7125     _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
7126     _PICKLE_UNPICKLER___SIZEOF___METHODDEF
7127     {NULL, NULL}                /* sentinel */
7128 };
7129 
7130 static void
Unpickler_dealloc(UnpicklerObject * self)7131 Unpickler_dealloc(UnpicklerObject *self)
7132 {
7133     PyObject_GC_UnTrack((PyObject *)self);
7134     Py_XDECREF(self->readline);
7135     Py_XDECREF(self->readinto);
7136     Py_XDECREF(self->read);
7137     Py_XDECREF(self->peek);
7138     Py_XDECREF(self->stack);
7139     Py_XDECREF(self->pers_func);
7140     Py_XDECREF(self->buffers);
7141     if (self->buffer.buf != NULL) {
7142         PyBuffer_Release(&self->buffer);
7143         self->buffer.buf = NULL;
7144     }
7145 
7146     _Unpickler_MemoCleanup(self);
7147     PyMem_Free(self->marks);
7148     PyMem_Free(self->input_line);
7149     PyMem_Free(self->encoding);
7150     PyMem_Free(self->errors);
7151 
7152     Py_TYPE(self)->tp_free((PyObject *)self);
7153 }
7154 
7155 static int
Unpickler_traverse(UnpicklerObject * self,visitproc visit,void * arg)7156 Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
7157 {
7158     Py_VISIT(self->readline);
7159     Py_VISIT(self->readinto);
7160     Py_VISIT(self->read);
7161     Py_VISIT(self->peek);
7162     Py_VISIT(self->stack);
7163     Py_VISIT(self->pers_func);
7164     Py_VISIT(self->buffers);
7165     return 0;
7166 }
7167 
7168 static int
Unpickler_clear(UnpicklerObject * self)7169 Unpickler_clear(UnpicklerObject *self)
7170 {
7171     Py_CLEAR(self->readline);
7172     Py_CLEAR(self->readinto);
7173     Py_CLEAR(self->read);
7174     Py_CLEAR(self->peek);
7175     Py_CLEAR(self->stack);
7176     Py_CLEAR(self->pers_func);
7177     Py_CLEAR(self->buffers);
7178     if (self->buffer.buf != NULL) {
7179         PyBuffer_Release(&self->buffer);
7180         self->buffer.buf = NULL;
7181     }
7182 
7183     _Unpickler_MemoCleanup(self);
7184     PyMem_Free(self->marks);
7185     self->marks = NULL;
7186     PyMem_Free(self->input_line);
7187     self->input_line = NULL;
7188     PyMem_Free(self->encoding);
7189     self->encoding = NULL;
7190     PyMem_Free(self->errors);
7191     self->errors = NULL;
7192 
7193     return 0;
7194 }
7195 
7196 /*[clinic input]
7197 
7198 _pickle.Unpickler.__init__
7199 
7200   file: object
7201   *
7202   fix_imports: bool = True
7203   encoding: str = 'ASCII'
7204   errors: str = 'strict'
7205   buffers: object(c_default="NULL") = ()
7206 
7207 This takes a binary file for reading a pickle data stream.
7208 
7209 The protocol version of the pickle is detected automatically, so no
7210 protocol argument is needed.  Bytes past the pickled object's
7211 representation are ignored.
7212 
7213 The argument *file* must have two methods, a read() method that takes
7214 an integer argument, and a readline() method that requires no
7215 arguments.  Both methods should return bytes.  Thus *file* can be a
7216 binary file object opened for reading, an io.BytesIO object, or any
7217 other custom object that meets this interface.
7218 
7219 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7220 which are used to control compatibility support for pickle stream
7221 generated by Python 2.  If *fix_imports* is True, pickle will try to
7222 map the old Python 2 names to the new names used in Python 3.  The
7223 *encoding* and *errors* tell pickle how to decode 8-bit string
7224 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7225 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7226 string instances as bytes objects.
7227 [clinic start generated code]*/
7228 
7229 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7230 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7231                                 int fix_imports, const char *encoding,
7232                                 const char *errors, PyObject *buffers)
7233 /*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
7234 {
7235     _Py_IDENTIFIER(persistent_load);
7236 
7237     /* In case of multiple __init__() calls, clear previous content. */
7238     if (self->read != NULL)
7239         (void)Unpickler_clear(self);
7240 
7241     if (_Unpickler_SetInputStream(self, file) < 0)
7242         return -1;
7243 
7244     if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
7245         return -1;
7246 
7247     if (_Unpickler_SetBuffers(self, buffers) < 0)
7248         return -1;
7249 
7250     self->fix_imports = fix_imports;
7251 
7252     if (init_method_ref((PyObject *)self, &PyId_persistent_load,
7253                         &self->pers_func, &self->pers_func_self) < 0)
7254     {
7255         return -1;
7256     }
7257 
7258     self->stack = (Pdata *)Pdata_New();
7259     if (self->stack == NULL)
7260         return -1;
7261 
7262     self->memo_size = 32;
7263     self->memo = _Unpickler_NewMemo(self->memo_size);
7264     if (self->memo == NULL)
7265         return -1;
7266 
7267     self->proto = 0;
7268 
7269     return 0;
7270 }
7271 
7272 
7273 /* Define a proxy object for the Unpickler's internal memo object. This is to
7274  * avoid breaking code like:
7275  *  unpickler.memo.clear()
7276  * and
7277  *  unpickler.memo = saved_memo
7278  * Is this a good idea? Not really, but we don't want to break code that uses
7279  * it. Note that we don't implement the entire mapping API here. This is
7280  * intentional, as these should be treated as black-box implementation details.
7281  *
7282  * We do, however, have to implement pickling/unpickling support because of
7283  * real-world code like cvs2svn.
7284  */
7285 
7286 /*[clinic input]
7287 _pickle.UnpicklerMemoProxy.clear
7288 
7289 Remove all items from memo.
7290 [clinic start generated code]*/
7291 
7292 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)7293 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
7294 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
7295 {
7296     _Unpickler_MemoCleanup(self->unpickler);
7297     self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7298     if (self->unpickler->memo == NULL)
7299         return NULL;
7300     Py_RETURN_NONE;
7301 }
7302 
7303 /*[clinic input]
7304 _pickle.UnpicklerMemoProxy.copy
7305 
7306 Copy the memo to a new object.
7307 [clinic start generated code]*/
7308 
7309 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)7310 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
7311 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
7312 {
7313     size_t i;
7314     PyObject *new_memo = PyDict_New();
7315     if (new_memo == NULL)
7316         return NULL;
7317 
7318     for (i = 0; i < self->unpickler->memo_size; i++) {
7319         int status;
7320         PyObject *key, *value;
7321 
7322         value = self->unpickler->memo[i];
7323         if (value == NULL)
7324             continue;
7325 
7326         key = PyLong_FromSsize_t(i);
7327         if (key == NULL)
7328             goto error;
7329         status = PyDict_SetItem(new_memo, key, value);
7330         Py_DECREF(key);
7331         if (status < 0)
7332             goto error;
7333     }
7334     return new_memo;
7335 
7336 error:
7337     Py_DECREF(new_memo);
7338     return NULL;
7339 }
7340 
7341 /*[clinic input]
7342 _pickle.UnpicklerMemoProxy.__reduce__
7343 
7344 Implement pickling support.
7345 [clinic start generated code]*/
7346 
7347 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)7348 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
7349 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
7350 {
7351     PyObject *reduce_value;
7352     PyObject *constructor_args;
7353     PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
7354     if (contents == NULL)
7355         return NULL;
7356 
7357     reduce_value = PyTuple_New(2);
7358     if (reduce_value == NULL) {
7359         Py_DECREF(contents);
7360         return NULL;
7361     }
7362     constructor_args = PyTuple_New(1);
7363     if (constructor_args == NULL) {
7364         Py_DECREF(contents);
7365         Py_DECREF(reduce_value);
7366         return NULL;
7367     }
7368     PyTuple_SET_ITEM(constructor_args, 0, contents);
7369     Py_INCREF((PyObject *)&PyDict_Type);
7370     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7371     PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7372     return reduce_value;
7373 }
7374 
7375 static PyMethodDef unpicklerproxy_methods[] = {
7376     _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
7377     _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
7378     _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
7379     {NULL, NULL}    /* sentinel */
7380 };
7381 
7382 static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject * self)7383 UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
7384 {
7385     PyObject_GC_UnTrack(self);
7386     Py_XDECREF(self->unpickler);
7387     PyObject_GC_Del((PyObject *)self);
7388 }
7389 
7390 static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject * self,visitproc visit,void * arg)7391 UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
7392                             visitproc visit, void *arg)
7393 {
7394     Py_VISIT(self->unpickler);
7395     return 0;
7396 }
7397 
7398 static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject * self)7399 UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
7400 {
7401     Py_CLEAR(self->unpickler);
7402     return 0;
7403 }
7404 
7405 static PyTypeObject UnpicklerMemoProxyType = {
7406     PyVarObject_HEAD_INIT(NULL, 0)
7407     "_pickle.UnpicklerMemoProxy",               /*tp_name*/
7408     sizeof(UnpicklerMemoProxyObject),           /*tp_basicsize*/
7409     0,
7410     (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
7411     0,                                          /* tp_vectorcall_offset */
7412     0,                                          /* tp_getattr */
7413     0,                                          /* tp_setattr */
7414     0,                                          /* tp_as_async */
7415     0,                                          /* tp_repr */
7416     0,                                          /* tp_as_number */
7417     0,                                          /* tp_as_sequence */
7418     0,                                          /* tp_as_mapping */
7419     PyObject_HashNotImplemented,                /* tp_hash */
7420     0,                                          /* tp_call */
7421     0,                                          /* tp_str */
7422     PyObject_GenericGetAttr,                    /* tp_getattro */
7423     PyObject_GenericSetAttr,                    /* tp_setattro */
7424     0,                                          /* tp_as_buffer */
7425     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7426     0,                                          /* tp_doc */
7427     (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
7428     (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
7429     0,                                          /* tp_richcompare */
7430     0,                                          /* tp_weaklistoffset */
7431     0,                                          /* tp_iter */
7432     0,                                          /* tp_iternext */
7433     unpicklerproxy_methods,                     /* tp_methods */
7434 };
7435 
7436 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)7437 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7438 {
7439     UnpicklerMemoProxyObject *self;
7440 
7441     self = PyObject_GC_New(UnpicklerMemoProxyObject,
7442                            &UnpicklerMemoProxyType);
7443     if (self == NULL)
7444         return NULL;
7445     Py_INCREF(unpickler);
7446     self->unpickler = unpickler;
7447     PyObject_GC_Track(self);
7448     return (PyObject *)self;
7449 }
7450 
7451 /*****************************************************************************/
7452 
7453 
7454 static PyObject *
Unpickler_get_memo(UnpicklerObject * self,void * Py_UNUSED (ignored))7455 Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
7456 {
7457     return UnpicklerMemoProxy_New(self);
7458 }
7459 
7460 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))7461 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7462 {
7463     PyObject **new_memo;
7464     size_t new_memo_size = 0;
7465 
7466     if (obj == NULL) {
7467         PyErr_SetString(PyExc_TypeError,
7468                         "attribute deletion is not supported");
7469         return -1;
7470     }
7471 
7472     if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
7473         UnpicklerObject *unpickler =
7474             ((UnpicklerMemoProxyObject *)obj)->unpickler;
7475 
7476         new_memo_size = unpickler->memo_size;
7477         new_memo = _Unpickler_NewMemo(new_memo_size);
7478         if (new_memo == NULL)
7479             return -1;
7480 
7481         for (size_t i = 0; i < new_memo_size; i++) {
7482             Py_XINCREF(unpickler->memo[i]);
7483             new_memo[i] = unpickler->memo[i];
7484         }
7485     }
7486     else if (PyDict_Check(obj)) {
7487         Py_ssize_t i = 0;
7488         PyObject *key, *value;
7489 
7490         new_memo_size = PyDict_GET_SIZE(obj);
7491         new_memo = _Unpickler_NewMemo(new_memo_size);
7492         if (new_memo == NULL)
7493             return -1;
7494 
7495         while (PyDict_Next(obj, &i, &key, &value)) {
7496             Py_ssize_t idx;
7497             if (!PyLong_Check(key)) {
7498                 PyErr_SetString(PyExc_TypeError,
7499                                 "memo key must be integers");
7500                 goto error;
7501             }
7502             idx = PyLong_AsSsize_t(key);
7503             if (idx == -1 && PyErr_Occurred())
7504                 goto error;
7505             if (idx < 0) {
7506                 PyErr_SetString(PyExc_ValueError,
7507                                 "memo key must be positive integers.");
7508                 goto error;
7509             }
7510             if (_Unpickler_MemoPut(self, idx, value) < 0)
7511                 goto error;
7512         }
7513     }
7514     else {
7515         PyErr_Format(PyExc_TypeError,
7516                      "'memo' attribute must be an UnpicklerMemoProxy object "
7517                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7518         return -1;
7519     }
7520 
7521     _Unpickler_MemoCleanup(self);
7522     self->memo_size = new_memo_size;
7523     self->memo = new_memo;
7524 
7525     return 0;
7526 
7527   error:
7528     if (new_memo_size) {
7529         for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7530             Py_XDECREF(new_memo[i]);
7531         }
7532         PyMem_FREE(new_memo);
7533     }
7534     return -1;
7535 }
7536 
7537 static PyObject *
Unpickler_get_persload(UnpicklerObject * self,void * Py_UNUSED (ignored))7538 Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7539 {
7540     if (self->pers_func == NULL) {
7541         PyErr_SetString(PyExc_AttributeError, "persistent_load");
7542         return NULL;
7543     }
7544     return reconstruct_method(self->pers_func, self->pers_func_self);
7545 }
7546 
7547 static int
Unpickler_set_persload(UnpicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))7548 Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
7549 {
7550     if (value == NULL) {
7551         PyErr_SetString(PyExc_TypeError,
7552                         "attribute deletion is not supported");
7553         return -1;
7554     }
7555     if (!PyCallable_Check(value)) {
7556         PyErr_SetString(PyExc_TypeError,
7557                         "persistent_load must be a callable taking "
7558                         "one argument");
7559         return -1;
7560     }
7561 
7562     self->pers_func_self = NULL;
7563     Py_INCREF(value);
7564     Py_XSETREF(self->pers_func, value);
7565 
7566     return 0;
7567 }
7568 
7569 static PyGetSetDef Unpickler_getsets[] = {
7570     {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
7571     {"persistent_load", (getter)Unpickler_get_persload,
7572                         (setter)Unpickler_set_persload},
7573     {NULL}
7574 };
7575 
7576 static PyTypeObject Unpickler_Type = {
7577     PyVarObject_HEAD_INIT(NULL, 0)
7578     "_pickle.Unpickler",                /*tp_name*/
7579     sizeof(UnpicklerObject),            /*tp_basicsize*/
7580     0,                                  /*tp_itemsize*/
7581     (destructor)Unpickler_dealloc,      /*tp_dealloc*/
7582     0,                                  /*tp_vectorcall_offset*/
7583     0,                                  /*tp_getattr*/
7584     0,                                  /*tp_setattr*/
7585     0,                                  /*tp_as_async*/
7586     0,                                  /*tp_repr*/
7587     0,                                  /*tp_as_number*/
7588     0,                                  /*tp_as_sequence*/
7589     0,                                  /*tp_as_mapping*/
7590     0,                                  /*tp_hash*/
7591     0,                                  /*tp_call*/
7592     0,                                  /*tp_str*/
7593     0,                                  /*tp_getattro*/
7594     0,                                  /*tp_setattro*/
7595     0,                                  /*tp_as_buffer*/
7596     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7597     _pickle_Unpickler___init____doc__,  /*tp_doc*/
7598     (traverseproc)Unpickler_traverse,   /*tp_traverse*/
7599     (inquiry)Unpickler_clear,           /*tp_clear*/
7600     0,                                  /*tp_richcompare*/
7601     0,                                  /*tp_weaklistoffset*/
7602     0,                                  /*tp_iter*/
7603     0,                                  /*tp_iternext*/
7604     Unpickler_methods,                  /*tp_methods*/
7605     0,                                  /*tp_members*/
7606     Unpickler_getsets,                  /*tp_getset*/
7607     0,                                  /*tp_base*/
7608     0,                                  /*tp_dict*/
7609     0,                                  /*tp_descr_get*/
7610     0,                                  /*tp_descr_set*/
7611     0,                                  /*tp_dictoffset*/
7612     _pickle_Unpickler___init__,         /*tp_init*/
7613     PyType_GenericAlloc,                /*tp_alloc*/
7614     PyType_GenericNew,                  /*tp_new*/
7615     PyObject_GC_Del,                    /*tp_free*/
7616     0,                                  /*tp_is_gc*/
7617 };
7618 
7619 /*[clinic input]
7620 
7621 _pickle.dump
7622 
7623   obj: object
7624   file: object
7625   protocol: object = None
7626   *
7627   fix_imports: bool = True
7628   buffer_callback: object = None
7629 
7630 Write a pickled representation of obj to the open file object file.
7631 
7632 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7633 be more efficient.
7634 
The optional *protocol* argument tells the pickler to use the given
protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
protocol is 4. It was introduced in Python 3.4, and is incompatible
with previous versions.
7639 
7640 Specifying a negative protocol version selects the highest protocol
7641 version supported.  The higher the protocol used, the more recent the
7642 version of Python needed to read the pickle produced.
7643 
7644 The *file* argument must have a write() method that accepts a single
7645 bytes argument.  It can thus be a file object opened for binary
7646 writing, an io.BytesIO instance, or any other custom object that meets
7647 this interface.
7648 
7649 If *fix_imports* is True and protocol is less than 3, pickle will try
7650 to map the new Python 3 names to the old module names used in Python
7651 2, so that the pickle data stream is readable with Python 2.
7652 
7653 If *buffer_callback* is None (the default), buffer views are serialized
7654 into *file* as part of the pickle stream.  It is an error if
7655 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7656 
7657 [clinic start generated code]*/
7658 
7659 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7660 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7661                   PyObject *protocol, int fix_imports,
7662                   PyObject *buffer_callback)
7663 /*[clinic end generated code: output=706186dba996490c input=cfdcaf573ed6e46c]*/
7664 {
7665     PicklerObject *pickler = _Pickler_New();
7666 
7667     if (pickler == NULL)
7668         return NULL;
7669 
7670     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7671         goto error;
7672 
7673     if (_Pickler_SetOutputStream(pickler, file) < 0)
7674         goto error;
7675 
7676     if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7677         goto error;
7678 
7679     if (dump(pickler, obj) < 0)
7680         goto error;
7681 
7682     if (_Pickler_FlushToFile(pickler) < 0)
7683         goto error;
7684 
7685     Py_DECREF(pickler);
7686     Py_RETURN_NONE;
7687 
7688   error:
7689     Py_XDECREF(pickler);
7690     return NULL;
7691 }
7692 
7693 /*[clinic input]
7694 
7695 _pickle.dumps
7696 
7697   obj: object
7698   protocol: object = None
7699   *
7700   fix_imports: bool = True
7701   buffer_callback: object = None
7702 
7703 Return the pickled representation of the object as a bytes object.
7704 
The optional *protocol* argument tells the pickler to use the given
protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
protocol is 4. It was introduced in Python 3.4, and is incompatible
with previous versions.
7709 
7710 Specifying a negative protocol version selects the highest protocol
7711 version supported.  The higher the protocol used, the more recent the
7712 version of Python needed to read the pickle produced.
7713 
7714 If *fix_imports* is True and *protocol* is less than 3, pickle will
7715 try to map the new Python 3 names to the old module names used in
7716 Python 2, so that the pickle data stream is readable with Python 2.
7717 
7718 If *buffer_callback* is None (the default), buffer views are serialized
7719 into *file* as part of the pickle stream.  It is an error if
7720 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7721 
7722 [clinic start generated code]*/
7723 
7724 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7725 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7726                    int fix_imports, PyObject *buffer_callback)
7727 /*[clinic end generated code: output=fbab0093a5580fdf input=9f334d535ff7194f]*/
7728 {
7729     PyObject *result;
7730     PicklerObject *pickler = _Pickler_New();
7731 
7732     if (pickler == NULL)
7733         return NULL;
7734 
7735     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7736         goto error;
7737 
7738     if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7739         goto error;
7740 
7741     if (dump(pickler, obj) < 0)
7742         goto error;
7743 
7744     result = _Pickler_GetString(pickler);
7745     Py_DECREF(pickler);
7746     return result;
7747 
7748   error:
7749     Py_XDECREF(pickler);
7750     return NULL;
7751 }
7752 
7753 /*[clinic input]
7754 
7755 _pickle.load
7756 
7757   file: object
7758   *
7759   fix_imports: bool = True
7760   encoding: str = 'ASCII'
7761   errors: str = 'strict'
7762   buffers: object(c_default="NULL") = ()
7763 
7764 Read and return an object from the pickle data stored in a file.
7765 
7766 This is equivalent to ``Unpickler(file).load()``, but may be more
7767 efficient.
7768 
7769 The protocol version of the pickle is detected automatically, so no
7770 protocol argument is needed.  Bytes past the pickled object's
7771 representation are ignored.
7772 
7773 The argument *file* must have two methods, a read() method that takes
7774 an integer argument, and a readline() method that requires no
7775 arguments.  Both methods should return bytes.  Thus *file* can be a
7776 binary file object opened for reading, an io.BytesIO object, or any
7777 other custom object that meets this interface.
7778 
7779 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7780 which are used to control compatibility support for pickle stream
7781 generated by Python 2.  If *fix_imports* is True, pickle will try to
7782 map the old Python 2 names to the new names used in Python 3.  The
7783 *encoding* and *errors* tell pickle how to decode 8-bit string
7784 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7785 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7786 string instances as bytes objects.
7787 [clinic start generated code]*/
7788 
7789 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7790 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7791                   const char *encoding, const char *errors,
7792                   PyObject *buffers)
7793 /*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
7794 {
7795     PyObject *result;
7796     UnpicklerObject *unpickler = _Unpickler_New();
7797 
7798     if (unpickler == NULL)
7799         return NULL;
7800 
7801     if (_Unpickler_SetInputStream(unpickler, file) < 0)
7802         goto error;
7803 
7804     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7805         goto error;
7806 
7807     if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7808         goto error;
7809 
7810     unpickler->fix_imports = fix_imports;
7811 
7812     result = load(unpickler);
7813     Py_DECREF(unpickler);
7814     return result;
7815 
7816   error:
7817     Py_XDECREF(unpickler);
7818     return NULL;
7819 }
7820 
7821 /*[clinic input]
7822 
7823 _pickle.loads
7824 
7825   data: object
7826   *
7827   fix_imports: bool = True
7828   encoding: str = 'ASCII'
7829   errors: str = 'strict'
7830   buffers: object(c_default="NULL") = ()
7831 
7832 Read and return an object from the given pickle data.
7833 
7834 The protocol version of the pickle is detected automatically, so no
7835 protocol argument is needed.  Bytes past the pickled object's
7836 representation are ignored.
7837 
7838 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7839 which are used to control compatibility support for pickle stream
7840 generated by Python 2.  If *fix_imports* is True, pickle will try to
7841 map the old Python 2 names to the new names used in Python 3.  The
7842 *encoding* and *errors* tell pickle how to decode 8-bit string
7843 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7844 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7845 string instances as bytes objects.
7846 [clinic start generated code]*/
7847 
7848 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7849 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7850                    const char *encoding, const char *errors,
7851                    PyObject *buffers)
7852 /*[clinic end generated code: output=82ac1e6b588e6d02 input=9c2ab6a0960185ea]*/
7853 {
7854     PyObject *result;
7855     UnpicklerObject *unpickler = _Unpickler_New();
7856 
7857     if (unpickler == NULL)
7858         return NULL;
7859 
7860     if (_Unpickler_SetStringInput(unpickler, data) < 0)
7861         goto error;
7862 
7863     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7864         goto error;
7865 
7866     if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7867         goto error;
7868 
7869     unpickler->fix_imports = fix_imports;
7870 
7871     result = load(unpickler);
7872     Py_DECREF(unpickler);
7873     return result;
7874 
7875   error:
7876     Py_XDECREF(unpickler);
7877     return NULL;
7878 }
7879 
7880 static struct PyMethodDef pickle_methods[] = {
7881     _PICKLE_DUMP_METHODDEF
7882     _PICKLE_DUMPS_METHODDEF
7883     _PICKLE_LOAD_METHODDEF
7884     _PICKLE_LOADS_METHODDEF
7885     {NULL, NULL} /* sentinel */
7886 };
7887 
7888 static int
pickle_clear(PyObject * m)7889 pickle_clear(PyObject *m)
7890 {
7891     _Pickle_ClearState(_Pickle_GetState(m));
7892     return 0;
7893 }
7894 
7895 static void
pickle_free(PyObject * m)7896 pickle_free(PyObject *m)
7897 {
7898     _Pickle_ClearState(_Pickle_GetState(m));
7899 }
7900 
7901 static int
pickle_traverse(PyObject * m,visitproc visit,void * arg)7902 pickle_traverse(PyObject *m, visitproc visit, void *arg)
7903 {
7904     PickleState *st = _Pickle_GetState(m);
7905     Py_VISIT(st->PickleError);
7906     Py_VISIT(st->PicklingError);
7907     Py_VISIT(st->UnpicklingError);
7908     Py_VISIT(st->dispatch_table);
7909     Py_VISIT(st->extension_registry);
7910     Py_VISIT(st->extension_cache);
7911     Py_VISIT(st->inverted_registry);
7912     Py_VISIT(st->name_mapping_2to3);
7913     Py_VISIT(st->import_mapping_2to3);
7914     Py_VISIT(st->name_mapping_3to2);
7915     Py_VISIT(st->import_mapping_3to2);
7916     Py_VISIT(st->codecs_encode);
7917     Py_VISIT(st->getattr);
7918     return 0;
7919 }
7920 
7921 static struct PyModuleDef _picklemodule = {
7922     PyModuleDef_HEAD_INIT,
7923     "_pickle",            /* m_name */
7924     pickle_module_doc,    /* m_doc */
7925     sizeof(PickleState),  /* m_size */
7926     pickle_methods,       /* m_methods */
7927     NULL,                 /* m_reload */
7928     pickle_traverse,      /* m_traverse */
7929     pickle_clear,         /* m_clear */
7930     (freefunc)pickle_free /* m_free */
7931 };
7932 
7933 PyMODINIT_FUNC
PyInit__pickle(void)7934 PyInit__pickle(void)
7935 {
7936     PyObject *m;
7937     PickleState *st;
7938 
7939     m = PyState_FindModule(&_picklemodule);
7940     if (m) {
7941         Py_INCREF(m);
7942         return m;
7943     }
7944 
7945     if (PyType_Ready(&Unpickler_Type) < 0)
7946         return NULL;
7947     if (PyType_Ready(&Pickler_Type) < 0)
7948         return NULL;
7949     if (PyType_Ready(&Pdata_Type) < 0)
7950         return NULL;
7951     if (PyType_Ready(&PicklerMemoProxyType) < 0)
7952         return NULL;
7953     if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7954         return NULL;
7955 
7956     /* Create the module and add the functions. */
7957     m = PyModule_Create(&_picklemodule);
7958     if (m == NULL)
7959         return NULL;
7960 
7961     /* Add types */
7962     Py_INCREF(&Pickler_Type);
7963     if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
7964         return NULL;
7965     Py_INCREF(&Unpickler_Type);
7966     if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
7967         return NULL;
7968     Py_INCREF(&PyPickleBuffer_Type);
7969     if (PyModule_AddObject(m, "PickleBuffer",
7970                            (PyObject *)&PyPickleBuffer_Type) < 0)
7971         return NULL;
7972 
7973     st = _Pickle_GetState(m);
7974 
7975     /* Initialize the exceptions. */
7976     st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7977     if (st->PickleError == NULL)
7978         return NULL;
7979     st->PicklingError = \
7980         PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7981     if (st->PicklingError == NULL)
7982         return NULL;
7983     st->UnpicklingError = \
7984         PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7985     if (st->UnpicklingError == NULL)
7986         return NULL;
7987 
7988     Py_INCREF(st->PickleError);
7989     if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
7990         return NULL;
7991     Py_INCREF(st->PicklingError);
7992     if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
7993         return NULL;
7994     Py_INCREF(st->UnpicklingError);
7995     if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
7996         return NULL;
7997 
7998     if (_Pickle_InitState(st) < 0)
7999         return NULL;
8000 
8001     return m;
8002 }
8003