/* pickle accelerator C extension: _pickle module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
6
7 #ifndef Py_BUILD_CORE_BUILTIN
8 # define Py_BUILD_CORE_MODULE 1
9 #endif
10
11 #include "Python.h"
12 #include "pycore_bytesobject.h" // _PyBytesWriter
13 #include "pycore_ceval.h" // _Py_EnterRecursiveCall()
14 #include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION()
15 #include "pycore_long.h" // _PyLong_AsByteArray()
16 #include "pycore_moduleobject.h" // _PyModule_GetState()
17 #include "pycore_object.h" // _PyNone_Type
18 #include "pycore_pystate.h" // _PyThreadState_GET()
19 #include "pycore_runtime.h" // _Py_ID()
20 #include "pycore_setobject.h" // _PySet_NextEntry()
21 #include "pycore_sysmodule.h" // _PySys_GetAttr()
22
23 #include <stdlib.h> // strtol()
24
25
26 PyDoc_STRVAR(pickle_module_doc,
27 "Optimized C implementation for the Python pickle module.");
28
29 /*[clinic input]
30 module _pickle
31 class _pickle.Pickler "PicklerObject *" ""
32 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" ""
33 class _pickle.Unpickler "UnpicklerObject *" ""
34 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" ""
35 [clinic start generated code]*/
36 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b6d7191ab6466cda]*/
37
/* Pickle protocol version constants.

   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol.
   Bump DEFAULT_PROTOCOL only when the oldest still supported version of Python
   already includes it. */
enum {
    HIGHEST_PROTOCOL = 5,
    DEFAULT_PROTOCOL = 4
};
45
#ifdef MS_WINDOWS
// These are already typedefs from windows.h, pulled in via pycore_runtime.h.
// Redirect the names so that the FLOAT, INT and LONG enumerators of
// `enum opcode` below do not collide with them.
#define FLOAT FLOAT_
#define INT INT_
#define LONG LONG_

/* This can already be defined on Windows to set the character set
   the Windows header files treat as default.  It is undefined here because
   `enum opcode` below declares an enumerator named UNICODE. */
#ifdef UNICODE
#undef UNICODE
#endif
#endif
58
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Every enumerator is spelled as a character constant: the opcode is the
   single byte that is written into the pickle stream (see for instance how
   FRAME is stored by _Pickler_CommitFrame()). */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4 */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95',

    /* Protocol 5 */
    BYTEARRAY8       = '\x96',
    NEXT_BUFFER      = '\x97',
    READONLY_BUFFER  = '\x98'
};
139
/* Tuning constants for the pickler and the unpickler. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of sync with pickle.py, but it's unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16,

    /* A frame whose payload is shorter than this is emitted without a
       frame header (see _Pickler_CommitFrame()). */
    FRAME_SIZE_MIN = 4,
    /* Once the payload of the open frame reaches this many bytes the frame
       is committed (see _Pickler_OpcodeBoundary()). */
    FRAME_SIZE_TARGET = 64 * 1024,
    /* Bytes reserved for a frame header: the FRAME opcode byte followed by
       the 8-byte length written by _write_size64(). */
    FRAME_HEADER_SIZE = 9
};
161
162 /*************************************************************************/
163
/* State of the pickle module, per PEP 3121.

   All members are populated by _Pickle_InitState() (the type members are
   filled in elsewhere) and released by _Pickle_ClearState(). */
typedef struct {
    /* Exception classes for pickle. */
    PyObject *PickleError;
    PyObject *PicklingError;
    PyObject *UnpicklingError;

    /* copyreg.dispatch_table, {type_object: pickling_function} */
    PyObject *dispatch_table;

    /* For the extension opcodes EXT1, EXT2 and EXT4. */

    /* copyreg._extension_registry, {(module_name, function_name): code} */
    PyObject *extension_registry;
    /* copyreg._extension_cache, {code: object} */
    PyObject *extension_cache;
    /* copyreg._inverted_registry, {code: (module_name, function_name)} */
    PyObject *inverted_registry;

    /* Import mappings for compatibility with Python 2.x */

    /* _compat_pickle.NAME_MAPPING,
       {(oldmodule, oldname): (newmodule, newname)} */
    PyObject *name_mapping_2to3;
    /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
    PyObject *import_mapping_2to3;
    /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
    PyObject *name_mapping_3to2;
    PyObject *import_mapping_3to2;

    /* codecs.encode, used for saving bytes in older protocols */
    PyObject *codecs_encode;
    /* builtins.getattr, used for saving nested names with protocol < 4 */
    PyObject *getattr;
    /* functools.partial, used for implementing __newobj_ex__ with protocols
       2 and 3 */
    PyObject *partial;

    /* Heap types owned by this module instance. */
    PyTypeObject *Pickler_Type;
    PyTypeObject *Unpickler_Type;
    PyTypeObject *Pdata_Type;
    PyTypeObject *PicklerMemoProxyType;
    PyTypeObject *UnpicklerMemoProxyType;
} PickleState;
209
/* Forward declaration of the _pickle module definition.  It is needed
   before its definition by _Pickle_FindStateByType(), which passes its
   address to PyType_GetModuleByDef(). */
static struct PyModuleDef _picklemodule;
212
213 /* Given a module object, get its per-module state. */
214 static inline PickleState *
_Pickle_GetState(PyObject * module)215 _Pickle_GetState(PyObject *module)
216 {
217 void *state = _PyModule_GetState(module);
218 assert(state != NULL);
219 return (PickleState *)state;
220 }
221
222 static inline PickleState *
_Pickle_GetStateByClass(PyTypeObject * cls)223 _Pickle_GetStateByClass(PyTypeObject *cls)
224 {
225 void *state = _PyType_GetModuleState(cls);
226 assert(state != NULL);
227 return (PickleState *)state;
228 }
229
230 static inline PickleState *
_Pickle_FindStateByType(PyTypeObject * tp)231 _Pickle_FindStateByType(PyTypeObject *tp)
232 {
233 PyObject *module = PyType_GetModuleByDef(tp, &_picklemodule);
234 assert(module != NULL);
235 return _Pickle_GetState(module);
236 }
237
238 /* Clear the given pickle module state. */
239 static void
_Pickle_ClearState(PickleState * st)240 _Pickle_ClearState(PickleState *st)
241 {
242 Py_CLEAR(st->PickleError);
243 Py_CLEAR(st->PicklingError);
244 Py_CLEAR(st->UnpicklingError);
245 Py_CLEAR(st->dispatch_table);
246 Py_CLEAR(st->extension_registry);
247 Py_CLEAR(st->extension_cache);
248 Py_CLEAR(st->inverted_registry);
249 Py_CLEAR(st->name_mapping_2to3);
250 Py_CLEAR(st->import_mapping_2to3);
251 Py_CLEAR(st->name_mapping_3to2);
252 Py_CLEAR(st->import_mapping_3to2);
253 Py_CLEAR(st->codecs_encode);
254 Py_CLEAR(st->getattr);
255 Py_CLEAR(st->partial);
256 Py_CLEAR(st->Pickler_Type);
257 Py_CLEAR(st->Unpickler_Type);
258 Py_CLEAR(st->Pdata_Type);
259 Py_CLEAR(st->PicklerMemoProxyType);
260 Py_CLEAR(st->UnpicklerMemoProxyType);
261 }
262
263 /* Initialize the given pickle module state. */
264 static int
_Pickle_InitState(PickleState * st)265 _Pickle_InitState(PickleState *st)
266 {
267 PyObject *copyreg = NULL;
268 PyObject *compat_pickle = NULL;
269
270 st->getattr = _PyEval_GetBuiltin(&_Py_ID(getattr));
271 if (st->getattr == NULL)
272 goto error;
273
274 copyreg = PyImport_ImportModule("copyreg");
275 if (!copyreg)
276 goto error;
277 st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
278 if (!st->dispatch_table)
279 goto error;
280 if (!PyDict_CheckExact(st->dispatch_table)) {
281 PyErr_Format(PyExc_RuntimeError,
282 "copyreg.dispatch_table should be a dict, not %.200s",
283 Py_TYPE(st->dispatch_table)->tp_name);
284 goto error;
285 }
286 st->extension_registry = \
287 PyObject_GetAttrString(copyreg, "_extension_registry");
288 if (!st->extension_registry)
289 goto error;
290 if (!PyDict_CheckExact(st->extension_registry)) {
291 PyErr_Format(PyExc_RuntimeError,
292 "copyreg._extension_registry should be a dict, "
293 "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
294 goto error;
295 }
296 st->inverted_registry = \
297 PyObject_GetAttrString(copyreg, "_inverted_registry");
298 if (!st->inverted_registry)
299 goto error;
300 if (!PyDict_CheckExact(st->inverted_registry)) {
301 PyErr_Format(PyExc_RuntimeError,
302 "copyreg._inverted_registry should be a dict, "
303 "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
304 goto error;
305 }
306 st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
307 if (!st->extension_cache)
308 goto error;
309 if (!PyDict_CheckExact(st->extension_cache)) {
310 PyErr_Format(PyExc_RuntimeError,
311 "copyreg._extension_cache should be a dict, "
312 "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
313 goto error;
314 }
315 Py_CLEAR(copyreg);
316
317 /* Load the 2.x -> 3.x stdlib module mapping tables */
318 compat_pickle = PyImport_ImportModule("_compat_pickle");
319 if (!compat_pickle)
320 goto error;
321 st->name_mapping_2to3 = \
322 PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
323 if (!st->name_mapping_2to3)
324 goto error;
325 if (!PyDict_CheckExact(st->name_mapping_2to3)) {
326 PyErr_Format(PyExc_RuntimeError,
327 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
328 Py_TYPE(st->name_mapping_2to3)->tp_name);
329 goto error;
330 }
331 st->import_mapping_2to3 = \
332 PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
333 if (!st->import_mapping_2to3)
334 goto error;
335 if (!PyDict_CheckExact(st->import_mapping_2to3)) {
336 PyErr_Format(PyExc_RuntimeError,
337 "_compat_pickle.IMPORT_MAPPING should be a dict, "
338 "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
339 goto error;
340 }
341 /* ... and the 3.x -> 2.x mapping tables */
342 st->name_mapping_3to2 = \
343 PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
344 if (!st->name_mapping_3to2)
345 goto error;
346 if (!PyDict_CheckExact(st->name_mapping_3to2)) {
347 PyErr_Format(PyExc_RuntimeError,
348 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
349 "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
350 goto error;
351 }
352 st->import_mapping_3to2 = \
353 PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
354 if (!st->import_mapping_3to2)
355 goto error;
356 if (!PyDict_CheckExact(st->import_mapping_3to2)) {
357 PyErr_Format(PyExc_RuntimeError,
358 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
359 "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
360 goto error;
361 }
362 Py_CLEAR(compat_pickle);
363
364 st->codecs_encode = _PyImport_GetModuleAttrString("codecs", "encode");
365 if (st->codecs_encode == NULL) {
366 goto error;
367 }
368 if (!PyCallable_Check(st->codecs_encode)) {
369 PyErr_Format(PyExc_RuntimeError,
370 "codecs.encode should be a callable, not %.200s",
371 Py_TYPE(st->codecs_encode)->tp_name);
372 goto error;
373 }
374
375 st->partial = _PyImport_GetModuleAttrString("functools", "partial");
376 if (!st->partial)
377 goto error;
378
379 return 0;
380
381 error:
382 Py_CLEAR(copyreg);
383 Py_CLEAR(compat_pickle);
384 _Pickle_ClearState(st);
385 return -1;
386 }
387
388 /* Helper for calling a function with a single argument quickly.
389
390 This function steals the reference of the given argument. */
391 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)392 _Pickle_FastCall(PyObject *func, PyObject *obj)
393 {
394 PyObject *result;
395
396 result = PyObject_CallOneArg(func, obj);
397 Py_DECREF(obj);
398 return result;
399 }
400
401 /*************************************************************************/
402
/* Internal data type used as the unpickling stack.

   The number of objects currently on the stack is kept in ob_size (it is
   read and written with Py_SIZE() / Py_SET_SIZE()). */
typedef struct {
    PyObject_VAR_HEAD
    PyObject **data;            /* array of owned references, PyMem-allocated */
    int mark_set;               /* is MARK set? */
    Py_ssize_t fence;           /* position of top MARK or 0 */
    Py_ssize_t allocated;       /* number of slots in data allocated */
} Pdata;
411
412 static int
Pdata_traverse(Pdata * self,visitproc visit,void * arg)413 Pdata_traverse(Pdata *self, visitproc visit, void *arg)
414 {
415 Py_VISIT(Py_TYPE(self));
416 return 0;
417 }
418
419 static void
Pdata_dealloc(Pdata * self)420 Pdata_dealloc(Pdata *self)
421 {
422 PyTypeObject *tp = Py_TYPE(self);
423 PyObject_GC_UnTrack(self);
424 Py_ssize_t i = Py_SIZE(self);
425 while (--i >= 0) {
426 Py_DECREF(self->data[i]);
427 }
428 PyMem_Free(self->data);
429 tp->tp_free((PyObject *)self);
430 Py_DECREF(tp);
431 }
432
/* Slot table for the Pdata heap type; terminated by the {0, NULL} entry. */
static PyType_Slot pdata_slots[] = {
    {Py_tp_dealloc, Pdata_dealloc},
    {Py_tp_traverse, Pdata_traverse},
    {0, NULL},
};
438
/* Type specification for _pickle.Pdata: a GC-enabled, immutable type that
   uses the slots in pdata_slots. */
static PyType_Spec pdata_spec = {
    .name = "_pickle.Pdata",
    .basicsize = sizeof(Pdata),
    .itemsize = sizeof(PyObject *),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = pdata_slots,
};
447
448 static PyObject *
Pdata_New(PickleState * state)449 Pdata_New(PickleState *state)
450 {
451 Pdata *self;
452
453 if (!(self = PyObject_GC_New(Pdata, state->Pdata_Type)))
454 return NULL;
455 Py_SET_SIZE(self, 0);
456 self->mark_set = 0;
457 self->fence = 0;
458 self->allocated = 8;
459 self->data = PyMem_Malloc(self->allocated * sizeof(PyObject *));
460 if (self->data) {
461 PyObject_GC_Track(self);
462 return (PyObject *)self;
463 }
464 Py_DECREF(self);
465 return PyErr_NoMemory();
466 }
467
468
469 /* Retain only the initial clearto items. If clearto >= the current
470 * number of items, this is a (non-erroneous) NOP.
471 */
472 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)473 Pdata_clear(Pdata *self, Py_ssize_t clearto)
474 {
475 Py_ssize_t i = Py_SIZE(self);
476
477 assert(clearto >= self->fence);
478 if (clearto >= i)
479 return 0;
480
481 while (--i >= clearto) {
482 Py_CLEAR(self->data[i]);
483 }
484 Py_SET_SIZE(self, clearto);
485 return 0;
486 }
487
488 static int
Pdata_grow(Pdata * self)489 Pdata_grow(Pdata *self)
490 {
491 PyObject **data = self->data;
492 size_t allocated = (size_t)self->allocated;
493 size_t new_allocated;
494
495 new_allocated = (allocated >> 3) + 6;
496 /* check for integer overflow */
497 if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
498 goto nomemory;
499 new_allocated += allocated;
500 PyMem_RESIZE(data, PyObject *, new_allocated);
501 if (data == NULL)
502 goto nomemory;
503
504 self->data = data;
505 self->allocated = (Py_ssize_t)new_allocated;
506 return 0;
507
508 nomemory:
509 PyErr_NoMemory();
510 return -1;
511 }
512
513 static int
Pdata_stack_underflow(PickleState * st,Pdata * self)514 Pdata_stack_underflow(PickleState *st, Pdata *self)
515 {
516 PyErr_SetString(st->UnpicklingError,
517 self->mark_set ?
518 "unexpected MARK found" :
519 "unpickling stack underflow");
520 return -1;
521 }
522
523 /* D is a Pdata*. Pop the topmost element and store it into V, which
524 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
525 * is raised and V is set to NULL.
526 */
527 static PyObject *
Pdata_pop(PickleState * state,Pdata * self)528 Pdata_pop(PickleState *state, Pdata *self)
529 {
530 if (Py_SIZE(self) <= self->fence) {
531 Pdata_stack_underflow(state, self);
532 return NULL;
533 }
534 Py_SET_SIZE(self, Py_SIZE(self) - 1);
535 return self->data[Py_SIZE(self)];
536 }
537 #define PDATA_POP(S, D, V) do { (V) = Pdata_pop(S, (D)); } while (0)
538
539 static int
Pdata_push(Pdata * self,PyObject * obj)540 Pdata_push(Pdata *self, PyObject *obj)
541 {
542 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
543 return -1;
544 }
545 self->data[Py_SIZE(self)] = obj;
546 Py_SET_SIZE(self, Py_SIZE(self) + 1);
547 return 0;
548 }
549
/* Push an object on stack, transferring its ownership to the stack.

   D is a Pdata*, O is a PyObject* (evaluated exactly once) and ER is the
   value returned from the *calling function* when the push fails.  Because
   the caller gets no chance to clean up after that early return, the
   reference to O is released here on failure instead of being leaked. */
#define PDATA_PUSH(D, O, ER) do {                               \
        PyObject *pdata_push_obj_ = (O);                        \
        if (Pdata_push((D), pdata_push_obj_) < 0) {             \
            Py_DECREF(pdata_push_obj_);                         \
            return (ER);                                        \
        }                                                       \
    } while(0)

/* Push an object on stack, adding a new reference to the object.

   Same parameters as PDATA_PUSH.  The caller keeps its own reference to O;
   the extra reference taken for the stack is dropped again when the push
   fails, so a failed append leaves the reference count unchanged. */
#define PDATA_APPEND(D, O, ER) do {                             \
        PyObject *pdata_append_obj_ = (O);                      \
        Py_INCREF(pdata_append_obj_);                           \
        if (Pdata_push((D), pdata_append_obj_) < 0) {           \
            Py_DECREF(pdata_append_obj_);                       \
            return (ER);                                        \
        }                                                       \
    } while(0)
558
559 static PyObject *
Pdata_poptuple(PickleState * state,Pdata * self,Py_ssize_t start)560 Pdata_poptuple(PickleState *state, Pdata *self, Py_ssize_t start)
561 {
562 PyObject *tuple;
563 Py_ssize_t len, i, j;
564
565 if (start < self->fence) {
566 Pdata_stack_underflow(state, self);
567 return NULL;
568 }
569 len = Py_SIZE(self) - start;
570 tuple = PyTuple_New(len);
571 if (tuple == NULL)
572 return NULL;
573 for (i = start, j = 0; j < len; i++, j++)
574 PyTuple_SET_ITEM(tuple, j, self->data[i]);
575
576 Py_SET_SIZE(self, start);
577 return tuple;
578 }
579
580 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)581 Pdata_poplist(Pdata *self, Py_ssize_t start)
582 {
583 PyObject *list;
584 Py_ssize_t len, i, j;
585
586 len = Py_SIZE(self) - start;
587 list = PyList_New(len);
588 if (list == NULL)
589 return NULL;
590 for (i = start, j = 0; j < len; i++, j++)
591 PyList_SET_ITEM(list, j, self->data[i]);
592
593 Py_SET_SIZE(self, start);
594 return list;
595 }
596
/* One slot of the pickler memo hash table. */
typedef struct {
    PyObject *me_key;       /* memoized object (strong reference), or NULL
                               when the slot is unused */
    Py_ssize_t me_value;    /* value associated with me_key */
} PyMemoEntry;
601
/* Open-addressing hash table keyed by object identity, used as the pickler
   memo. */
typedef struct {
    size_t mt_mask;         /* mt_allocated - 1; reduces a probe to an index */
    size_t mt_used;         /* number of occupied slots */
    size_t mt_allocated;    /* number of slots in mt_table (a power of two) */
    PyMemoEntry *mt_table;  /* the slot array */
} PyMemoTable;
608
/* Instance layout of _pickle.Pickler. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyMemoTable *memo;          /* Memo table, keeps track of the seen
                                   objects to support pickling of
                                   self-referential objects. */
    PyObject *persistent_id;    /* persistent_id() method, can be NULL */
    PyObject *persistent_id_attr; /* instance attribute, can be NULL */
    PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
    PyObject *reducer_override; /* hook for invoking user-defined callbacks
                                   instead of save_global when pickling
                                   functions and classes */

    PyObject *write;            /* write() method of the output stream. */
    PyObject *output_buffer;    /* Local bytes object that pickle data is
                                   written into before being flushed to the
                                   stream. */
    Py_ssize_t output_len;      /* Number of bytes of output_buffer in use. */
    Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int framing;                /* True when framing is enabled, proto >= 4 */
    Py_ssize_t frame_start;     /* Position in output_buffer where the
                                   current frame begins. -1 if there
                                   is no frame currently open. */

    Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
    int fast;                   /* Enable fast mode if set to a true value.
                                   Fast mode disables the use of the memo,
                                   which speeds up pickling by not generating
                                   superfluous PUT opcodes.  It should not be
                                   used with self-referential objects. */
    int fast_nesting;
    int fix_imports;            /* Indicate whether Pickler should fix
                                   the name of globals for Python 2.x. */
    PyObject *fast_memo;
    PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
} PicklerObject;
646
/* Instance layout of _pickle.Unpickler. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, stores unpickled
                                   objects. */

    /* The unpickler memo is just an array of PyObject *s. Using a dict
       is unnecessary, since the keys are contiguous ints. */
    PyObject **memo;
    size_t memo_size;           /* Capacity of the memo array */
    size_t memo_len;            /* Number of objects in the memo */

    PyObject *persistent_load;  /* persistent_load() method, can be NULL. */
    PyObject *persistent_load_attr; /* instance attribute, can be NULL. */

    Py_buffer buffer;
    char *input_buffer;
    char *input_line;
    Py_ssize_t input_len;
    Py_ssize_t next_read_idx;
    Py_ssize_t prefetched_idx;  /* index of first prefetched byte */

    PyObject *read;             /* read() method of the input stream. */
    PyObject *readinto;         /* readinto() method of the input stream. */
    PyObject *readline;         /* readline() method of the input stream. */
    PyObject *peek;             /* peek() method of the input stream, or NULL */
    PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */

    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value
                                   is "strict". */
    Py_ssize_t *marks;          /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
    int proto;                  /* Protocol of the pickle loaded. */
    int fix_imports;            /* Indicate whether Unpickler should fix
                                   the name of globals pickled by Python 2.x. */
} UnpicklerObject;
687
/* Instance layout of the object that proxies a Pickler's memo table. */
typedef struct {
    PyObject_HEAD
    PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
} PicklerMemoProxyObject;
692
/* Instance layout of the Unpickler counterpart of PicklerMemoProxyObject. */
typedef struct {
    PyObject_HEAD
    UnpicklerObject *unpickler; /* Unpickler this proxy refers to. */
} UnpicklerMemoProxyObject;
697
/* Forward declarations of the pickling routines, which are defined later
   in the file but needed before their definitions. */
static int save(PickleState *state, PicklerObject *, PyObject *, int);
static int save_reduce(PickleState *, PicklerObject *, PyObject *, PyObject *);
701
702 #include "clinic/_pickle.c.h"
703
704 /*************************************************************************
705 A custom hashtable mapping void* to Python ints. This is used by the pickler
706 for memoization. Using a custom hashtable rather than PyDict allows us to skip
707 a bunch of unnecessary object creation. This makes a huge performance
708 difference. */
709
/* Number of slots in a freshly created memo table; also the smallest size
   _PyMemoTable_ResizeTable() will choose. */
#define MT_MINSIZE 8
/* Number of bits the perturbation value is shifted right by after each
   probe in _PyMemoTable_Lookup(). */
#define PERTURB_SHIFT 5
712
713
714 static PyMemoTable *
PyMemoTable_New(void)715 PyMemoTable_New(void)
716 {
717 PyMemoTable *memo = PyMem_Malloc(sizeof(PyMemoTable));
718 if (memo == NULL) {
719 PyErr_NoMemory();
720 return NULL;
721 }
722
723 memo->mt_used = 0;
724 memo->mt_allocated = MT_MINSIZE;
725 memo->mt_mask = MT_MINSIZE - 1;
726 memo->mt_table = PyMem_Malloc(MT_MINSIZE * sizeof(PyMemoEntry));
727 if (memo->mt_table == NULL) {
728 PyMem_Free(memo);
729 PyErr_NoMemory();
730 return NULL;
731 }
732 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
733
734 return memo;
735 }
736
737 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)738 PyMemoTable_Copy(PyMemoTable *self)
739 {
740 PyMemoTable *new = PyMemoTable_New();
741 if (new == NULL)
742 return NULL;
743
744 new->mt_used = self->mt_used;
745 new->mt_allocated = self->mt_allocated;
746 new->mt_mask = self->mt_mask;
747 /* The table we get from _New() is probably smaller than we wanted.
748 Free it and allocate one that's the right size. */
749 PyMem_Free(new->mt_table);
750 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
751 if (new->mt_table == NULL) {
752 PyMem_Free(new);
753 PyErr_NoMemory();
754 return NULL;
755 }
756 for (size_t i = 0; i < self->mt_allocated; i++) {
757 Py_XINCREF(self->mt_table[i].me_key);
758 }
759 memcpy(new->mt_table, self->mt_table,
760 sizeof(PyMemoEntry) * self->mt_allocated);
761
762 return new;
763 }
764
765 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)766 PyMemoTable_Size(PyMemoTable *self)
767 {
768 return self->mt_used;
769 }
770
771 static int
PyMemoTable_Clear(PyMemoTable * self)772 PyMemoTable_Clear(PyMemoTable *self)
773 {
774 Py_ssize_t i = self->mt_allocated;
775
776 while (--i >= 0) {
777 Py_XDECREF(self->mt_table[i].me_key);
778 }
779 self->mt_used = 0;
780 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
781 return 0;
782 }
783
784 static void
PyMemoTable_Del(PyMemoTable * self)785 PyMemoTable_Del(PyMemoTable *self)
786 {
787 if (self == NULL)
788 return;
789 PyMemoTable_Clear(self);
790
791 PyMem_Free(self->mt_table);
792 PyMem_Free(self);
793 }
794
795 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
796 can be considerably simpler than dictobject.c's lookdict(). */
797 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)798 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
799 {
800 size_t i;
801 size_t perturb;
802 size_t mask = self->mt_mask;
803 PyMemoEntry *table = self->mt_table;
804 PyMemoEntry *entry;
805 Py_hash_t hash = (Py_hash_t)key >> 3;
806
807 i = hash & mask;
808 entry = &table[i];
809 if (entry->me_key == NULL || entry->me_key == key)
810 return entry;
811
812 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
813 i = (i << 2) + i + perturb + 1;
814 entry = &table[i & mask];
815 if (entry->me_key == NULL || entry->me_key == key)
816 return entry;
817 }
818 Py_UNREACHABLE();
819 }
820
821 /* Returns -1 on failure, 0 on success. */
822 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)823 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
824 {
825 PyMemoEntry *oldtable = NULL;
826 PyMemoEntry *oldentry, *newentry;
827 size_t new_size = MT_MINSIZE;
828 size_t to_process;
829
830 assert(min_size > 0);
831
832 if (min_size > PY_SSIZE_T_MAX) {
833 PyErr_NoMemory();
834 return -1;
835 }
836
837 /* Find the smallest valid table size >= min_size. */
838 while (new_size < min_size) {
839 new_size <<= 1;
840 }
841 /* new_size needs to be a power of two. */
842 assert((new_size & (new_size - 1)) == 0);
843
844 /* Allocate new table. */
845 oldtable = self->mt_table;
846 self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
847 if (self->mt_table == NULL) {
848 self->mt_table = oldtable;
849 PyErr_NoMemory();
850 return -1;
851 }
852 self->mt_allocated = new_size;
853 self->mt_mask = new_size - 1;
854 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
855
856 /* Copy entries from the old table. */
857 to_process = self->mt_used;
858 for (oldentry = oldtable; to_process > 0; oldentry++) {
859 if (oldentry->me_key != NULL) {
860 to_process--;
861 /* newentry is a pointer to a chunk of the new
862 mt_table, so we're setting the key:value pair
863 in-place. */
864 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
865 newentry->me_key = oldentry->me_key;
866 newentry->me_value = oldentry->me_value;
867 }
868 }
869
870 /* Deallocate the old table. */
871 PyMem_Free(oldtable);
872 return 0;
873 }
874
875 /* Returns NULL on failure, a pointer to the value otherwise. */
876 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)877 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
878 {
879 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
880 if (entry->me_key == NULL)
881 return NULL;
882 return &entry->me_value;
883 }
884
885 /* Returns -1 on failure, 0 on success. */
886 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)887 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
888 {
889 PyMemoEntry *entry;
890
891 assert(key != NULL);
892
893 entry = _PyMemoTable_Lookup(self, key);
894 if (entry->me_key != NULL) {
895 entry->me_value = value;
896 return 0;
897 }
898 entry->me_key = Py_NewRef(key);
899 entry->me_value = value;
900 self->mt_used++;
901
902 /* If we added a key, we can safely resize. Otherwise just return!
903 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
904 *
905 * Quadrupling the size improves average table sparseness
906 * (reducing collisions) at the cost of some memory. It also halves
907 * the number of expensive resize operations in a growing memo table.
908 *
909 * Very large memo tables (over 50K items) use doubling instead.
910 * This may help applications with severe memory constraints.
911 */
912 if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
913 return 0;
914 }
915 // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
916 size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
917 return _PyMemoTable_ResizeTable(self, desired_size);
918 }
919
/* The memo table tuning macros are private to the hash table code above. */
#undef MT_MINSIZE
#undef PERTURB_SHIFT
922
923 /*************************************************************************/
924
925
926 static int
_Pickler_ClearBuffer(PicklerObject * self)927 _Pickler_ClearBuffer(PicklerObject *self)
928 {
929 Py_XSETREF(self->output_buffer,
930 PyBytes_FromStringAndSize(NULL, self->max_output_len));
931 if (self->output_buffer == NULL)
932 return -1;
933 self->output_len = 0;
934 self->frame_start = -1;
935 return 0;
936 }
937
/* Store `value` into out[0..7] as a 64-bit little-endian integer: the least
   significant byte comes first, and the bytes beyond sizeof(size_t) are
   zero.  `out` must have room for 8 bytes. */
static void
_write_size64(char *out, size_t value)
{
    static_assert(sizeof(size_t) <= 8, "size_t is larger than 64-bit");

    for (size_t pos = 0; pos < 8; pos++) {
        if (pos < sizeof(size_t)) {
            /* The shift is only evaluated for positions inside size_t. */
            out[pos] = (unsigned char)((value >> (8 * pos)) & 0xff);
        }
        else {
            out[pos] = 0;
        }
    }
}
952
953 static int
_Pickler_CommitFrame(PicklerObject * self)954 _Pickler_CommitFrame(PicklerObject *self)
955 {
956 size_t frame_len;
957 char *qdata;
958
959 if (!self->framing || self->frame_start == -1)
960 return 0;
961 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
962 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
963 if (frame_len >= FRAME_SIZE_MIN) {
964 qdata[0] = FRAME;
965 _write_size64(qdata + 1, frame_len);
966 }
967 else {
968 memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
969 self->output_len -= FRAME_HEADER_SIZE;
970 }
971 self->frame_start = -1;
972 return 0;
973 }
974
975 static PyObject *
_Pickler_GetString(PicklerObject * self)976 _Pickler_GetString(PicklerObject *self)
977 {
978 PyObject *output_buffer = self->output_buffer;
979
980 assert(self->output_buffer != NULL);
981
982 if (_Pickler_CommitFrame(self))
983 return NULL;
984
985 self->output_buffer = NULL;
986 /* Resize down to exact size */
987 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
988 return NULL;
989 return output_buffer;
990 }
991
992 static int
_Pickler_FlushToFile(PicklerObject * self)993 _Pickler_FlushToFile(PicklerObject *self)
994 {
995 PyObject *output, *result;
996
997 assert(self->write != NULL);
998
999 /* This will commit the frame first */
1000 output = _Pickler_GetString(self);
1001 if (output == NULL)
1002 return -1;
1003
1004 result = _Pickle_FastCall(self->write, output);
1005 Py_XDECREF(result);
1006 return (result == NULL) ? -1 : 0;
1007 }
1008
1009 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1010 _Pickler_OpcodeBoundary(PicklerObject *self)
1011 {
1012 Py_ssize_t frame_len;
1013
1014 if (!self->framing || self->frame_start == -1) {
1015 return 0;
1016 }
1017 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1018 if (frame_len >= FRAME_SIZE_TARGET) {
1019 if(_Pickler_CommitFrame(self)) {
1020 return -1;
1021 }
1022 /* Flush the content of the committed frame to the underlying
1023 * file and reuse the pickler buffer for the next frame so as
1024 * to limit memory usage when dumping large complex objects to
1025 * a file.
1026 *
1027 * self->write is NULL when called via dumps.
1028 */
1029 if (self->write != NULL) {
1030 if (_Pickler_FlushToFile(self) < 0) {
1031 return -1;
1032 }
1033 if (_Pickler_ClearBuffer(self) < 0) {
1034 return -1;
1035 }
1036 }
1037 }
1038 return 0;
1039 }
1040
1041 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1042 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1043 {
1044 Py_ssize_t i, n, required;
1045 char *buffer;
1046 int need_new_frame;
1047
1048 assert(s != NULL);
1049 need_new_frame = (self->framing && self->frame_start == -1);
1050
1051 if (need_new_frame)
1052 n = data_len + FRAME_HEADER_SIZE;
1053 else
1054 n = data_len;
1055
1056 required = self->output_len + n;
1057 if (required > self->max_output_len) {
1058 /* Make place in buffer for the pickle chunk */
1059 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1060 PyErr_NoMemory();
1061 return -1;
1062 }
1063 self->max_output_len = (self->output_len + n) / 2 * 3;
1064 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1065 return -1;
1066 }
1067 buffer = PyBytes_AS_STRING(self->output_buffer);
1068 if (need_new_frame) {
1069 /* Setup new frame */
1070 Py_ssize_t frame_start = self->output_len;
1071 self->frame_start = frame_start;
1072 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1073 /* Write an invalid value, for debugging */
1074 buffer[frame_start + i] = 0xFE;
1075 }
1076 self->output_len += FRAME_HEADER_SIZE;
1077 }
1078 if (data_len < 8) {
1079 /* This is faster than memcpy when the string is short. */
1080 for (i = 0; i < data_len; i++) {
1081 buffer[self->output_len + i] = s[i];
1082 }
1083 }
1084 else {
1085 memcpy(buffer + self->output_len, s, data_len);
1086 }
1087 self->output_len += data_len;
1088 return data_len;
1089 }
1090
1091 static PicklerObject *
_Pickler_New(PickleState * st)1092 _Pickler_New(PickleState *st)
1093 {
1094 PyMemoTable *memo = PyMemoTable_New();
1095 if (memo == NULL) {
1096 return NULL;
1097 }
1098
1099 const Py_ssize_t max_output_len = WRITE_BUF_SIZE;
1100 PyObject *output_buffer = PyBytes_FromStringAndSize(NULL, max_output_len);
1101 if (output_buffer == NULL) {
1102 goto error;
1103 }
1104
1105 PicklerObject *self = PyObject_GC_New(PicklerObject, st->Pickler_Type);
1106 if (self == NULL) {
1107 goto error;
1108 }
1109
1110 self->memo = memo;
1111 self->persistent_id = NULL;
1112 self->persistent_id_attr = NULL;
1113 self->dispatch_table = NULL;
1114 self->reducer_override = NULL;
1115 self->write = NULL;
1116 self->output_buffer = output_buffer;
1117 self->output_len = 0;
1118 self->max_output_len = max_output_len;
1119 self->proto = 0;
1120 self->bin = 0;
1121 self->framing = 0;
1122 self->frame_start = -1;
1123 self->buf_size = 0;
1124 self->fast = 0;
1125 self->fast_nesting = 0;
1126 self->fix_imports = 0;
1127 self->fast_memo = NULL;
1128 self->buffer_callback = NULL;
1129
1130 PyObject_GC_Track(self);
1131 return self;
1132
1133 error:
1134 PyMem_Free(memo);
1135 Py_XDECREF(output_buffer);
1136 return NULL;
1137 }
1138
1139 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1140 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1141 {
1142 long proto;
1143
1144 if (protocol == Py_None) {
1145 proto = DEFAULT_PROTOCOL;
1146 }
1147 else {
1148 proto = PyLong_AsLong(protocol);
1149 if (proto < 0) {
1150 if (proto == -1 && PyErr_Occurred())
1151 return -1;
1152 proto = HIGHEST_PROTOCOL;
1153 }
1154 else if (proto > HIGHEST_PROTOCOL) {
1155 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1156 HIGHEST_PROTOCOL);
1157 return -1;
1158 }
1159 }
1160 self->proto = (int)proto;
1161 self->bin = proto > 0;
1162 self->fix_imports = fix_imports && proto < 3;
1163 return 0;
1164 }
1165
1166 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1167 be called once on a freshly created Pickler. */
1168 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1169 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1170 {
1171 assert(file != NULL);
1172 if (PyObject_GetOptionalAttr(file, &_Py_ID(write), &self->write) < 0) {
1173 return -1;
1174 }
1175 if (self->write == NULL) {
1176 PyErr_SetString(PyExc_TypeError,
1177 "file must have a 'write' attribute");
1178 return -1;
1179 }
1180
1181 return 0;
1182 }
1183
1184 static int
_Pickler_SetBufferCallback(PicklerObject * self,PyObject * buffer_callback)1185 _Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1186 {
1187 if (buffer_callback == Py_None) {
1188 buffer_callback = NULL;
1189 }
1190 if (buffer_callback != NULL && self->proto < 5) {
1191 PyErr_SetString(PyExc_ValueError,
1192 "buffer_callback needs protocol >= 5");
1193 return -1;
1194 }
1195
1196 self->buffer_callback = Py_XNewRef(buffer_callback);
1197 return 0;
1198 }
1199
1200 /* Returns the size of the input on success, -1 on failure. This takes its
1201 own reference to `input`. */
1202 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1203 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1204 {
1205 if (self->buffer.buf != NULL)
1206 PyBuffer_Release(&self->buffer);
1207 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1208 return -1;
1209 self->input_buffer = self->buffer.buf;
1210 self->input_len = self->buffer.len;
1211 self->next_read_idx = 0;
1212 self->prefetched_idx = self->input_len;
1213 return self->input_len;
1214 }
1215
1216 static int
bad_readline(PickleState * st)1217 bad_readline(PickleState *st)
1218 {
1219 PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1220 return -1;
1221 }
1222
1223 /* Skip any consumed data that was only prefetched using peek() */
1224 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1225 _Unpickler_SkipConsumed(UnpicklerObject *self)
1226 {
1227 Py_ssize_t consumed;
1228 PyObject *r;
1229
1230 consumed = self->next_read_idx - self->prefetched_idx;
1231 if (consumed <= 0)
1232 return 0;
1233
1234 assert(self->peek); /* otherwise we did something wrong */
1235 /* This makes a useless copy... */
1236 r = PyObject_CallFunction(self->read, "n", consumed);
1237 if (r == NULL)
1238 return -1;
1239 Py_DECREF(r);
1240
1241 self->prefetched_idx = self->next_read_idx;
1242 return 0;
1243 }
1244
1245 static const Py_ssize_t READ_WHOLE_LINE = -1;
1246
1247 /* If reading from a file, we need to only pull the bytes we need, since there
1248 may be multiple pickle objects arranged contiguously in the same input
1249 buffer.
1250
1251 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1252 bytes from the input stream/buffer.
1253
1254 Update the unpickler's input buffer with the newly-read data. Returns -1 on
1255 failure; on success, returns the number of bytes read from the file.
1256
1257 On success, self->input_len will be 0; this is intentional so that when
1258 unpickling from a file, the "we've run out of data" code paths will trigger,
1259 causing the Unpickler to go back to the file for more data. Use the returned
1260 size to tell you how much data you can process. */
1261 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1262 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1263 {
1264 PyObject *data;
1265 Py_ssize_t read_size;
1266
1267 assert(self->read != NULL);
1268
1269 if (_Unpickler_SkipConsumed(self) < 0)
1270 return -1;
1271
1272 if (n == READ_WHOLE_LINE) {
1273 data = PyObject_CallNoArgs(self->readline);
1274 }
1275 else {
1276 PyObject *len;
1277 /* Prefetch some data without advancing the file pointer, if possible */
1278 if (self->peek && n < PREFETCH) {
1279 len = PyLong_FromSsize_t(PREFETCH);
1280 if (len == NULL)
1281 return -1;
1282 data = _Pickle_FastCall(self->peek, len);
1283 if (data == NULL) {
1284 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1285 return -1;
1286 /* peek() is probably not supported by the given file object */
1287 PyErr_Clear();
1288 Py_CLEAR(self->peek);
1289 }
1290 else {
1291 read_size = _Unpickler_SetStringInput(self, data);
1292 Py_DECREF(data);
1293 if (read_size < 0) {
1294 return -1;
1295 }
1296
1297 self->prefetched_idx = 0;
1298 if (n <= read_size)
1299 return n;
1300 }
1301 }
1302 len = PyLong_FromSsize_t(n);
1303 if (len == NULL)
1304 return -1;
1305 data = _Pickle_FastCall(self->read, len);
1306 }
1307 if (data == NULL)
1308 return -1;
1309
1310 read_size = _Unpickler_SetStringInput(self, data);
1311 Py_DECREF(data);
1312 return read_size;
1313 }
1314
1315 /* Don't call it directly: use _Unpickler_Read() */
1316 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,PickleState * st,char ** s,Py_ssize_t n)1317 _Unpickler_ReadImpl(UnpicklerObject *self, PickleState *st, char **s, Py_ssize_t n)
1318 {
1319 Py_ssize_t num_read;
1320
1321 *s = NULL;
1322 if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1323 PyErr_SetString(st->UnpicklingError,
1324 "read would overflow (invalid bytecode)");
1325 return -1;
1326 }
1327
1328 /* This case is handled by the _Unpickler_Read() macro for efficiency */
1329 assert(self->next_read_idx + n > self->input_len);
1330
1331 if (!self->read)
1332 return bad_readline(st);
1333
1334 /* Extend the buffer to satisfy desired size */
1335 num_read = _Unpickler_ReadFromFile(self, n);
1336 if (num_read < 0)
1337 return -1;
1338 if (num_read < n)
1339 return bad_readline(st);
1340 *s = self->input_buffer;
1341 self->next_read_idx = n;
1342 return n;
1343 }
1344
1345 /* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1346 *
1347 * This should only be used for non-small data reads where potentially
1348 * avoiding a copy is beneficial. This method does not try to prefetch
1349 * more data into the input buffer.
1350 *
1351 * _Unpickler_Read() is recommended in most cases.
1352 */
1353 static Py_ssize_t
_Unpickler_ReadInto(PickleState * state,UnpicklerObject * self,char * buf,Py_ssize_t n)1354 _Unpickler_ReadInto(PickleState *state, UnpicklerObject *self, char *buf,
1355 Py_ssize_t n)
1356 {
1357 assert(n != READ_WHOLE_LINE);
1358
1359 /* Read from available buffer data, if any */
1360 Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1361 if (in_buffer > 0) {
1362 Py_ssize_t to_read = Py_MIN(in_buffer, n);
1363 memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1364 self->next_read_idx += to_read;
1365 buf += to_read;
1366 n -= to_read;
1367 if (n == 0) {
1368 /* Entire read was satisfied from buffer */
1369 return n;
1370 }
1371 }
1372
1373 /* Read from file */
1374 if (!self->read) {
1375 /* We're unpickling memory, this means the input is truncated */
1376 return bad_readline(state);
1377 }
1378 if (_Unpickler_SkipConsumed(self) < 0) {
1379 return -1;
1380 }
1381
1382 if (!self->readinto) {
1383 /* readinto() not supported on file-like object, fall back to read()
1384 * and copy into destination buffer (bpo-39681) */
1385 PyObject* len = PyLong_FromSsize_t(n);
1386 if (len == NULL) {
1387 return -1;
1388 }
1389 PyObject* data = _Pickle_FastCall(self->read, len);
1390 if (data == NULL) {
1391 return -1;
1392 }
1393 if (!PyBytes_Check(data)) {
1394 PyErr_Format(PyExc_ValueError,
1395 "read() returned non-bytes object (%R)",
1396 Py_TYPE(data));
1397 Py_DECREF(data);
1398 return -1;
1399 }
1400 Py_ssize_t read_size = PyBytes_GET_SIZE(data);
1401 if (read_size < n) {
1402 Py_DECREF(data);
1403 return bad_readline(state);
1404 }
1405 memcpy(buf, PyBytes_AS_STRING(data), n);
1406 Py_DECREF(data);
1407 return n;
1408 }
1409
1410 /* Call readinto() into user buffer */
1411 PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1412 if (buf_obj == NULL) {
1413 return -1;
1414 }
1415 PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1416 if (read_size_obj == NULL) {
1417 return -1;
1418 }
1419 Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1420 Py_DECREF(read_size_obj);
1421
1422 if (read_size < 0) {
1423 if (!PyErr_Occurred()) {
1424 PyErr_SetString(PyExc_ValueError,
1425 "readinto() returned negative size");
1426 }
1427 return -1;
1428 }
1429 if (read_size < n) {
1430 return bad_readline(state);
1431 }
1432 return n;
1433 }
1434
/* Read `n` bytes from the unpickler's data source, storing a pointer to
   them in `*s`.

   This should be used for all data reads, rather than accessing the unpickler's
   input buffer directly. This method deals correctly with reading from input
   streams, which the input buffer doesn't deal with.

   When the input buffer already holds `n` unread bytes, `*s` is pointed at
   them and self->next_read_idx is advanced by `n`, without any function
   call.  Otherwise the request is handed to _Unpickler_ReadImpl().  Use
   the return value to know how many bytes you can consume.

   This is a macro: `self` and `n` are evaluated more than once, so pass
   expressions without side effects.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, state, s, n)                          \
    (((n) <= (self)->input_len - (self)->next_read_idx)             \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx,        \
        (self)->next_read_idx += (n),                               \
        (n))                                                        \
     : _Unpickler_ReadImpl(self, state, (s), (n)))
1454
1455 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1456 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1457 char **result)
1458 {
1459 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1460 if (input_line == NULL) {
1461 PyErr_NoMemory();
1462 return -1;
1463 }
1464
1465 memcpy(input_line, line, len);
1466 input_line[len] = '\0';
1467 self->input_line = input_line;
1468 *result = self->input_line;
1469 return len;
1470 }
1471
1472 /* Read a line from the input stream/buffer. If we run off the end of the input
1473 before hitting \n, raise an error.
1474
1475 Returns the number of chars read, or -1 on failure. */
1476 static Py_ssize_t
_Unpickler_Readline(PickleState * state,UnpicklerObject * self,char ** result)1477 _Unpickler_Readline(PickleState *state, UnpicklerObject *self, char **result)
1478 {
1479 Py_ssize_t i, num_read;
1480
1481 for (i = self->next_read_idx; i < self->input_len; i++) {
1482 if (self->input_buffer[i] == '\n') {
1483 char *line_start = self->input_buffer + self->next_read_idx;
1484 num_read = i - self->next_read_idx + 1;
1485 self->next_read_idx = i + 1;
1486 return _Unpickler_CopyLine(self, line_start, num_read, result);
1487 }
1488 }
1489 if (!self->read)
1490 return bad_readline(state);
1491
1492 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1493 if (num_read < 0)
1494 return -1;
1495 if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1496 return bad_readline(state);
1497 self->next_read_idx = num_read;
1498 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1499 }
1500
1501 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1502 will be modified in place. */
1503 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1504 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1505 {
1506 size_t i;
1507
1508 assert(new_size > self->memo_size);
1509
1510 PyObject **memo_new = self->memo;
1511 PyMem_RESIZE(memo_new, PyObject *, new_size);
1512 if (memo_new == NULL) {
1513 PyErr_NoMemory();
1514 return -1;
1515 }
1516 self->memo = memo_new;
1517 for (i = self->memo_size; i < new_size; i++)
1518 self->memo[i] = NULL;
1519 self->memo_size = new_size;
1520 return 0;
1521 }
1522
1523 /* Returns NULL if idx is out of bounds. */
1524 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1525 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1526 {
1527 if (idx >= self->memo_size)
1528 return NULL;
1529
1530 return self->memo[idx];
1531 }
1532
1533 /* Returns -1 (with an exception set) on failure, 0 on success.
1534 This takes its own reference to `value`. */
1535 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1536 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1537 {
1538 PyObject *old_item;
1539
1540 if (idx >= self->memo_size) {
1541 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1542 return -1;
1543 assert(idx < self->memo_size);
1544 }
1545 old_item = self->memo[idx];
1546 self->memo[idx] = Py_NewRef(value);
1547 if (old_item != NULL) {
1548 Py_DECREF(old_item);
1549 }
1550 else {
1551 self->memo_len++;
1552 }
1553 return 0;
1554 }
1555
1556 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1557 _Unpickler_NewMemo(Py_ssize_t new_size)
1558 {
1559 PyObject **memo = PyMem_NEW(PyObject *, new_size);
1560 if (memo == NULL) {
1561 PyErr_NoMemory();
1562 return NULL;
1563 }
1564 memset(memo, 0, new_size * sizeof(PyObject *));
1565 return memo;
1566 }
1567
1568 /* Free the unpickler's memo, taking care to decref any items left in it. */
1569 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1570 _Unpickler_MemoCleanup(UnpicklerObject *self)
1571 {
1572 Py_ssize_t i;
1573 PyObject **memo = self->memo;
1574
1575 if (self->memo == NULL)
1576 return;
1577 self->memo = NULL;
1578 i = self->memo_size;
1579 while (--i >= 0) {
1580 Py_XDECREF(memo[i]);
1581 }
1582 PyMem_Free(memo);
1583 }
1584
1585 static UnpicklerObject *
_Unpickler_New(PyObject * module)1586 _Unpickler_New(PyObject *module)
1587 {
1588 const int MEMO_SIZE = 32;
1589 PyObject **memo = _Unpickler_NewMemo(MEMO_SIZE);
1590 if (memo == NULL) {
1591 return NULL;
1592 }
1593
1594 PickleState *st = _Pickle_GetState(module);
1595 PyObject *stack = Pdata_New(st);
1596 if (stack == NULL) {
1597 goto error;
1598 }
1599
1600 UnpicklerObject *self = PyObject_GC_New(UnpicklerObject,
1601 st->Unpickler_Type);
1602 if (self == NULL) {
1603 goto error;
1604 }
1605
1606 self->stack = (Pdata *)stack;
1607 self->memo = memo;
1608 self->memo_size = MEMO_SIZE;
1609 self->memo_len = 0;
1610 self->persistent_load = NULL;
1611 self->persistent_load_attr = NULL;
1612 memset(&self->buffer, 0, sizeof(Py_buffer));
1613 self->input_buffer = NULL;
1614 self->input_line = NULL;
1615 self->input_len = 0;
1616 self->next_read_idx = 0;
1617 self->prefetched_idx = 0;
1618 self->read = NULL;
1619 self->readinto = NULL;
1620 self->readline = NULL;
1621 self->peek = NULL;
1622 self->buffers = NULL;
1623 self->encoding = NULL;
1624 self->errors = NULL;
1625 self->marks = NULL;
1626 self->num_marks = 0;
1627 self->marks_size = 0;
1628 self->proto = 0;
1629 self->fix_imports = 0;
1630
1631 PyObject_GC_Track(self);
1632 return self;
1633
1634 error:
1635 PyMem_Free(memo);
1636 Py_XDECREF(stack);
1637 return NULL;
1638 }
1639
1640 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1641 be called once on a freshly created Unpickler. */
1642 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1643 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1644 {
1645 /* Optional file methods */
1646 if (PyObject_GetOptionalAttr(file, &_Py_ID(peek), &self->peek) < 0) {
1647 goto error;
1648 }
1649 if (PyObject_GetOptionalAttr(file, &_Py_ID(readinto), &self->readinto) < 0) {
1650 goto error;
1651 }
1652 if (PyObject_GetOptionalAttr(file, &_Py_ID(read), &self->read) < 0) {
1653 goto error;
1654 }
1655 if (PyObject_GetOptionalAttr(file, &_Py_ID(readline), &self->readline) < 0) {
1656 goto error;
1657 }
1658 if (!self->readline || !self->read) {
1659 PyErr_SetString(PyExc_TypeError,
1660 "file must have 'read' and 'readline' attributes");
1661 goto error;
1662 }
1663 return 0;
1664
1665 error:
1666 Py_CLEAR(self->read);
1667 Py_CLEAR(self->readinto);
1668 Py_CLEAR(self->readline);
1669 Py_CLEAR(self->peek);
1670 return -1;
1671 }
1672
1673 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1674 be called once on a freshly created Unpickler. */
1675 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1676 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1677 const char *encoding,
1678 const char *errors)
1679 {
1680 if (encoding == NULL)
1681 encoding = "ASCII";
1682 if (errors == NULL)
1683 errors = "strict";
1684
1685 self->encoding = _PyMem_Strdup(encoding);
1686 self->errors = _PyMem_Strdup(errors);
1687 if (self->encoding == NULL || self->errors == NULL) {
1688 PyErr_NoMemory();
1689 return -1;
1690 }
1691 return 0;
1692 }
1693
1694 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1695 be called once on a freshly created Unpickler. */
1696 static int
_Unpickler_SetBuffers(UnpicklerObject * self,PyObject * buffers)1697 _Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1698 {
1699 if (buffers == NULL || buffers == Py_None) {
1700 self->buffers = NULL;
1701 }
1702 else {
1703 self->buffers = PyObject_GetIter(buffers);
1704 if (self->buffers == NULL) {
1705 return -1;
1706 }
1707 }
1708 return 0;
1709 }
1710
1711 /* Generate a GET opcode for an object stored in the memo. */
1712 static int
memo_get(PickleState * st,PicklerObject * self,PyObject * key)1713 memo_get(PickleState *st, PicklerObject *self, PyObject *key)
1714 {
1715 Py_ssize_t *value;
1716 char pdata[30];
1717 Py_ssize_t len;
1718
1719 value = PyMemoTable_Get(self->memo, key);
1720 if (value == NULL) {
1721 PyErr_SetObject(PyExc_KeyError, key);
1722 return -1;
1723 }
1724
1725 if (!self->bin) {
1726 pdata[0] = GET;
1727 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1728 "%zd\n", *value);
1729 len = strlen(pdata);
1730 }
1731 else {
1732 if (*value < 256) {
1733 pdata[0] = BINGET;
1734 pdata[1] = (unsigned char)(*value & 0xff);
1735 len = 2;
1736 }
1737 else if ((size_t)*value <= 0xffffffffUL) {
1738 pdata[0] = LONG_BINGET;
1739 pdata[1] = (unsigned char)(*value & 0xff);
1740 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1741 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1742 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1743 len = 5;
1744 }
1745 else { /* unlikely */
1746 PyErr_SetString(st->PicklingError,
1747 "memo id too large for LONG_BINGET");
1748 return -1;
1749 }
1750 }
1751
1752 if (_Pickler_Write(self, pdata, len) < 0)
1753 return -1;
1754
1755 return 0;
1756 }
1757
1758 /* Store an object in the memo, assign it a new unique ID based on the number
1759 of objects currently stored in the memo and generate a PUT opcode. */
1760 static int
memo_put(PickleState * st,PicklerObject * self,PyObject * obj)1761 memo_put(PickleState *st, PicklerObject *self, PyObject *obj)
1762 {
1763 char pdata[30];
1764 Py_ssize_t len;
1765 Py_ssize_t idx;
1766
1767 const char memoize_op = MEMOIZE;
1768
1769 if (self->fast)
1770 return 0;
1771
1772 idx = PyMemoTable_Size(self->memo);
1773 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1774 return -1;
1775
1776 if (self->proto >= 4) {
1777 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1778 return -1;
1779 return 0;
1780 }
1781 else if (!self->bin) {
1782 pdata[0] = PUT;
1783 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1784 "%zd\n", idx);
1785 len = strlen(pdata);
1786 }
1787 else {
1788 if (idx < 256) {
1789 pdata[0] = BINPUT;
1790 pdata[1] = (unsigned char)idx;
1791 len = 2;
1792 }
1793 else if ((size_t)idx <= 0xffffffffUL) {
1794 pdata[0] = LONG_BINPUT;
1795 pdata[1] = (unsigned char)(idx & 0xff);
1796 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1797 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1798 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1799 len = 5;
1800 }
1801 else { /* unlikely */
1802 PyErr_SetString(st->PicklingError,
1803 "memo id too large for LONG_BINPUT");
1804 return -1;
1805 }
1806 }
1807 if (_Pickler_Write(self, pdata, len) < 0)
1808 return -1;
1809
1810 return 0;
1811 }
1812
1813 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1814 get_dotted_path(PyObject *obj, PyObject *name)
1815 {
1816 PyObject *dotted_path;
1817 Py_ssize_t i, n;
1818 dotted_path = PyUnicode_Split(name, _Py_LATIN1_CHR('.'), -1);
1819 if (dotted_path == NULL)
1820 return NULL;
1821 n = PyList_GET_SIZE(dotted_path);
1822 assert(n >= 1);
1823 for (i = 0; i < n; i++) {
1824 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1825 if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1826 if (obj == NULL)
1827 PyErr_Format(PyExc_AttributeError,
1828 "Can't get local object %R", name);
1829 else
1830 PyErr_Format(PyExc_AttributeError,
1831 "Can't get local attribute %R on %R", name, obj);
1832 Py_DECREF(dotted_path);
1833 return NULL;
1834 }
1835 }
1836 return dotted_path;
1837 }
1838
1839 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1840 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1841 {
1842 Py_ssize_t i, n;
1843 PyObject *parent = NULL;
1844
1845 assert(PyList_CheckExact(names));
1846 Py_INCREF(obj);
1847 n = PyList_GET_SIZE(names);
1848 for (i = 0; i < n; i++) {
1849 PyObject *name = PyList_GET_ITEM(names, i);
1850 Py_XSETREF(parent, obj);
1851 (void)PyObject_GetOptionalAttr(parent, name, &obj);
1852 if (obj == NULL) {
1853 Py_DECREF(parent);
1854 return NULL;
1855 }
1856 }
1857 if (pparent != NULL)
1858 *pparent = parent;
1859 else
1860 Py_XDECREF(parent);
1861 return obj;
1862 }
1863
1864
1865 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1866 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1867 {
1868 PyObject *dotted_path, *attr;
1869
1870 if (allow_qualname) {
1871 dotted_path = get_dotted_path(obj, name);
1872 if (dotted_path == NULL)
1873 return NULL;
1874 attr = get_deep_attribute(obj, dotted_path, NULL);
1875 Py_DECREF(dotted_path);
1876 }
1877 else {
1878 (void)PyObject_GetOptionalAttr(obj, name, &attr);
1879 }
1880 if (attr == NULL && !PyErr_Occurred()) {
1881 PyErr_Format(PyExc_AttributeError,
1882 "Can't get attribute %R on %R", name, obj);
1883 }
1884 return attr;
1885 }
1886
1887 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1888 _checkmodule(PyObject *module_name, PyObject *module,
1889 PyObject *global, PyObject *dotted_path)
1890 {
1891 if (module == Py_None) {
1892 return -1;
1893 }
1894 if (PyUnicode_Check(module_name) &&
1895 _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1896 return -1;
1897 }
1898
1899 PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1900 if (candidate == NULL) {
1901 return -1;
1902 }
1903 if (candidate != global) {
1904 Py_DECREF(candidate);
1905 return -1;
1906 }
1907 Py_DECREF(candidate);
1908 return 0;
1909 }
1910
1911 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1912 whichmodule(PyObject *global, PyObject *dotted_path)
1913 {
1914 PyObject *module_name;
1915 PyObject *module = NULL;
1916 Py_ssize_t i;
1917 PyObject *modules;
1918
1919 if (PyObject_GetOptionalAttr(global, &_Py_ID(__module__), &module_name) < 0) {
1920 return NULL;
1921 }
1922 if (module_name) {
1923 /* In some rare cases (e.g., bound methods of extension types),
1924 __module__ can be None. If it is so, then search sys.modules for
1925 the module of global. */
1926 if (module_name != Py_None)
1927 return module_name;
1928 Py_CLEAR(module_name);
1929 }
1930 assert(module_name == NULL);
1931
1932 /* Fallback on walking sys.modules */
1933 PyThreadState *tstate = _PyThreadState_GET();
1934 modules = _PySys_GetAttr(tstate, &_Py_ID(modules));
1935 if (modules == NULL) {
1936 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1937 return NULL;
1938 }
1939 if (PyDict_CheckExact(modules)) {
1940 i = 0;
1941 while (PyDict_Next(modules, &i, &module_name, &module)) {
1942 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1943 return Py_NewRef(module_name);
1944 }
1945 if (PyErr_Occurred()) {
1946 return NULL;
1947 }
1948 }
1949 }
1950 else {
1951 PyObject *iterator = PyObject_GetIter(modules);
1952 if (iterator == NULL) {
1953 return NULL;
1954 }
1955 while ((module_name = PyIter_Next(iterator))) {
1956 module = PyObject_GetItem(modules, module_name);
1957 if (module == NULL) {
1958 Py_DECREF(module_name);
1959 Py_DECREF(iterator);
1960 return NULL;
1961 }
1962 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1963 Py_DECREF(module);
1964 Py_DECREF(iterator);
1965 return module_name;
1966 }
1967 Py_DECREF(module);
1968 Py_DECREF(module_name);
1969 if (PyErr_Occurred()) {
1970 Py_DECREF(iterator);
1971 return NULL;
1972 }
1973 }
1974 Py_DECREF(iterator);
1975 }
1976
1977 /* If no module is found, use __main__. */
1978 return &_Py_ID(__main__);
1979 }
1980
1981 /* fast_save_enter() and fast_save_leave() are guards against recursive
1982 objects when Pickler is used with the "fast mode" (i.e., with object
1983 memoization disabled). If the nesting of a list or dict object exceed
1984 FAST_NESTING_LIMIT, these guards will start keeping an internal
1985 reference to the seen list or dict objects and check whether these objects
1986 are recursive. These are not strictly necessary, since save() has a
1987 hard-coded recursion limit, but they give a nicer error message than the
1988 typical RuntimeError. */
1989 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1990 fast_save_enter(PicklerObject *self, PyObject *obj)
1991 {
1992 /* if fast_nesting < 0, we're doing an error exit. */
1993 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1994 PyObject *key = NULL;
1995 if (self->fast_memo == NULL) {
1996 self->fast_memo = PyDict_New();
1997 if (self->fast_memo == NULL) {
1998 self->fast_nesting = -1;
1999 return 0;
2000 }
2001 }
2002 key = PyLong_FromVoidPtr(obj);
2003 if (key == NULL) {
2004 self->fast_nesting = -1;
2005 return 0;
2006 }
2007 int r = PyDict_Contains(self->fast_memo, key);
2008 if (r > 0) {
2009 PyErr_Format(PyExc_ValueError,
2010 "fast mode: can't pickle cyclic objects "
2011 "including object type %.200s at %p",
2012 Py_TYPE(obj)->tp_name, obj);
2013 }
2014 else if (r == 0) {
2015 r = PyDict_SetItem(self->fast_memo, key, Py_None);
2016 }
2017 Py_DECREF(key);
2018 if (r != 0) {
2019 self->fast_nesting = -1;
2020 return 0;
2021 }
2022 }
2023 return 1;
2024 }
2025
2026 static int
fast_save_leave(PicklerObject * self,PyObject * obj)2027 fast_save_leave(PicklerObject *self, PyObject *obj)
2028 {
2029 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2030 PyObject *key = PyLong_FromVoidPtr(obj);
2031 if (key == NULL)
2032 return 0;
2033 if (PyDict_DelItem(self->fast_memo, key) < 0) {
2034 Py_DECREF(key);
2035 return 0;
2036 }
2037 Py_DECREF(key);
2038 }
2039 return 1;
2040 }
2041
2042 static int
save_none(PicklerObject * self,PyObject * obj)2043 save_none(PicklerObject *self, PyObject *obj)
2044 {
2045 const char none_op = NONE;
2046 if (_Pickler_Write(self, &none_op, 1) < 0)
2047 return -1;
2048
2049 return 0;
2050 }
2051
2052 static int
save_bool(PicklerObject * self,PyObject * obj)2053 save_bool(PicklerObject *self, PyObject *obj)
2054 {
2055 if (self->proto >= 2) {
2056 const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
2057 if (_Pickler_Write(self, &bool_op, 1) < 0)
2058 return -1;
2059 }
2060 else {
2061 /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2062 * so that unpicklers written before bools were introduced unpickle them
2063 * as ints, but unpicklers after can recognize that bools were intended.
2064 * Note that protocol 2 added direct ways to pickle bools.
2065 */
2066 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2067 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2068 return -1;
2069 }
2070 return 0;
2071 }
2072
2073 static int
save_long(PicklerObject * self,PyObject * obj)2074 save_long(PicklerObject *self, PyObject *obj)
2075 {
2076 PyObject *repr = NULL;
2077 Py_ssize_t size;
2078 long val;
2079 int overflow;
2080 int status = 0;
2081
2082 val= PyLong_AsLongAndOverflow(obj, &overflow);
2083 if (!overflow && (sizeof(long) <= 4 ||
2084 (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2085 {
2086 /* result fits in a signed 4-byte integer.
2087
2088 Note: we can't use -0x80000000L in the above condition because some
2089 compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2090 before applying the unary minus when sizeof(long) <= 4. The
2091 resulting value stays unsigned which is commonly not what we want,
2092 so MSVC happily warns us about it. However, that result would have
2093 been fine because we guard for sizeof(long) <= 4 which turns the
2094 condition true in that particular case. */
2095 char pdata[32];
2096 Py_ssize_t len = 0;
2097
2098 if (self->bin) {
2099 pdata[1] = (unsigned char)(val & 0xff);
2100 pdata[2] = (unsigned char)((val >> 8) & 0xff);
2101 pdata[3] = (unsigned char)((val >> 16) & 0xff);
2102 pdata[4] = (unsigned char)((val >> 24) & 0xff);
2103
2104 if ((pdata[4] != 0) || (pdata[3] != 0)) {
2105 pdata[0] = BININT;
2106 len = 5;
2107 }
2108 else if (pdata[2] != 0) {
2109 pdata[0] = BININT2;
2110 len = 3;
2111 }
2112 else {
2113 pdata[0] = BININT1;
2114 len = 2;
2115 }
2116 }
2117 else {
2118 sprintf(pdata, "%c%ld\n", INT, val);
2119 len = strlen(pdata);
2120 }
2121 if (_Pickler_Write(self, pdata, len) < 0)
2122 return -1;
2123
2124 return 0;
2125 }
2126 assert(!PyErr_Occurred());
2127
2128 if (self->proto >= 2) {
2129 /* Linear-time pickling. */
2130 size_t nbits;
2131 size_t nbytes;
2132 unsigned char *pdata;
2133 char header[5];
2134 int i;
2135 int sign = _PyLong_Sign(obj);
2136
2137 if (sign == 0) {
2138 header[0] = LONG1;
2139 header[1] = 0; /* It's 0 -- an empty bytestring. */
2140 if (_Pickler_Write(self, header, 2) < 0)
2141 goto error;
2142 return 0;
2143 }
2144 nbits = _PyLong_NumBits(obj);
2145 if (nbits == (size_t)-1 && PyErr_Occurred())
2146 goto error;
2147 /* How many bytes do we need? There are nbits >> 3 full
2148 * bytes of data, and nbits & 7 leftover bits. If there
2149 * are any leftover bits, then we clearly need another
2150 * byte. What's not so obvious is that we *probably*
2151 * need another byte even if there aren't any leftovers:
2152 * the most-significant bit of the most-significant byte
2153 * acts like a sign bit, and it's usually got a sense
2154 * opposite of the one we need. The exception is ints
2155 * of the form -(2**(8*j-1)) for j > 0. Such an int is
2156 * its own 256's-complement, so has the right sign bit
2157 * even without the extra byte. That's a pain to check
2158 * for in advance, though, so we always grab an extra
2159 * byte at the start, and cut it back later if possible.
2160 */
2161 nbytes = (nbits >> 3) + 1;
2162 if (nbytes > 0x7fffffffL) {
2163 PyErr_SetString(PyExc_OverflowError,
2164 "int too large to pickle");
2165 goto error;
2166 }
2167 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2168 if (repr == NULL)
2169 goto error;
2170 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2171 i = _PyLong_AsByteArray((PyLongObject *)obj,
2172 pdata, nbytes,
2173 1 /* little endian */ , 1 /* signed */ ,
2174 1 /* with exceptions */);
2175 if (i < 0)
2176 goto error;
2177 /* If the int is negative, this may be a byte more than
2178 * needed. This is so iff the MSB is all redundant sign
2179 * bits.
2180 */
2181 if (sign < 0 &&
2182 nbytes > 1 &&
2183 pdata[nbytes - 1] == 0xff &&
2184 (pdata[nbytes - 2] & 0x80) != 0) {
2185 nbytes--;
2186 }
2187
2188 if (nbytes < 256) {
2189 header[0] = LONG1;
2190 header[1] = (unsigned char)nbytes;
2191 size = 2;
2192 }
2193 else {
2194 header[0] = LONG4;
2195 size = (Py_ssize_t) nbytes;
2196 for (i = 1; i < 5; i++) {
2197 header[i] = (unsigned char)(size & 0xff);
2198 size >>= 8;
2199 }
2200 size = 5;
2201 }
2202 if (_Pickler_Write(self, header, size) < 0 ||
2203 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2204 goto error;
2205 }
2206 else {
2207 const char long_op = LONG;
2208 const char *string;
2209
2210 /* proto < 2: write the repr and newline. This is quadratic-time (in
2211 the number of digits), in both directions. We add a trailing 'L'
2212 to the repr, for compatibility with Python 2.x. */
2213
2214 repr = PyObject_Repr(obj);
2215 if (repr == NULL)
2216 goto error;
2217
2218 string = PyUnicode_AsUTF8AndSize(repr, &size);
2219 if (string == NULL)
2220 goto error;
2221
2222 if (_Pickler_Write(self, &long_op, 1) < 0 ||
2223 _Pickler_Write(self, string, size) < 0 ||
2224 _Pickler_Write(self, "L\n", 2) < 0)
2225 goto error;
2226 }
2227
2228 if (0) {
2229 error:
2230 status = -1;
2231 }
2232 Py_XDECREF(repr);
2233
2234 return status;
2235 }
2236
2237 static int
save_float(PicklerObject * self,PyObject * obj)2238 save_float(PicklerObject *self, PyObject *obj)
2239 {
2240 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2241
2242 if (self->bin) {
2243 char pdata[9];
2244 pdata[0] = BINFLOAT;
2245 if (PyFloat_Pack8(x, &pdata[1], 0) < 0)
2246 return -1;
2247 if (_Pickler_Write(self, pdata, 9) < 0)
2248 return -1;
2249 }
2250 else {
2251 int result = -1;
2252 char *buf = NULL;
2253 char op = FLOAT;
2254
2255 if (_Pickler_Write(self, &op, 1) < 0)
2256 goto done;
2257
2258 buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2259 if (!buf) {
2260 PyErr_NoMemory();
2261 goto done;
2262 }
2263
2264 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2265 goto done;
2266
2267 if (_Pickler_Write(self, "\n", 1) < 0)
2268 goto done;
2269
2270 result = 0;
2271 done:
2272 PyMem_Free(buf);
2273 return result;
2274 }
2275
2276 return 0;
2277 }
2278
2279 /* Perform direct write of the header and payload of the binary object.
2280
2281 The large contiguous data is written directly into the underlying file
2282 object, bypassing the output_buffer of the Pickler. We intentionally
2283 do not insert a protocol 4 frame opcode to make it possible to optimize
2284 file.read calls in the loader.
2285 */
2286 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2287 _Pickler_write_bytes(PicklerObject *self,
2288 const char *header, Py_ssize_t header_size,
2289 const char *data, Py_ssize_t data_size,
2290 PyObject *payload)
2291 {
2292 int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2293 int framing = self->framing;
2294
2295 if (bypass_buffer) {
2296 assert(self->output_buffer != NULL);
2297 /* Commit the previous frame. */
2298 if (_Pickler_CommitFrame(self)) {
2299 return -1;
2300 }
2301 /* Disable framing temporarily */
2302 self->framing = 0;
2303 }
2304
2305 if (_Pickler_Write(self, header, header_size) < 0) {
2306 return -1;
2307 }
2308
2309 if (bypass_buffer && self->write != NULL) {
2310 /* Bypass the in-memory buffer to directly stream large data
2311 into the underlying file object. */
2312 PyObject *result, *mem = NULL;
2313 /* Dump the output buffer to the file. */
2314 if (_Pickler_FlushToFile(self) < 0) {
2315 return -1;
2316 }
2317
2318 /* Stream write the payload into the file without going through the
2319 output buffer. */
2320 if (payload == NULL) {
2321 /* TODO: It would be better to use a memoryview with a linked
2322 original string if this is possible. */
2323 payload = mem = PyBytes_FromStringAndSize(data, data_size);
2324 if (payload == NULL) {
2325 return -1;
2326 }
2327 }
2328 result = PyObject_CallOneArg(self->write, payload);
2329 Py_XDECREF(mem);
2330 if (result == NULL) {
2331 return -1;
2332 }
2333 Py_DECREF(result);
2334
2335 /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2336 if (_Pickler_ClearBuffer(self) < 0) {
2337 return -1;
2338 }
2339 }
2340 else {
2341 if (_Pickler_Write(self, data, data_size) < 0) {
2342 return -1;
2343 }
2344 }
2345
2346 /* Re-enable framing for subsequent calls to _Pickler_Write. */
2347 self->framing = framing;
2348
2349 return 0;
2350 }
2351
2352 static int
_save_bytes_data(PickleState * st,PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2353 _save_bytes_data(PickleState *st, PicklerObject *self, PyObject *obj,
2354 const char *data, Py_ssize_t size)
2355 {
2356 assert(self->proto >= 3);
2357
2358 char header[9];
2359 Py_ssize_t len;
2360
2361 if (size < 0)
2362 return -1;
2363
2364 if (size <= 0xff) {
2365 header[0] = SHORT_BINBYTES;
2366 header[1] = (unsigned char)size;
2367 len = 2;
2368 }
2369 else if ((size_t)size <= 0xffffffffUL) {
2370 header[0] = BINBYTES;
2371 header[1] = (unsigned char)(size & 0xff);
2372 header[2] = (unsigned char)((size >> 8) & 0xff);
2373 header[3] = (unsigned char)((size >> 16) & 0xff);
2374 header[4] = (unsigned char)((size >> 24) & 0xff);
2375 len = 5;
2376 }
2377 else if (self->proto >= 4) {
2378 header[0] = BINBYTES8;
2379 _write_size64(header + 1, size);
2380 len = 9;
2381 }
2382 else {
2383 PyErr_SetString(PyExc_OverflowError,
2384 "serializing a bytes object larger than 4 GiB "
2385 "requires pickle protocol 4 or higher");
2386 return -1;
2387 }
2388
2389 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2390 return -1;
2391 }
2392
2393 if (memo_put(st, self, obj) < 0) {
2394 return -1;
2395 }
2396
2397 return 0;
2398 }
2399
2400 static int
save_bytes(PickleState * st,PicklerObject * self,PyObject * obj)2401 save_bytes(PickleState *st, PicklerObject *self, PyObject *obj)
2402 {
2403 if (self->proto < 3) {
2404 /* Older pickle protocols do not have an opcode for pickling bytes
2405 objects. Therefore, we need to fake the copy protocol (i.e.,
2406 the __reduce__ method) to permit bytes object unpickling.
2407
2408 Here we use a hack to be compatible with Python 2. Since in Python
2409 2 'bytes' is just an alias for 'str' (which has different
2410 parameters than the actual bytes object), we use codecs.encode
2411 to create the appropriate 'str' object when unpickled using
2412 Python 2 *and* the appropriate 'bytes' object when unpickled
2413 using Python 3. Again this is a hack and we don't need to do this
2414 with newer protocols. */
2415 PyObject *reduce_value;
2416 int status;
2417
2418 if (PyBytes_GET_SIZE(obj) == 0) {
2419 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2420 }
2421 else {
2422 PyObject *unicode_str =
2423 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2424 PyBytes_GET_SIZE(obj),
2425 "strict");
2426
2427 if (unicode_str == NULL)
2428 return -1;
2429 reduce_value = Py_BuildValue("(O(OO))",
2430 st->codecs_encode, unicode_str,
2431 &_Py_ID(latin1));
2432 Py_DECREF(unicode_str);
2433 }
2434
2435 if (reduce_value == NULL)
2436 return -1;
2437
2438 /* save_reduce() will memoize the object automatically. */
2439 status = save_reduce(st, self, reduce_value, obj);
2440 Py_DECREF(reduce_value);
2441 return status;
2442 }
2443 else {
2444 return _save_bytes_data(st, self, obj, PyBytes_AS_STRING(obj),
2445 PyBytes_GET_SIZE(obj));
2446 }
2447 }
2448
2449 static int
_save_bytearray_data(PickleState * state,PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2450 _save_bytearray_data(PickleState *state, PicklerObject *self, PyObject *obj,
2451 const char *data, Py_ssize_t size)
2452 {
2453 assert(self->proto >= 5);
2454
2455 char header[9];
2456 Py_ssize_t len;
2457
2458 if (size < 0)
2459 return -1;
2460
2461 header[0] = BYTEARRAY8;
2462 _write_size64(header + 1, size);
2463 len = 9;
2464
2465 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2466 return -1;
2467 }
2468
2469 if (memo_put(state, self, obj) < 0) {
2470 return -1;
2471 }
2472
2473 return 0;
2474 }
2475
2476 static int
save_bytearray(PickleState * state,PicklerObject * self,PyObject * obj)2477 save_bytearray(PickleState *state, PicklerObject *self, PyObject *obj)
2478 {
2479 if (self->proto < 5) {
2480 /* Older pickle protocols do not have an opcode for pickling
2481 * bytearrays. */
2482 PyObject *reduce_value = NULL;
2483 int status;
2484
2485 if (PyByteArray_GET_SIZE(obj) == 0) {
2486 reduce_value = Py_BuildValue("(O())",
2487 (PyObject *) &PyByteArray_Type);
2488 }
2489 else {
2490 PyObject *bytes_obj = PyBytes_FromObject(obj);
2491 if (bytes_obj != NULL) {
2492 reduce_value = Py_BuildValue("(O(O))",
2493 (PyObject *) &PyByteArray_Type,
2494 bytes_obj);
2495 Py_DECREF(bytes_obj);
2496 }
2497 }
2498 if (reduce_value == NULL)
2499 return -1;
2500
2501 /* save_reduce() will memoize the object automatically. */
2502 status = save_reduce(state, self, reduce_value, obj);
2503 Py_DECREF(reduce_value);
2504 return status;
2505 }
2506 else {
2507 return _save_bytearray_data(state, self, obj,
2508 PyByteArray_AS_STRING(obj),
2509 PyByteArray_GET_SIZE(obj));
2510 }
2511 }
2512
2513 static int
save_picklebuffer(PickleState * st,PicklerObject * self,PyObject * obj)2514 save_picklebuffer(PickleState *st, PicklerObject *self, PyObject *obj)
2515 {
2516 if (self->proto < 5) {
2517 PyErr_SetString(st->PicklingError,
2518 "PickleBuffer can only be pickled with protocol >= 5");
2519 return -1;
2520 }
2521 const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2522 if (view == NULL) {
2523 return -1;
2524 }
2525 if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2526 PyErr_SetString(st->PicklingError,
2527 "PickleBuffer can not be pickled when "
2528 "pointing to a non-contiguous buffer");
2529 return -1;
2530 }
2531 int in_band = 1;
2532 if (self->buffer_callback != NULL) {
2533 PyObject *ret = PyObject_CallOneArg(self->buffer_callback, obj);
2534 if (ret == NULL) {
2535 return -1;
2536 }
2537 in_band = PyObject_IsTrue(ret);
2538 Py_DECREF(ret);
2539 if (in_band == -1) {
2540 return -1;
2541 }
2542 }
2543 if (in_band) {
2544 /* Write data in-band */
2545 if (view->readonly) {
2546 return _save_bytes_data(st, self, obj, (const char *)view->buf,
2547 view->len);
2548 }
2549 else {
2550 return _save_bytearray_data(st, self, obj, (const char *)view->buf,
2551 view->len);
2552 }
2553 }
2554 else {
2555 /* Write data out-of-band */
2556 const char next_buffer_op = NEXT_BUFFER;
2557 if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2558 return -1;
2559 }
2560 if (view->readonly) {
2561 const char readonly_buffer_op = READONLY_BUFFER;
2562 if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2563 return -1;
2564 }
2565 }
2566 }
2567 return 0;
2568 }
2569
2570 /* A copy of PyUnicode_AsRawUnicodeEscapeString() that also translates
2571 backslash and newline characters to \uXXXX escapes. */
2572 static PyObject *
raw_unicode_escape(PyObject * obj)2573 raw_unicode_escape(PyObject *obj)
2574 {
2575 char *p;
2576 Py_ssize_t i, size;
2577 const void *data;
2578 int kind;
2579 _PyBytesWriter writer;
2580
2581 _PyBytesWriter_Init(&writer);
2582
2583 size = PyUnicode_GET_LENGTH(obj);
2584 data = PyUnicode_DATA(obj);
2585 kind = PyUnicode_KIND(obj);
2586
2587 p = _PyBytesWriter_Alloc(&writer, size);
2588 if (p == NULL)
2589 goto error;
2590 writer.overallocate = 1;
2591
2592 for (i=0; i < size; i++) {
2593 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2594 /* Map 32-bit characters to '\Uxxxxxxxx' */
2595 if (ch >= 0x10000) {
2596 /* -1: subtract 1 preallocated byte */
2597 p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2598 if (p == NULL)
2599 goto error;
2600
2601 *p++ = '\\';
2602 *p++ = 'U';
2603 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2604 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2605 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2606 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2607 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2608 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2609 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2610 *p++ = Py_hexdigits[ch & 15];
2611 }
2612 /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2613 else if (ch >= 256 ||
2614 ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2615 ch == 0x1a)
2616 {
2617 /* -1: subtract 1 preallocated byte */
2618 p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2619 if (p == NULL)
2620 goto error;
2621
2622 *p++ = '\\';
2623 *p++ = 'u';
2624 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2625 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2626 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2627 *p++ = Py_hexdigits[ch & 15];
2628 }
2629 /* Copy everything else as-is */
2630 else
2631 *p++ = (char) ch;
2632 }
2633
2634 return _PyBytesWriter_Finish(&writer, p);
2635
2636 error:
2637 _PyBytesWriter_Dealloc(&writer);
2638 return NULL;
2639 }
2640
2641 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2642 write_unicode_binary(PicklerObject *self, PyObject *obj)
2643 {
2644 char header[9];
2645 Py_ssize_t len;
2646 PyObject *encoded = NULL;
2647 Py_ssize_t size;
2648 const char *data;
2649
2650 data = PyUnicode_AsUTF8AndSize(obj, &size);
2651 if (data == NULL) {
2652 /* Issue #8383: for strings with lone surrogates, fallback on the
2653 "surrogatepass" error handler. */
2654 PyErr_Clear();
2655 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2656 if (encoded == NULL)
2657 return -1;
2658
2659 data = PyBytes_AS_STRING(encoded);
2660 size = PyBytes_GET_SIZE(encoded);
2661 }
2662
2663 assert(size >= 0);
2664 if (size <= 0xff && self->proto >= 4) {
2665 header[0] = SHORT_BINUNICODE;
2666 header[1] = (unsigned char)(size & 0xff);
2667 len = 2;
2668 }
2669 else if ((size_t)size <= 0xffffffffUL) {
2670 header[0] = BINUNICODE;
2671 header[1] = (unsigned char)(size & 0xff);
2672 header[2] = (unsigned char)((size >> 8) & 0xff);
2673 header[3] = (unsigned char)((size >> 16) & 0xff);
2674 header[4] = (unsigned char)((size >> 24) & 0xff);
2675 len = 5;
2676 }
2677 else if (self->proto >= 4) {
2678 header[0] = BINUNICODE8;
2679 _write_size64(header + 1, size);
2680 len = 9;
2681 }
2682 else {
2683 PyErr_SetString(PyExc_OverflowError,
2684 "serializing a string larger than 4 GiB "
2685 "requires pickle protocol 4 or higher");
2686 Py_XDECREF(encoded);
2687 return -1;
2688 }
2689
2690 if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2691 Py_XDECREF(encoded);
2692 return -1;
2693 }
2694 Py_XDECREF(encoded);
2695 return 0;
2696 }
2697
2698 static int
save_unicode(PickleState * state,PicklerObject * self,PyObject * obj)2699 save_unicode(PickleState *state, PicklerObject *self, PyObject *obj)
2700 {
2701 if (self->bin) {
2702 if (write_unicode_binary(self, obj) < 0)
2703 return -1;
2704 }
2705 else {
2706 PyObject *encoded;
2707 Py_ssize_t size;
2708 const char unicode_op = UNICODE;
2709
2710 encoded = raw_unicode_escape(obj);
2711 if (encoded == NULL)
2712 return -1;
2713
2714 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2715 Py_DECREF(encoded);
2716 return -1;
2717 }
2718
2719 size = PyBytes_GET_SIZE(encoded);
2720 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2721 Py_DECREF(encoded);
2722 return -1;
2723 }
2724 Py_DECREF(encoded);
2725
2726 if (_Pickler_Write(self, "\n", 1) < 0)
2727 return -1;
2728 }
2729 if (memo_put(state, self, obj) < 0)
2730 return -1;
2731
2732 return 0;
2733 }
2734
2735 /* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2736 static int
store_tuple_elements(PickleState * state,PicklerObject * self,PyObject * t,Py_ssize_t len)2737 store_tuple_elements(PickleState *state, PicklerObject *self, PyObject *t,
2738 Py_ssize_t len)
2739 {
2740 Py_ssize_t i;
2741
2742 assert(PyTuple_Size(t) == len);
2743
2744 for (i = 0; i < len; i++) {
2745 PyObject *element = PyTuple_GET_ITEM(t, i);
2746
2747 if (element == NULL)
2748 return -1;
2749 if (save(state, self, element, 0) < 0)
2750 return -1;
2751 }
2752
2753 return 0;
2754 }
2755
2756 /* Tuples are ubiquitous in the pickle protocols, so many techniques are
2757 * used across protocols to minimize the space needed to pickle them.
2758 * Tuples are also the only builtin immutable type that can be recursive
2759 * (a tuple can be reached from itself), and that requires some subtle
2760 * magic so that it works in all cases. IOW, this is a long routine.
2761 */
2762 static int
save_tuple(PickleState * state,PicklerObject * self,PyObject * obj)2763 save_tuple(PickleState *state, PicklerObject *self, PyObject *obj)
2764 {
2765 Py_ssize_t len, i;
2766
2767 const char mark_op = MARK;
2768 const char tuple_op = TUPLE;
2769 const char pop_op = POP;
2770 const char pop_mark_op = POP_MARK;
2771 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2772
2773 if ((len = PyTuple_Size(obj)) < 0)
2774 return -1;
2775
2776 if (len == 0) {
2777 char pdata[2];
2778
2779 if (self->proto) {
2780 pdata[0] = EMPTY_TUPLE;
2781 len = 1;
2782 }
2783 else {
2784 pdata[0] = MARK;
2785 pdata[1] = TUPLE;
2786 len = 2;
2787 }
2788 if (_Pickler_Write(self, pdata, len) < 0)
2789 return -1;
2790 return 0;
2791 }
2792
2793 /* The tuple isn't in the memo now. If it shows up there after
2794 * saving the tuple elements, the tuple must be recursive, in
2795 * which case we'll pop everything we put on the stack, and fetch
2796 * its value from the memo.
2797 */
2798 if (len <= 3 && self->proto >= 2) {
2799 /* Use TUPLE{1,2,3} opcodes. */
2800 if (store_tuple_elements(state, self, obj, len) < 0)
2801 return -1;
2802
2803 if (PyMemoTable_Get(self->memo, obj)) {
2804 /* pop the len elements */
2805 for (i = 0; i < len; i++)
2806 if (_Pickler_Write(self, &pop_op, 1) < 0)
2807 return -1;
2808 /* fetch from memo */
2809 if (memo_get(state, self, obj) < 0)
2810 return -1;
2811
2812 return 0;
2813 }
2814 else { /* Not recursive. */
2815 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2816 return -1;
2817 }
2818 goto memoize;
2819 }
2820
2821 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2822 * Generate MARK e1 e2 ... TUPLE
2823 */
2824 if (_Pickler_Write(self, &mark_op, 1) < 0)
2825 return -1;
2826
2827 if (store_tuple_elements(state, self, obj, len) < 0)
2828 return -1;
2829
2830 if (PyMemoTable_Get(self->memo, obj)) {
2831 /* pop the stack stuff we pushed */
2832 if (self->bin) {
2833 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2834 return -1;
2835 }
2836 else {
2837 /* Note that we pop one more than len, to remove
2838 * the MARK too.
2839 */
2840 for (i = 0; i <= len; i++)
2841 if (_Pickler_Write(self, &pop_op, 1) < 0)
2842 return -1;
2843 }
2844 /* fetch from memo */
2845 if (memo_get(state, self, obj) < 0)
2846 return -1;
2847
2848 return 0;
2849 }
2850 else { /* Not recursive. */
2851 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2852 return -1;
2853 }
2854
2855 memoize:
2856 if (memo_put(state, self, obj) < 0)
2857 return -1;
2858
2859 return 0;
2860 }
2861
2862 /* iter is an iterator giving items, and we batch up chunks of
2863 * MARK item item ... item APPENDS
2864 * opcode sequences. Calling code should have arranged to first create an
2865 * empty list, or list-like object, for the APPENDS to operate on.
2866 * Returns 0 on success, <0 on error.
2867 */
2868 static int
batch_list(PickleState * state,PicklerObject * self,PyObject * iter)2869 batch_list(PickleState *state, PicklerObject *self, PyObject *iter)
2870 {
2871 PyObject *obj = NULL;
2872 PyObject *firstitem = NULL;
2873 int i, n;
2874
2875 const char mark_op = MARK;
2876 const char append_op = APPEND;
2877 const char appends_op = APPENDS;
2878
2879 assert(iter != NULL);
2880
2881 /* XXX: I think this function could be made faster by avoiding the
2882 iterator interface and fetching objects directly from list using
2883 PyList_GET_ITEM.
2884 */
2885
2886 if (self->proto == 0) {
2887 /* APPENDS isn't available; do one at a time. */
2888 for (;;) {
2889 obj = PyIter_Next(iter);
2890 if (obj == NULL) {
2891 if (PyErr_Occurred())
2892 return -1;
2893 break;
2894 }
2895 i = save(state, self, obj, 0);
2896 Py_DECREF(obj);
2897 if (i < 0)
2898 return -1;
2899 if (_Pickler_Write(self, &append_op, 1) < 0)
2900 return -1;
2901 }
2902 return 0;
2903 }
2904
2905 /* proto > 0: write in batches of BATCHSIZE. */
2906 do {
2907 /* Get first item */
2908 firstitem = PyIter_Next(iter);
2909 if (firstitem == NULL) {
2910 if (PyErr_Occurred())
2911 goto error;
2912
2913 /* nothing more to add */
2914 break;
2915 }
2916
2917 /* Try to get a second item */
2918 obj = PyIter_Next(iter);
2919 if (obj == NULL) {
2920 if (PyErr_Occurred())
2921 goto error;
2922
2923 /* Only one item to write */
2924 if (save(state, self, firstitem, 0) < 0)
2925 goto error;
2926 if (_Pickler_Write(self, &append_op, 1) < 0)
2927 goto error;
2928 Py_CLEAR(firstitem);
2929 break;
2930 }
2931
2932 /* More than one item to write */
2933
2934 /* Pump out MARK, items, APPENDS. */
2935 if (_Pickler_Write(self, &mark_op, 1) < 0)
2936 goto error;
2937
2938 if (save(state, self, firstitem, 0) < 0)
2939 goto error;
2940 Py_CLEAR(firstitem);
2941 n = 1;
2942
2943 /* Fetch and save up to BATCHSIZE items */
2944 while (obj) {
2945 if (save(state, self, obj, 0) < 0)
2946 goto error;
2947 Py_CLEAR(obj);
2948 n += 1;
2949
2950 if (n == BATCHSIZE)
2951 break;
2952
2953 obj = PyIter_Next(iter);
2954 if (obj == NULL) {
2955 if (PyErr_Occurred())
2956 goto error;
2957 break;
2958 }
2959 }
2960
2961 if (_Pickler_Write(self, &appends_op, 1) < 0)
2962 goto error;
2963
2964 } while (n == BATCHSIZE);
2965 return 0;
2966
2967 error:
2968 Py_XDECREF(firstitem);
2969 Py_XDECREF(obj);
2970 return -1;
2971 }
2972
2973 /* This is a variant of batch_list() above, specialized for lists (with no
2974 * support for list subclasses). Like batch_list(), we batch up chunks of
2975 * MARK item item ... item APPENDS
2976 * opcode sequences. Calling code should have arranged to first create an
2977 * empty list, or list-like object, for the APPENDS to operate on.
2978 * Returns 0 on success, -1 on error.
2979 *
2980 * This version is considerably faster than batch_list(), if less general.
2981 *
2982 * Note that this only works for protocols > 0.
2983 */
2984 static int
batch_list_exact(PickleState * state,PicklerObject * self,PyObject * obj)2985 batch_list_exact(PickleState *state, PicklerObject *self, PyObject *obj)
2986 {
2987 PyObject *item = NULL;
2988 Py_ssize_t this_batch, total;
2989
2990 const char append_op = APPEND;
2991 const char appends_op = APPENDS;
2992 const char mark_op = MARK;
2993
2994 assert(obj != NULL);
2995 assert(self->proto > 0);
2996 assert(PyList_CheckExact(obj));
2997
2998 if (PyList_GET_SIZE(obj) == 1) {
2999 item = PyList_GET_ITEM(obj, 0);
3000 Py_INCREF(item);
3001 int err = save(state, self, item, 0);
3002 Py_DECREF(item);
3003 if (err < 0)
3004 return -1;
3005 if (_Pickler_Write(self, &append_op, 1) < 0)
3006 return -1;
3007 return 0;
3008 }
3009
3010 /* Write in batches of BATCHSIZE. */
3011 total = 0;
3012 do {
3013 this_batch = 0;
3014 if (_Pickler_Write(self, &mark_op, 1) < 0)
3015 return -1;
3016 while (total < PyList_GET_SIZE(obj)) {
3017 item = PyList_GET_ITEM(obj, total);
3018 Py_INCREF(item);
3019 int err = save(state, self, item, 0);
3020 Py_DECREF(item);
3021 if (err < 0)
3022 return -1;
3023 total++;
3024 if (++this_batch == BATCHSIZE)
3025 break;
3026 }
3027 if (_Pickler_Write(self, &appends_op, 1) < 0)
3028 return -1;
3029
3030 } while (total < PyList_GET_SIZE(obj));
3031
3032 return 0;
3033 }
3034
3035 static int
save_list(PickleState * state,PicklerObject * self,PyObject * obj)3036 save_list(PickleState *state, PicklerObject *self, PyObject *obj)
3037 {
3038 char header[3];
3039 Py_ssize_t len;
3040 int status = 0;
3041
3042 if (self->fast && !fast_save_enter(self, obj))
3043 goto error;
3044
3045 /* Create an empty list. */
3046 if (self->bin) {
3047 header[0] = EMPTY_LIST;
3048 len = 1;
3049 }
3050 else {
3051 header[0] = MARK;
3052 header[1] = LIST;
3053 len = 2;
3054 }
3055
3056 if (_Pickler_Write(self, header, len) < 0)
3057 goto error;
3058
3059 /* Get list length, and bow out early if empty. */
3060 if ((len = PyList_Size(obj)) < 0)
3061 goto error;
3062
3063 if (memo_put(state, self, obj) < 0)
3064 goto error;
3065
3066 if (len != 0) {
3067 /* Materialize the list elements. */
3068 if (PyList_CheckExact(obj) && self->proto > 0) {
3069 if (_Py_EnterRecursiveCall(" while pickling an object"))
3070 goto error;
3071 status = batch_list_exact(state, self, obj);
3072 _Py_LeaveRecursiveCall();
3073 } else {
3074 PyObject *iter = PyObject_GetIter(obj);
3075 if (iter == NULL)
3076 goto error;
3077
3078 if (_Py_EnterRecursiveCall(" while pickling an object")) {
3079 Py_DECREF(iter);
3080 goto error;
3081 }
3082 status = batch_list(state, self, iter);
3083 _Py_LeaveRecursiveCall();
3084 Py_DECREF(iter);
3085 }
3086 }
3087 if (0) {
3088 error:
3089 status = -1;
3090 }
3091
3092 if (self->fast && !fast_save_leave(self, obj))
3093 status = -1;
3094
3095 return status;
3096 }
3097
3098 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3099 * MARK key value ... key value SETITEMS
3100 * opcode sequences. Calling code should have arranged to first create an
3101 * empty dict, or dict-like object, for the SETITEMS to operate on.
3102 * Returns 0 on success, <0 on error.
3103 *
3104 * This is very much like batch_list(). The difference between saving
3105 * elements directly, and picking apart two-tuples, is so long-winded at
3106 * the C level, though, that attempts to combine these routines were too
3107 * ugly to bear.
3108 */
3109 static int
batch_dict(PickleState * state,PicklerObject * self,PyObject * iter)3110 batch_dict(PickleState *state, PicklerObject *self, PyObject *iter)
3111 {
3112 PyObject *obj = NULL;
3113 PyObject *firstitem = NULL;
3114 int i, n;
3115
3116 const char mark_op = MARK;
3117 const char setitem_op = SETITEM;
3118 const char setitems_op = SETITEMS;
3119
3120 assert(iter != NULL);
3121
3122 if (self->proto == 0) {
3123 /* SETITEMS isn't available; do one at a time. */
3124 for (;;) {
3125 obj = PyIter_Next(iter);
3126 if (obj == NULL) {
3127 if (PyErr_Occurred())
3128 return -1;
3129 break;
3130 }
3131 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3132 PyErr_SetString(PyExc_TypeError, "dict items "
3133 "iterator must return 2-tuples");
3134 Py_DECREF(obj);
3135 return -1;
3136 }
3137 i = save(state, self, PyTuple_GET_ITEM(obj, 0), 0);
3138 if (i >= 0)
3139 i = save(state, self, PyTuple_GET_ITEM(obj, 1), 0);
3140 Py_DECREF(obj);
3141 if (i < 0)
3142 return -1;
3143 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3144 return -1;
3145 }
3146 return 0;
3147 }
3148
3149 /* proto > 0: write in batches of BATCHSIZE. */
3150 do {
3151 /* Get first item */
3152 firstitem = PyIter_Next(iter);
3153 if (firstitem == NULL) {
3154 if (PyErr_Occurred())
3155 goto error;
3156
3157 /* nothing more to add */
3158 break;
3159 }
3160 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3161 PyErr_SetString(PyExc_TypeError, "dict items "
3162 "iterator must return 2-tuples");
3163 goto error;
3164 }
3165
3166 /* Try to get a second item */
3167 obj = PyIter_Next(iter);
3168 if (obj == NULL) {
3169 if (PyErr_Occurred())
3170 goto error;
3171
3172 /* Only one item to write */
3173 if (save(state, self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3174 goto error;
3175 if (save(state, self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3176 goto error;
3177 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3178 goto error;
3179 Py_CLEAR(firstitem);
3180 break;
3181 }
3182
3183 /* More than one item to write */
3184
3185 /* Pump out MARK, items, SETITEMS. */
3186 if (_Pickler_Write(self, &mark_op, 1) < 0)
3187 goto error;
3188
3189 if (save(state, self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3190 goto error;
3191 if (save(state, self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3192 goto error;
3193 Py_CLEAR(firstitem);
3194 n = 1;
3195
3196 /* Fetch and save up to BATCHSIZE items */
3197 while (obj) {
3198 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3199 PyErr_SetString(PyExc_TypeError, "dict items "
3200 "iterator must return 2-tuples");
3201 goto error;
3202 }
3203 if (save(state, self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3204 save(state, self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3205 goto error;
3206 Py_CLEAR(obj);
3207 n += 1;
3208
3209 if (n == BATCHSIZE)
3210 break;
3211
3212 obj = PyIter_Next(iter);
3213 if (obj == NULL) {
3214 if (PyErr_Occurred())
3215 goto error;
3216 break;
3217 }
3218 }
3219
3220 if (_Pickler_Write(self, &setitems_op, 1) < 0)
3221 goto error;
3222
3223 } while (n == BATCHSIZE);
3224 return 0;
3225
3226 error:
3227 Py_XDECREF(firstitem);
3228 Py_XDECREF(obj);
3229 return -1;
3230 }
3231
3232 /* This is a variant of batch_dict() above that specializes for dicts, with no
3233 * support for dict subclasses. Like batch_dict(), we batch up chunks of
3234 * MARK key value ... key value SETITEMS
3235 * opcode sequences. Calling code should have arranged to first create an
3236 * empty dict, or dict-like object, for the SETITEMS to operate on.
3237 * Returns 0 on success, -1 on error.
3238 *
3239 * Note that this currently doesn't work for protocol 0.
3240 */
3241 static int
batch_dict_exact(PickleState * state,PicklerObject * self,PyObject * obj)3242 batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj)
3243 {
3244 PyObject *key = NULL, *value = NULL;
3245 int i;
3246 Py_ssize_t dict_size, ppos = 0;
3247
3248 const char mark_op = MARK;
3249 const char setitem_op = SETITEM;
3250 const char setitems_op = SETITEMS;
3251
3252 assert(obj != NULL && PyDict_CheckExact(obj));
3253 assert(self->proto > 0);
3254
3255 dict_size = PyDict_GET_SIZE(obj);
3256
3257 /* Special-case len(d) == 1 to save space. */
3258 if (dict_size == 1) {
3259 PyDict_Next(obj, &ppos, &key, &value);
3260 Py_INCREF(key);
3261 Py_INCREF(value);
3262 if (save(state, self, key, 0) < 0) {
3263 goto error;
3264 }
3265 if (save(state, self, value, 0) < 0) {
3266 goto error;
3267 }
3268 Py_CLEAR(key);
3269 Py_CLEAR(value);
3270 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3271 return -1;
3272 return 0;
3273 }
3274
3275 /* Write in batches of BATCHSIZE. */
3276 do {
3277 i = 0;
3278 if (_Pickler_Write(self, &mark_op, 1) < 0)
3279 return -1;
3280 while (PyDict_Next(obj, &ppos, &key, &value)) {
3281 Py_INCREF(key);
3282 Py_INCREF(value);
3283 if (save(state, self, key, 0) < 0) {
3284 goto error;
3285 }
3286 if (save(state, self, value, 0) < 0) {
3287 goto error;
3288 }
3289 Py_CLEAR(key);
3290 Py_CLEAR(value);
3291 if (++i == BATCHSIZE)
3292 break;
3293 }
3294 if (_Pickler_Write(self, &setitems_op, 1) < 0)
3295 return -1;
3296 if (PyDict_GET_SIZE(obj) != dict_size) {
3297 PyErr_Format(
3298 PyExc_RuntimeError,
3299 "dictionary changed size during iteration");
3300 return -1;
3301 }
3302
3303 } while (i == BATCHSIZE);
3304 return 0;
3305 error:
3306 Py_XDECREF(key);
3307 Py_XDECREF(value);
3308 return -1;
3309 }
3310
3311 static int
save_dict(PickleState * state,PicklerObject * self,PyObject * obj)3312 save_dict(PickleState *state, PicklerObject *self, PyObject *obj)
3313 {
3314 PyObject *items, *iter;
3315 char header[3];
3316 Py_ssize_t len;
3317 int status = 0;
3318 assert(PyDict_Check(obj));
3319
3320 if (self->fast && !fast_save_enter(self, obj))
3321 goto error;
3322
3323 /* Create an empty dict. */
3324 if (self->bin) {
3325 header[0] = EMPTY_DICT;
3326 len = 1;
3327 }
3328 else {
3329 header[0] = MARK;
3330 header[1] = DICT;
3331 len = 2;
3332 }
3333
3334 if (_Pickler_Write(self, header, len) < 0)
3335 goto error;
3336
3337 if (memo_put(state, self, obj) < 0)
3338 goto error;
3339
3340 if (PyDict_GET_SIZE(obj)) {
3341 /* Save the dict items. */
3342 if (PyDict_CheckExact(obj) && self->proto > 0) {
3343 /* We can take certain shortcuts if we know this is a dict and
3344 not a dict subclass. */
3345 if (_Py_EnterRecursiveCall(" while pickling an object"))
3346 goto error;
3347 status = batch_dict_exact(state, self, obj);
3348 _Py_LeaveRecursiveCall();
3349 } else {
3350 items = PyObject_CallMethodNoArgs(obj, &_Py_ID(items));
3351 if (items == NULL)
3352 goto error;
3353 iter = PyObject_GetIter(items);
3354 Py_DECREF(items);
3355 if (iter == NULL)
3356 goto error;
3357 if (_Py_EnterRecursiveCall(" while pickling an object")) {
3358 Py_DECREF(iter);
3359 goto error;
3360 }
3361 status = batch_dict(state, self, iter);
3362 _Py_LeaveRecursiveCall();
3363 Py_DECREF(iter);
3364 }
3365 }
3366
3367 if (0) {
3368 error:
3369 status = -1;
3370 }
3371
3372 if (self->fast && !fast_save_leave(self, obj))
3373 status = -1;
3374
3375 return status;
3376 }
3377
3378 static int
save_set(PickleState * state,PicklerObject * self,PyObject * obj)3379 save_set(PickleState *state, PicklerObject *self, PyObject *obj)
3380 {
3381 PyObject *item;
3382 int i;
3383 Py_ssize_t set_size, ppos = 0;
3384 Py_hash_t hash;
3385
3386 const char empty_set_op = EMPTY_SET;
3387 const char mark_op = MARK;
3388 const char additems_op = ADDITEMS;
3389
3390 if (self->proto < 4) {
3391 PyObject *items;
3392 PyObject *reduce_value;
3393 int status;
3394
3395 items = PySequence_List(obj);
3396 if (items == NULL) {
3397 return -1;
3398 }
3399 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3400 Py_DECREF(items);
3401 if (reduce_value == NULL) {
3402 return -1;
3403 }
3404 /* save_reduce() will memoize the object automatically. */
3405 status = save_reduce(state, self, reduce_value, obj);
3406 Py_DECREF(reduce_value);
3407 return status;
3408 }
3409
3410 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3411 return -1;
3412
3413 if (memo_put(state, self, obj) < 0)
3414 return -1;
3415
3416 set_size = PySet_GET_SIZE(obj);
3417 if (set_size == 0)
3418 return 0; /* nothing to do */
3419
3420 /* Write in batches of BATCHSIZE. */
3421 do {
3422 i = 0;
3423 if (_Pickler_Write(self, &mark_op, 1) < 0)
3424 return -1;
3425
3426 int err = 0;
3427 Py_BEGIN_CRITICAL_SECTION(obj);
3428 while (_PySet_NextEntryRef(obj, &ppos, &item, &hash)) {
3429 err = save(state, self, item, 0);
3430 Py_CLEAR(item);
3431 if (err < 0)
3432 break;
3433 if (++i == BATCHSIZE)
3434 break;
3435 }
3436 Py_END_CRITICAL_SECTION();
3437 if (err < 0) {
3438 return -1;
3439 }
3440 if (_Pickler_Write(self, &additems_op, 1) < 0)
3441 return -1;
3442 if (PySet_GET_SIZE(obj) != set_size) {
3443 PyErr_Format(
3444 PyExc_RuntimeError,
3445 "set changed size during iteration");
3446 return -1;
3447 }
3448 } while (i == BATCHSIZE);
3449
3450 return 0;
3451 }
3452
3453 static int
save_frozenset(PickleState * state,PicklerObject * self,PyObject * obj)3454 save_frozenset(PickleState *state, PicklerObject *self, PyObject *obj)
3455 {
3456 PyObject *iter;
3457
3458 const char mark_op = MARK;
3459 const char frozenset_op = FROZENSET;
3460
3461 if (self->fast && !fast_save_enter(self, obj))
3462 return -1;
3463
3464 if (self->proto < 4) {
3465 PyObject *items;
3466 PyObject *reduce_value;
3467 int status;
3468
3469 items = PySequence_List(obj);
3470 if (items == NULL) {
3471 return -1;
3472 }
3473 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3474 items);
3475 Py_DECREF(items);
3476 if (reduce_value == NULL) {
3477 return -1;
3478 }
3479 /* save_reduce() will memoize the object automatically. */
3480 status = save_reduce(state, self, reduce_value, obj);
3481 Py_DECREF(reduce_value);
3482 return status;
3483 }
3484
3485 if (_Pickler_Write(self, &mark_op, 1) < 0)
3486 return -1;
3487
3488 iter = PyObject_GetIter(obj);
3489 if (iter == NULL) {
3490 return -1;
3491 }
3492 for (;;) {
3493 PyObject *item;
3494
3495 item = PyIter_Next(iter);
3496 if (item == NULL) {
3497 if (PyErr_Occurred()) {
3498 Py_DECREF(iter);
3499 return -1;
3500 }
3501 break;
3502 }
3503 if (save(state, self, item, 0) < 0) {
3504 Py_DECREF(item);
3505 Py_DECREF(iter);
3506 return -1;
3507 }
3508 Py_DECREF(item);
3509 }
3510 Py_DECREF(iter);
3511
3512 /* If the object is already in the memo, this means it is
3513 recursive. In this case, throw away everything we put on the
3514 stack, and fetch the object back from the memo. */
3515 if (PyMemoTable_Get(self->memo, obj)) {
3516 const char pop_mark_op = POP_MARK;
3517
3518 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3519 return -1;
3520 if (memo_get(state, self, obj) < 0)
3521 return -1;
3522 return 0;
3523 }
3524
3525 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3526 return -1;
3527 if (memo_put(state, self, obj) < 0)
3528 return -1;
3529
3530 return 0;
3531 }
3532
3533 static int
fix_imports(PickleState * st,PyObject ** module_name,PyObject ** global_name)3534 fix_imports(PickleState *st, PyObject **module_name, PyObject **global_name)
3535 {
3536 PyObject *key;
3537 PyObject *item;
3538
3539 key = PyTuple_Pack(2, *module_name, *global_name);
3540 if (key == NULL)
3541 return -1;
3542 item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3543 Py_DECREF(key);
3544 if (item) {
3545 PyObject *fixed_module_name;
3546 PyObject *fixed_global_name;
3547
3548 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3549 PyErr_Format(PyExc_RuntimeError,
3550 "_compat_pickle.REVERSE_NAME_MAPPING values "
3551 "should be 2-tuples, not %.200s",
3552 Py_TYPE(item)->tp_name);
3553 return -1;
3554 }
3555 fixed_module_name = PyTuple_GET_ITEM(item, 0);
3556 fixed_global_name = PyTuple_GET_ITEM(item, 1);
3557 if (!PyUnicode_Check(fixed_module_name) ||
3558 !PyUnicode_Check(fixed_global_name)) {
3559 PyErr_Format(PyExc_RuntimeError,
3560 "_compat_pickle.REVERSE_NAME_MAPPING values "
3561 "should be pairs of str, not (%.200s, %.200s)",
3562 Py_TYPE(fixed_module_name)->tp_name,
3563 Py_TYPE(fixed_global_name)->tp_name);
3564 return -1;
3565 }
3566
3567 Py_CLEAR(*module_name);
3568 Py_CLEAR(*global_name);
3569 *module_name = Py_NewRef(fixed_module_name);
3570 *global_name = Py_NewRef(fixed_global_name);
3571 return 0;
3572 }
3573 else if (PyErr_Occurred()) {
3574 return -1;
3575 }
3576
3577 item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3578 if (item) {
3579 if (!PyUnicode_Check(item)) {
3580 PyErr_Format(PyExc_RuntimeError,
3581 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3582 "should be strings, not %.200s",
3583 Py_TYPE(item)->tp_name);
3584 return -1;
3585 }
3586 Py_XSETREF(*module_name, Py_NewRef(item));
3587 }
3588 else if (PyErr_Occurred()) {
3589 return -1;
3590 }
3591
3592 return 0;
3593 }
3594
/* Pickle obj as a reference to a global that can be imported by name.
 *
 * name, when non-NULL, is used as the (possibly dotted) global name;
 * otherwise obj.__qualname__ is used, falling back to obj.__name__.  The
 * module name comes from whichmodule(); that module is imported and the
 * dotted name is looked up on it again.  PicklingError is set if the import
 * fails, if the lookup fails, or if the lookup yields an object other than
 * obj.
 *
 * What gets written:
 *   - protocol >= 2 and (module_name, global_name) is present in
 *     st->extension_registry: an EXT1, EXT2 or EXT4 opcode, sized by the
 *     registered code.  Nothing is memoized on this path.
 *   - otherwise, protocol >= 4: module_name, global_name, STACK_GLOBAL.
 *   - otherwise: the text GLOBAL opcode ("module\nname\n"), wrapped in one
 *     getattr REDUCE call per additional component of a dotted name.
 *   In the last two cases obj is memoized afterwards.
 *
 * Returns 0 on success, -1 on error.
 */
static int
save_global(PickleState *st, PicklerObject *self, PyObject *obj,
            PyObject *name)
{
    PyObject *global_name = NULL;
    PyObject *module_name = NULL;
    PyObject *module = NULL;
    PyObject *parent = NULL;
    PyObject *dotted_path = NULL;
    PyObject *cls;
    int status = 0;

    const char global_op = GLOBAL;

    if (name) {
        global_name = Py_NewRef(name);
    }
    else {
        /* Prefer __qualname__; fall back to __name__ when it is missing. */
        if (PyObject_GetOptionalAttr(obj, &_Py_ID(__qualname__), &global_name) < 0)
            goto error;
        if (global_name == NULL) {
            global_name = PyObject_GetAttr(obj, &_Py_ID(__name__));
            if (global_name == NULL)
                goto error;
        }
    }

    /* module is still NULL at this point; it is only assigned by the import
       below. */
    dotted_path = get_dotted_path(module, global_name);
    if (dotted_path == NULL)
        goto error;
    module_name = whichmodule(obj, dotted_path);
    if (module_name == NULL)
        goto error;

    /* XXX: Change to use the import C API directly with level=0 to disallow
       relative imports.

       XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
       builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
       custom import functions (IMHO, this would be a nice security
       feature). The import C API would need to be extended to support the
       extra parameters of __import__ to fix that. */
    module = PyImport_Import(module_name);
    if (module == NULL) {
        PyErr_Format(st->PicklingError,
                     "Can't pickle %R: import of module %R failed",
                     obj, module_name);
        goto error;
    }
    /* Check that looking the name up again really yields obj itself. */
    cls = get_deep_attribute(module, dotted_path, &parent);
    if (cls == NULL) {
        PyErr_Format(st->PicklingError,
                     "Can't pickle %R: attribute lookup %S on %S failed",
                     obj, global_name, module_name);
        goto error;
    }
    if (cls != obj) {
        Py_DECREF(cls);
        PyErr_Format(st->PicklingError,
                     "Can't pickle %R: it's not the same object as %S.%S",
                     obj, module_name, global_name);
        goto error;
    }
    Py_DECREF(cls);

    if (self->proto >= 2) {
        /* See whether this is in the extension registry, and if
         * so generate an EXT opcode.
         */
        PyObject *extension_key;
        PyObject *code_obj;              /* extension code as Python object */
        long code;                       /* extension code as C value */
        char pdata[5];
        Py_ssize_t n;

        extension_key = PyTuple_Pack(2, module_name, global_name);
        if (extension_key == NULL) {
            goto error;
        }
        if (PyDict_GetItemRef(st->extension_registry, extension_key, &code_obj) < 0) {
            Py_DECREF(extension_key);
            goto error;
        }
        Py_DECREF(extension_key);
        if (code_obj == NULL) {
            /* The object is not registered in the extension registry.
               This is the most likely code path.  The jump lands inside the
               else branch below. */
            goto gen_global;
        }

        code = PyLong_AsLong(code_obj);
        Py_DECREF(code_obj);
        if (code <= 0 || code > 0x7fffffffL) {
            /* Should never happen in normal circumstances, since the type and
               the value of the code are checked in copyreg.add_extension().
               An exception already set (e.g. by PyLong_AsLong()) is kept. */
            if (!PyErr_Occurred())
                PyErr_Format(PyExc_RuntimeError, "extension code %ld is out of range", code);
            goto error;
        }

        /* Generate an EXT opcode; the code is written least significant
           byte first, in 1, 2 or 4 bytes. */
        if (code <= 0xff) {
            pdata[0] = EXT1;
            pdata[1] = (unsigned char)code;
            n = 2;
        }
        else if (code <= 0xffff) {
            pdata[0] = EXT2;
            pdata[1] = (unsigned char)(code & 0xff);
            pdata[2] = (unsigned char)((code >> 8) & 0xff);
            n = 3;
        }
        else {
            pdata[0] = EXT4;
            pdata[1] = (unsigned char)(code & 0xff);
            pdata[2] = (unsigned char)((code >> 8) & 0xff);
            pdata[3] = (unsigned char)((code >> 16) & 0xff);
            pdata[4] = (unsigned char)((code >> 24) & 0xff);
            n = 5;
        }

        if (_Pickler_Write(self, pdata, n) < 0)
            goto error;
    }
    else {
  gen_global:
        /* If the parent reported by get_deep_attribute() is the module
           itself, only the last path component is kept as the global name.
           Clearing dotted_path also disables the getattr chain emitted for
           protocols below 4. */
        if (parent == module) {
            Py_SETREF(global_name,
                      Py_NewRef(PyList_GET_ITEM(dotted_path,
                                                PyList_GET_SIZE(dotted_path) - 1)));
            Py_CLEAR(dotted_path);
        }
        if (self->proto >= 4) {
            const char stack_global_op = STACK_GLOBAL;

            if (save(st, self, module_name, 0) < 0)
                goto error;
            if (save(st, self, global_name, 0) < 0)
                goto error;

            if (_Pickler_Write(self, &stack_global_op, 1) < 0)
                goto error;
        }
        else {
            /* Generate a normal global opcode if we are using a pickle
               protocol < 4, or if the object is not registered in the
               extension registry.

               Objects with multi-part __qualname__ are represented as
               getattr(getattr(..., attrname1), attrname2). */
            const char mark_op = MARK;
            const char tupletwo_op = (self->proto < 2) ? TUPLE : TUPLE2;
            const char reduce_op = REDUCE;
            Py_ssize_t i;
            if (dotted_path) {
                /* The GLOBAL opcode names only the first path component. */
                if (PyList_GET_SIZE(dotted_path) > 1) {
                    Py_SETREF(global_name, Py_NewRef(PyList_GET_ITEM(dotted_path, 0)));
                }
                /* Push one getattr (plus a MARK for protocols below 2) per
                   additional component; the matching arguments and REDUCE
                   opcodes are written after the GLOBAL opcode. */
                for (i = 1; i < PyList_GET_SIZE(dotted_path); i++) {
                    if (save(st, self, st->getattr, 0) < 0 ||
                        (self->proto < 2 && _Pickler_Write(self, &mark_op, 1) < 0))
                    {
                        goto error;
                    }
                }
            }

            PyObject *encoded;
            PyObject *(*unicode_encoder)(PyObject *);

            if (_Pickler_Write(self, &global_op, 1) < 0)
                goto error;

            /* For protocol < 3 and if the user didn't request against doing
               so, we convert module names to the old 2.x module names. */
            if (self->proto < 3 && self->fix_imports) {
                if (fix_imports(st, &module_name, &global_name) < 0) {
                    goto error;
                }
            }

            /* Since Python 3.0 now supports non-ASCII identifiers, we encode
               both the module name and the global name using UTF-8, but only
               for protocol 3 (this branch handles protocols below 4 only).
               Older protocols use ASCII, to ensure compatibility with older
               Unpickler running on Python 2.x. */
            if (self->proto == 3) {
                unicode_encoder = PyUnicode_AsUTF8String;
            }
            else {
                unicode_encoder = PyUnicode_AsASCIIString;
            }
            /* Save the name of the module. */
            encoded = unicode_encoder(module_name);
            if (encoded == NULL) {
                if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
                    PyErr_Format(st->PicklingError,
                                 "can't pickle module identifier '%S' using "
                                 "pickle protocol %i",
                                 module_name, self->proto);
                goto error;
            }
            if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
                               PyBytes_GET_SIZE(encoded)) < 0) {
                Py_DECREF(encoded);
                goto error;
            }
            Py_DECREF(encoded);
            if(_Pickler_Write(self, "\n", 1) < 0)
                goto error;

            /* Save the name of the global. */
            encoded = unicode_encoder(global_name);
            if (encoded == NULL) {
                if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
                    PyErr_Format(st->PicklingError,
                                 "can't pickle global identifier '%S' using "
                                 "pickle protocol %i",
                                 global_name, self->proto);
                goto error;
            }
            if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
                               PyBytes_GET_SIZE(encoded)) < 0) {
                Py_DECREF(encoded);
                goto error;
            }
            Py_DECREF(encoded);
            if (_Pickler_Write(self, "\n", 1) < 0)
                goto error;

            /* Complete each pending getattr call: attribute name, 2-tuple
               opcode (TUPLE closes the MARK written for protocols below 2),
               REDUCE. */
            if (dotted_path) {
                for (i = 1; i < PyList_GET_SIZE(dotted_path); i++) {
                    if (save(st, self, PyList_GET_ITEM(dotted_path, i), 0) < 0 ||
                        _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
                        _Pickler_Write(self, &reduce_op, 1) < 0)
                    {
                        goto error;
                    }
                }
            }
        }
        /* Memoize the object. */
        if (memo_put(st, self, obj) < 0)
            goto error;
    }

    /* The error label lives in a never-taken branch so that the success
       path skips it while both paths share the cleanup below. */
    if (0) {
  error:
        status = -1;
    }
    Py_XDECREF(module_name);
    Py_XDECREF(global_name);
    Py_XDECREF(module);
    Py_XDECREF(parent);
    Py_XDECREF(dotted_path);

    return status;
}
3852
3853 static int
save_singleton_type(PickleState * state,PicklerObject * self,PyObject * obj,PyObject * singleton)3854 save_singleton_type(PickleState *state, PicklerObject *self, PyObject *obj,
3855 PyObject *singleton)
3856 {
3857 PyObject *reduce_value;
3858 int status;
3859
3860 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3861 if (reduce_value == NULL) {
3862 return -1;
3863 }
3864 status = save_reduce(state, self, reduce_value, obj);
3865 Py_DECREF(reduce_value);
3866 return status;
3867 }
3868
3869 static int
save_type(PickleState * state,PicklerObject * self,PyObject * obj)3870 save_type(PickleState *state, PicklerObject *self, PyObject *obj)
3871 {
3872 if (obj == (PyObject *)&_PyNone_Type) {
3873 return save_singleton_type(state, self, obj, Py_None);
3874 }
3875 else if (obj == (PyObject *)&PyEllipsis_Type) {
3876 return save_singleton_type(state, self, obj, Py_Ellipsis);
3877 }
3878 else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3879 return save_singleton_type(state, self, obj, Py_NotImplemented);
3880 }
3881 return save_global(state, self, obj, NULL);
3882 }
3883
3884 static int
save_pers(PickleState * state,PicklerObject * self,PyObject * obj)3885 save_pers(PickleState *state, PicklerObject *self, PyObject *obj)
3886 {
3887 PyObject *pid = NULL;
3888 int status = 0;
3889
3890 const char persid_op = PERSID;
3891 const char binpersid_op = BINPERSID;
3892
3893 pid = PyObject_CallOneArg(self->persistent_id, obj);
3894 if (pid == NULL)
3895 return -1;
3896
3897 if (pid != Py_None) {
3898 if (self->bin) {
3899 if (save(state, self, pid, 1) < 0 ||
3900 _Pickler_Write(self, &binpersid_op, 1) < 0)
3901 goto error;
3902 }
3903 else {
3904 PyObject *pid_str;
3905
3906 pid_str = PyObject_Str(pid);
3907 if (pid_str == NULL)
3908 goto error;
3909
3910 /* XXX: Should it check whether the pid contains embedded
3911 newlines? */
3912 if (!PyUnicode_IS_ASCII(pid_str)) {
3913 PyErr_SetString(state->PicklingError,
3914 "persistent IDs in protocol 0 must be "
3915 "ASCII strings");
3916 Py_DECREF(pid_str);
3917 goto error;
3918 }
3919
3920 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3921 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3922 PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3923 _Pickler_Write(self, "\n", 1) < 0) {
3924 Py_DECREF(pid_str);
3925 goto error;
3926 }
3927 Py_DECREF(pid_str);
3928 }
3929 status = 1;
3930 }
3931
3932 if (0) {
3933 error:
3934 status = -1;
3935 }
3936 Py_XDECREF(pid);
3937
3938 return status;
3939 }
3940
3941 static PyObject *
get_class(PyObject * obj)3942 get_class(PyObject *obj)
3943 {
3944 PyObject *cls;
3945
3946 if (PyObject_GetOptionalAttr(obj, &_Py_ID(__class__), &cls) == 0) {
3947 cls = Py_NewRef(Py_TYPE(obj));
3948 }
3949 return cls;
3950 }
3951
/* We're saving obj, and args is the 2-through-6 tuple returned by the
 * appropriate __reduce__ method for obj:
 *
 *     (callable, argtup[, state[, listitems[, dictitems[, state_setter]]]])
 *
 * callable must be callable and argtup a tuple.  Each optional element may
 * be None, which is treated like an absent element; otherwise listitems and
 * dictitems must be iterators and state_setter must be callable.
 * PicklingError is set when these checks fail.
 *
 * The object construction is written as NEWOBJ_EX, NEWOBJ or REDUCE,
 * depending on the protocol and on callable.__name__.  Then obj is memoized
 * (unless obj is NULL), listitems and dictitems are written with
 * batch_list() and batch_dict(), and state is written either with BUILD or
 * as a state_setter(obj, state) call whose result is popped.
 *
 * Returns 0 on success, -1 on error.
 */
static int
save_reduce(PickleState *st, PicklerObject *self, PyObject *args,
            PyObject *obj)
{
    PyObject *callable;
    PyObject *argtup;
    PyObject *state = NULL;
    PyObject *listitems = Py_None;
    PyObject *dictitems = Py_None;
    PyObject *state_setter = Py_None;
    Py_ssize_t size;
    int use_newobj = 0, use_newobj_ex = 0;

    const char reduce_op = REDUCE;
    const char build_op = BUILD;
    const char newobj_op = NEWOBJ;
    const char newobj_ex_op = NEWOBJ_EX;

    size = PyTuple_Size(args);
    if (size < 2 || size > 6) {
        PyErr_SetString(st->PicklingError, "tuple returned by "
                        "__reduce__ must contain 2 through 6 elements");
        return -1;
    }

    /* Elements missing from args leave the defaults set above in place. */
    if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
                           &callable, &argtup, &state, &listitems, &dictitems,
                           &state_setter))
        return -1;

    if (!PyCallable_Check(callable)) {
        PyErr_SetString(st->PicklingError, "first item of the tuple "
                        "returned by __reduce__ must be callable");
        return -1;
    }
    if (!PyTuple_Check(argtup)) {
        PyErr_SetString(st->PicklingError, "second item of the tuple "
                        "returned by __reduce__ must be a tuple");
        return -1;
    }

    /* From here on, NULL means "not provided" for the optional elements. */
    if (state == Py_None)
        state = NULL;

    if (listitems == Py_None)
        listitems = NULL;
    else if (!PyIter_Check(listitems)) {
        PyErr_Format(st->PicklingError, "fourth element of the tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(listitems)->tp_name);
        return -1;
    }

    if (dictitems == Py_None)
        dictitems = NULL;
    else if (!PyIter_Check(dictitems)) {
        PyErr_Format(st->PicklingError, "fifth element of the tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(dictitems)->tp_name);
        return -1;
    }

    if (state_setter == Py_None)
        state_setter = NULL;
    else if (!PyCallable_Check(state_setter)) {
        PyErr_Format(st->PicklingError, "sixth element of the tuple "
                     "returned by __reduce__ must be a function, not %s",
                     Py_TYPE(state_setter)->tp_name);
        return -1;
    }

    /* For protocol >= 2, a callable named __newobj_ex__ or __newobj__
       selects the NEWOBJ_EX / NEWOBJ handling below instead of a plain
       REDUCE. */
    if (self->proto >= 2) {
        PyObject *name;

        if (PyObject_GetOptionalAttr(callable, &_Py_ID(__name__), &name) < 0) {
            return -1;
        }
        if (name != NULL && PyUnicode_Check(name)) {
            use_newobj_ex = _PyUnicode_Equal(name, &_Py_ID(__newobj_ex__));
            if (!use_newobj_ex) {
                use_newobj = _PyUnicode_Equal(name, &_Py_ID(__newobj__));
            }
        }
        Py_XDECREF(name);
    }

    if (use_newobj_ex) {
        /* NOTE: the local 'args' below shadows this function's 'args'
           parameter for the rest of this block. */
        PyObject *cls;
        PyObject *args;
        PyObject *kwargs;

        if (PyTuple_GET_SIZE(argtup) != 3) {
            PyErr_Format(st->PicklingError,
                         "length of the NEWOBJ_EX argument tuple must be "
                         "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyType_Check(cls)) {
            PyErr_Format(st->PicklingError,
                         "first item from NEWOBJ_EX argument tuple must "
                         "be a class, not %.200s", Py_TYPE(cls)->tp_name);
            return -1;
        }
        args = PyTuple_GET_ITEM(argtup, 1);
        if (!PyTuple_Check(args)) {
            PyErr_Format(st->PicklingError,
                         "second item from NEWOBJ_EX argument tuple must "
                         "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
            return -1;
        }
        kwargs = PyTuple_GET_ITEM(argtup, 2);
        if (!PyDict_Check(kwargs)) {
            PyErr_Format(st->PicklingError,
                         "third item from NEWOBJ_EX argument tuple must "
                         "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
            return -1;
        }

        if (self->proto >= 4) {
            if (save(st, self, cls, 0) < 0 ||
                save(st, self, args, 0) < 0 ||
                save(st, self, kwargs, 0) < 0 ||
                _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
                return -1;
            }
        }
        else {
            /* Protocols below 4 do not use the NEWOBJ_EX opcode here.
               Instead, build st->partial(cls.__new__, cls, *args, **kwargs)
               and pickle it as a REDUCE call with an empty argument
               tuple. */
            PyObject *newargs;
            PyObject *cls_new;
            Py_ssize_t i;

            newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
            if (newargs == NULL)
                return -1;

            cls_new = PyObject_GetAttr(cls, &_Py_ID(__new__));
            if (cls_new == NULL) {
                Py_DECREF(newargs);
                return -1;
            }
            /* PyTuple_SET_ITEM() steals the references it is given. */
            PyTuple_SET_ITEM(newargs, 0, cls_new);
            PyTuple_SET_ITEM(newargs, 1, Py_NewRef(cls));
            for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
                PyObject *item = PyTuple_GET_ITEM(args, i);
                PyTuple_SET_ITEM(newargs, i + 2, Py_NewRef(item));
            }

            /* 'callable' now holds an owned reference, released below. */
            callable = PyObject_Call(st->partial, newargs, kwargs);
            Py_DECREF(newargs);
            if (callable == NULL)
                return -1;

            newargs = PyTuple_New(0);
            if (newargs == NULL) {
                Py_DECREF(callable);
                return -1;
            }

            if (save(st, self, callable, 0) < 0 ||
                save(st, self, newargs, 0) < 0 ||
                _Pickler_Write(self, &reduce_op, 1) < 0) {
                Py_DECREF(newargs);
                Py_DECREF(callable);
                return -1;
            }
            Py_DECREF(newargs);
            Py_DECREF(callable);
        }
    }
    else if (use_newobj) {
        PyObject *cls;
        PyObject *newargtup;
        PyObject *obj_class;
        int p;

        /* Sanity checks. */
        if (PyTuple_GET_SIZE(argtup) < 1) {
            PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyType_Check(cls)) {
            PyErr_SetString(st->PicklingError, "args[0] from "
                            "__newobj__ args is not a type");
            return -1;
        }

        /* The class passed to __newobj__ must be the class of obj. */
        if (obj != NULL) {
            obj_class = get_class(obj);
            if (obj_class == NULL) {
                return -1;
            }
            p = obj_class != cls;
            Py_DECREF(obj_class);
            if (p) {
                PyErr_SetString(st->PicklingError, "args[0] from "
                                "__newobj__ args has the wrong class");
                return -1;
            }
        }
        /* XXX: These calls to save() are prone to infinite recursion. Imagine
           what happens if the value returned by the __reduce__() method of
           some extension type contains another object of the same type. Ouch!

           Here is a quick example, that I ran into, to illustrate what I
           mean:

             >>> import pickle, copyreg
             >>> copyreg.dispatch_table.pop(complex)
             >>> pickle.dumps(1+2j)
             Traceback (most recent call last):
               ...
             RecursionError: maximum recursion depth exceeded

           Removing the complex class from copyreg.dispatch_table made the
           __reduce_ex__() method emit another complex object:

             >>> (1+1j).__reduce_ex__(2)
             (<function __newobj__ at 0xb7b71c3c>,
               (<class 'complex'>, (1+1j)), None, None, None)

           Thus when save() was called on newargstup (the 2nd item) recursion
           ensued. Of course, the bug was in the complex class which had a
           broken __getnewargs__() that emitted another complex object. But,
           the point, here, is it is quite easy to end up with a broken reduce
           function. */

        /* Save the class and its __new__ arguments. */
        if (save(st, self, cls, 0) < 0) {
            return -1;
        }

        /* Everything after the class in argtup is passed to __new__. */
        newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
        if (newargtup == NULL)
            return -1;

        p = save(st, self, newargtup, 0);
        Py_DECREF(newargtup);
        if (p < 0)
            return -1;

        /* Add NEWOBJ opcode. */
        if (_Pickler_Write(self, &newobj_op, 1) < 0)
            return -1;
    }
    else { /* Not using NEWOBJ. */
        if (save(st, self, callable, 0) < 0 ||
            save(st, self, argtup, 0) < 0 ||
            _Pickler_Write(self, &reduce_op, 1) < 0)
            return -1;
    }

    /* obj can be NULL when save_reduce() is used directly. A NULL obj means
       the caller does not want to memoize the object. Not particularly
       useful, but that is to mimic the behavior of save_reduce() in pickle.py
       when obj is None. */
    if (obj != NULL) {
        /* If the object is already in the memo, this means it is
           recursive. In this case, throw away everything we put on the
           stack, and fetch the object back from the memo. */
        if (PyMemoTable_Get(self->memo, obj)) {
            const char pop_op = POP;

            if (_Pickler_Write(self, &pop_op, 1) < 0)
                return -1;
            if (memo_get(st, self, obj) < 0)
                return -1;

            return 0;
        }
        else if (memo_put(st, self, obj) < 0)
            return -1;
    }

    if (listitems && batch_list(st, self, listitems) < 0)
        return -1;

    if (dictitems && batch_dict(st, self, dictitems) < 0)
        return -1;

    if (state) {
        if (state_setter == NULL) {
            if (save(st, self, state, 0) < 0 ||
                _Pickler_Write(self, &build_op, 1) < 0)
                return -1;
        }
        else {

            /* If a state_setter is specified, call it instead of load_build to
             * update obj with its previous state.
             * The first 4 save/write instructions push state_setter and its
             * tuple of expected arguments (obj, state) onto the stack. The
             * REDUCE opcode triggers the state_setter(obj, state) function
             * call. Finally, because state-updating routines only do in-place
             * modification, the whole operation has to be stack-transparent.
             * Thus, we finally pop the call's output from the stack.*/

            const char tupletwo_op = TUPLE2;
            const char pop_op = POP;
            if (save(st, self, state_setter, 0) < 0 ||
                save(st, self, obj, 0) < 0 || save(st, self, state, 0) < 0 ||
                _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
                _Pickler_Write(self, &reduce_op, 1) < 0 ||
                _Pickler_Write(self, &pop_op, 1) < 0)
                return -1;
        }
    }
    return 0;
}
4267
4268 static int
save(PickleState * st,PicklerObject * self,PyObject * obj,int pers_save)4269 save(PickleState *st, PicklerObject *self, PyObject *obj, int pers_save)
4270 {
4271 PyTypeObject *type;
4272 PyObject *reduce_func = NULL;
4273 PyObject *reduce_value = NULL;
4274 int status = 0;
4275
4276 if (_Pickler_OpcodeBoundary(self) < 0)
4277 return -1;
4278
4279 /* The extra pers_save argument is necessary to avoid calling save_pers()
4280 on its returned object. */
4281 if (!pers_save && self->persistent_id) {
4282 /* save_pers() returns:
4283 -1 to signal an error;
4284 0 if it did nothing successfully;
4285 1 if a persistent id was saved.
4286 */
4287 if ((status = save_pers(st, self, obj)) != 0)
4288 return status;
4289 }
4290
4291 type = Py_TYPE(obj);
4292
4293 /* The old cPickle had an optimization that used switch-case statement
4294 dispatching on the first letter of the type name. This has was removed
4295 since benchmarks shown that this optimization was actually slowing
4296 things down. */
4297
4298 /* Atom types; these aren't memoized, so don't check the memo. */
4299
4300 if (obj == Py_None) {
4301 return save_none(self, obj);
4302 }
4303 else if (obj == Py_False || obj == Py_True) {
4304 return save_bool(self, obj);
4305 }
4306 else if (type == &PyLong_Type) {
4307 return save_long(self, obj);
4308 }
4309 else if (type == &PyFloat_Type) {
4310 return save_float(self, obj);
4311 }
4312
4313 /* Check the memo to see if it has the object. If so, generate
4314 a GET (or BINGET) opcode, instead of pickling the object
4315 once again. */
4316 if (PyMemoTable_Get(self->memo, obj)) {
4317 return memo_get(st, self, obj);
4318 }
4319
4320 if (type == &PyBytes_Type) {
4321 return save_bytes(st, self, obj);
4322 }
4323 else if (type == &PyUnicode_Type) {
4324 return save_unicode(st, self, obj);
4325 }
4326
4327 /* We're only calling _Py_EnterRecursiveCall here so that atomic
4328 types above are pickled faster. */
4329 if (_Py_EnterRecursiveCall(" while pickling an object")) {
4330 return -1;
4331 }
4332
4333 if (type == &PyDict_Type) {
4334 status = save_dict(st, self, obj);
4335 goto done;
4336 }
4337 else if (type == &PySet_Type) {
4338 status = save_set(st, self, obj);
4339 goto done;
4340 }
4341 else if (type == &PyFrozenSet_Type) {
4342 status = save_frozenset(st, self, obj);
4343 goto done;
4344 }
4345 else if (type == &PyList_Type) {
4346 status = save_list(st, self, obj);
4347 goto done;
4348 }
4349 else if (type == &PyTuple_Type) {
4350 status = save_tuple(st, self, obj);
4351 goto done;
4352 }
4353 else if (type == &PyByteArray_Type) {
4354 status = save_bytearray(st, self, obj);
4355 goto done;
4356 }
4357 else if (type == &PyPickleBuffer_Type) {
4358 status = save_picklebuffer(st, self, obj);
4359 goto done;
4360 }
4361
4362 /* Now, check reducer_override. If it returns NotImplemented,
4363 * fallback to save_type or save_global, and then perhaps to the
4364 * regular reduction mechanism.
4365 */
4366 if (self->reducer_override != NULL) {
4367 reduce_value = PyObject_CallOneArg(self->reducer_override, obj);
4368 if (reduce_value == NULL) {
4369 goto error;
4370 }
4371 if (reduce_value != Py_NotImplemented) {
4372 goto reduce;
4373 }
4374 Py_SETREF(reduce_value, NULL);
4375 }
4376
4377 if (type == &PyType_Type) {
4378 status = save_type(st, self, obj);
4379 goto done;
4380 }
4381 else if (type == &PyFunction_Type) {
4382 status = save_global(st, self, obj, NULL);
4383 goto done;
4384 }
4385
4386 /* XXX: This part needs some unit tests. */
4387
4388 /* Get a reduction callable, and call it. This may come from
4389 * self.dispatch_table, copyreg.dispatch_table, the object's
4390 * __reduce_ex__ method, or the object's __reduce__ method.
4391 */
4392 if (self->dispatch_table == NULL) {
4393 reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4394 (PyObject *)type);
4395 if (reduce_func == NULL) {
4396 if (PyErr_Occurred()) {
4397 goto error;
4398 }
4399 } else {
4400 /* PyDict_GetItemWithError() returns a borrowed reference.
4401 Increase the reference count to be consistent with
4402 PyObject_GetItem and _PyObject_GetAttrId used below. */
4403 Py_INCREF(reduce_func);
4404 }
4405 }
4406 else if (PyMapping_GetOptionalItem(self->dispatch_table, (PyObject *)type,
4407 &reduce_func) < 0)
4408 {
4409 goto error;
4410 }
4411
4412 if (reduce_func != NULL) {
4413 reduce_value = _Pickle_FastCall(reduce_func, Py_NewRef(obj));
4414 }
4415 else if (PyType_IsSubtype(type, &PyType_Type)) {
4416 status = save_global(st, self, obj, NULL);
4417 goto done;
4418 }
4419 else {
4420 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4421 automatically defined as __reduce__. While this is convenient, this
4422 make it impossible to know which method was actually called. Of
4423 course, this is not a big deal. But still, it would be nice to let
4424 the user know which method was called when something go
4425 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4426 don't actually have to check for a __reduce__ method. */
4427
4428 /* Check for a __reduce_ex__ method. */
4429 if (PyObject_GetOptionalAttr(obj, &_Py_ID(__reduce_ex__), &reduce_func) < 0) {
4430 goto error;
4431 }
4432 if (reduce_func != NULL) {
4433 PyObject *proto;
4434 proto = PyLong_FromLong(self->proto);
4435 if (proto != NULL) {
4436 reduce_value = _Pickle_FastCall(reduce_func, proto);
4437 }
4438 }
4439 else {
4440 /* Check for a __reduce__ method. */
4441 if (PyObject_GetOptionalAttr(obj, &_Py_ID(__reduce__), &reduce_func) < 0) {
4442 goto error;
4443 }
4444 if (reduce_func != NULL) {
4445 reduce_value = PyObject_CallNoArgs(reduce_func);
4446 }
4447 else {
4448 PyErr_Format(st->PicklingError,
4449 "can't pickle '%.200s' object: %R",
4450 type->tp_name, obj);
4451 goto error;
4452 }
4453 }
4454 }
4455
4456 if (reduce_value == NULL)
4457 goto error;
4458
4459 reduce:
4460 if (PyUnicode_Check(reduce_value)) {
4461 status = save_global(st, self, obj, reduce_value);
4462 goto done;
4463 }
4464
4465 if (!PyTuple_Check(reduce_value)) {
4466 PyErr_SetString(st->PicklingError,
4467 "__reduce__ must return a string or tuple");
4468 goto error;
4469 }
4470
4471 status = save_reduce(st, self, reduce_value, obj);
4472
4473 if (0) {
4474 error:
4475 status = -1;
4476 }
4477 done:
4478
4479 _Py_LeaveRecursiveCall();
4480 Py_XDECREF(reduce_func);
4481 Py_XDECREF(reduce_value);
4482
4483 return status;
4484 }
4485
4486 static PyObject *
persistent_id(PyObject * self,PyObject * obj)4487 persistent_id(PyObject *self, PyObject *obj)
4488 {
4489 Py_RETURN_NONE;
4490 }
4491
4492 static int
dump(PickleState * state,PicklerObject * self,PyObject * obj)4493 dump(PickleState *state, PicklerObject *self, PyObject *obj)
4494 {
4495 const char stop_op = STOP;
4496 int status = -1;
4497 PyObject *tmp;
4498
4499 /* Cache the persistent_id method. */
4500 tmp = PyObject_GetAttr((PyObject *)self, &_Py_ID(persistent_id));
4501 if (tmp == NULL) {
4502 goto error;
4503 }
4504 if (PyCFunction_Check(tmp) &&
4505 PyCFunction_GET_SELF(tmp) == (PyObject *)self &&
4506 PyCFunction_GET_FUNCTION(tmp) == persistent_id)
4507 {
4508 Py_CLEAR(tmp);
4509 }
4510 Py_XSETREF(self->persistent_id, tmp);
4511
4512 /* Cache the reducer_override method, if it exists. */
4513 if (PyObject_GetOptionalAttr((PyObject *)self, &_Py_ID(reducer_override),
4514 &tmp) < 0) {
4515 goto error;
4516 }
4517 Py_XSETREF(self->reducer_override, tmp);
4518
4519 if (self->proto >= 2) {
4520 char header[2];
4521
4522 header[0] = PROTO;
4523 assert(self->proto >= 0 && self->proto < 256);
4524 header[1] = (unsigned char)self->proto;
4525 if (_Pickler_Write(self, header, 2) < 0)
4526 goto error;
4527 if (self->proto >= 4)
4528 self->framing = 1;
4529 }
4530
4531 if (save(state, self, obj, 0) < 0 ||
4532 _Pickler_Write(self, &stop_op, 1) < 0 ||
4533 _Pickler_CommitFrame(self) < 0)
4534 goto error;
4535
4536 // Success
4537 status = 0;
4538
4539 error:
4540 self->framing = 0;
4541
4542 /* Break the reference cycle we generated at the beginning this function
4543 * call when setting the persistent_id and the reducer_override attributes
4544 * of the Pickler instance to a bound method of the same instance.
4545 * This is important as the Pickler instance holds a reference to each
4546 * object it has pickled (through its memo): thus, these objects won't
4547 * be garbage-collected as long as the Pickler itself is not collected. */
4548 Py_CLEAR(self->persistent_id);
4549 Py_CLEAR(self->reducer_override);
4550 return status;
4551 }
4552
4553 /*[clinic input]
4554
4555 _pickle.Pickler.clear_memo
4556
4557 Clears the pickler's "memo".
4558
4559 The memo is the data structure that remembers which objects the
4560 pickler has already seen, so that shared or recursive objects are
4561 pickled by reference and not by value. This method is useful when
4562 re-using picklers.
4563 [clinic start generated code]*/
4564
4565 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4566 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4567 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4568 {
4569 if (self->memo)
4570 PyMemoTable_Clear(self->memo);
4571
4572 Py_RETURN_NONE;
4573 }
4574
4575 /*[clinic input]
4576
4577 _pickle.Pickler.dump
4578
4579 cls: defining_class
4580 obj: object
4581 /
4582
4583 Write a pickled representation of the given object to the open file.
4584 [clinic start generated code]*/
4585
4586 static PyObject *
_pickle_Pickler_dump_impl(PicklerObject * self,PyTypeObject * cls,PyObject * obj)4587 _pickle_Pickler_dump_impl(PicklerObject *self, PyTypeObject *cls,
4588 PyObject *obj)
4589 /*[clinic end generated code: output=952cf7f68b1445bb input=f949d84151983594]*/
4590 {
4591 PickleState *st = _Pickle_GetStateByClass(cls);
4592 /* Check whether the Pickler was initialized correctly (issue3664).
4593 Developers often forget to call __init__() in their subclasses, which
4594 would trigger a segfault without this check. */
4595 if (self->write == NULL) {
4596 PyErr_Format(st->PicklingError,
4597 "Pickler.__init__() was not called by %s.__init__()",
4598 Py_TYPE(self)->tp_name);
4599 return NULL;
4600 }
4601
4602 if (_Pickler_ClearBuffer(self) < 0)
4603 return NULL;
4604
4605 if (dump(st, self, obj) < 0)
4606 return NULL;
4607
4608 if (_Pickler_FlushToFile(self) < 0)
4609 return NULL;
4610
4611 Py_RETURN_NONE;
4612 }
4613
4614 /*[clinic input]
4615
4616 _pickle.Pickler.__sizeof__ -> size_t
4617
4618 Returns size in memory, in bytes.
4619 [clinic start generated code]*/
4620
4621 static size_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4622 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4623 /*[clinic end generated code: output=23ad75658d3b59ff input=d8127c8e7012ebd7]*/
4624 {
4625 size_t res = _PyObject_SIZE(Py_TYPE(self));
4626 if (self->memo != NULL) {
4627 res += sizeof(PyMemoTable);
4628 res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4629 }
4630 if (self->output_buffer != NULL) {
4631 size_t s = _PySys_GetSizeOf(self->output_buffer);
4632 if (s == (size_t)-1) {
4633 return -1;
4634 }
4635 res += s;
4636 }
4637 return res;
4638 }
4639
4640 static struct PyMethodDef Pickler_methods[] = {
4641 {"persistent_id", persistent_id, METH_O,
4642 PyDoc_STR("persistent_id($self, obj, /)\n--\n\n")},
4643 _PICKLE_PICKLER_DUMP_METHODDEF
4644 _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
4645 _PICKLE_PICKLER___SIZEOF___METHODDEF
4646 {NULL, NULL} /* sentinel */
4647 };
4648
4649 static int
Pickler_clear(PicklerObject * self)4650 Pickler_clear(PicklerObject *self)
4651 {
4652 Py_CLEAR(self->output_buffer);
4653 Py_CLEAR(self->write);
4654 Py_CLEAR(self->persistent_id);
4655 Py_CLEAR(self->dispatch_table);
4656 Py_CLEAR(self->fast_memo);
4657 Py_CLEAR(self->reducer_override);
4658 Py_CLEAR(self->buffer_callback);
4659
4660 if (self->memo != NULL) {
4661 PyMemoTable *memo = self->memo;
4662 self->memo = NULL;
4663 PyMemoTable_Del(memo);
4664 }
4665 return 0;
4666 }
4667
4668 static void
Pickler_dealloc(PicklerObject * self)4669 Pickler_dealloc(PicklerObject *self)
4670 {
4671 PyTypeObject *tp = Py_TYPE(self);
4672 PyObject_GC_UnTrack(self);
4673 (void)Pickler_clear(self);
4674 tp->tp_free((PyObject *)self);
4675 Py_DECREF(tp);
4676 }
4677
4678 static int
Pickler_traverse(PicklerObject * self,visitproc visit,void * arg)4679 Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
4680 {
4681 Py_VISIT(Py_TYPE(self));
4682 Py_VISIT(self->write);
4683 Py_VISIT(self->persistent_id);
4684 Py_VISIT(self->dispatch_table);
4685 Py_VISIT(self->fast_memo);
4686 Py_VISIT(self->reducer_override);
4687 Py_VISIT(self->buffer_callback);
4688 PyMemoTable *memo = self->memo;
4689 if (memo && memo->mt_table) {
4690 Py_ssize_t i = memo->mt_allocated;
4691 while (--i >= 0) {
4692 Py_VISIT(memo->mt_table[i].me_key);
4693 }
4694 }
4695
4696 return 0;
4697 }
4698
4699
4700 /*[clinic input]
4701
4702 _pickle.Pickler.__init__
4703
4704 file: object
4705 protocol: object = None
4706 fix_imports: bool = True
4707 buffer_callback: object = None
4708
4709 This takes a binary file for writing a pickle data stream.
4710
4711 The optional *protocol* argument tells the pickler to use the given
4712 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
4713 protocol is 4. It was introduced in Python 3.4, and is incompatible
4714 with previous versions.
4715
4716 Specifying a negative protocol version selects the highest protocol
4717 version supported. The higher the protocol used, the more recent the
4718 version of Python needed to read the pickle produced.
4719
4720 The *file* argument must have a write() method that accepts a single
4721 bytes argument. It can thus be a file object opened for binary
4722 writing, an io.BytesIO instance, or any other custom object that meets
4723 this interface.
4724
4725 If *fix_imports* is True and protocol is less than 3, pickle will try
4726 to map the new Python 3 names to the old module names used in Python
4727 2, so that the pickle data stream is readable with Python 2.
4728
4729 If *buffer_callback* is None (the default), buffer views are
4730 serialized into *file* as part of the pickle stream.
4731
4732 If *buffer_callback* is not None, then it can be called any number
4733 of times with a buffer view. If the callback returns a false value
4734 (such as None), the given buffer is out-of-band; otherwise the
4735 buffer is serialized in-band, i.e. inside the pickle stream.
4736
4737 It is an error if *buffer_callback* is not None and *protocol*
4738 is None or smaller than 5.
4739
4740 [clinic start generated code]*/
4741
4742 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)4743 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4744 PyObject *protocol, int fix_imports,
4745 PyObject *buffer_callback)
4746 /*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
4747 {
4748 /* In case of multiple __init__() calls, clear previous content. */
4749 if (self->write != NULL)
4750 (void)Pickler_clear(self);
4751
4752 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4753 return -1;
4754
4755 if (_Pickler_SetOutputStream(self, file) < 0)
4756 return -1;
4757
4758 if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4759 return -1;
4760
4761 /* memo and output_buffer may have already been created in _Pickler_New */
4762 if (self->memo == NULL) {
4763 self->memo = PyMemoTable_New();
4764 if (self->memo == NULL)
4765 return -1;
4766 }
4767 self->output_len = 0;
4768 if (self->output_buffer == NULL) {
4769 self->max_output_len = WRITE_BUF_SIZE;
4770 self->output_buffer = PyBytes_FromStringAndSize(NULL,
4771 self->max_output_len);
4772 if (self->output_buffer == NULL)
4773 return -1;
4774 }
4775
4776 self->fast = 0;
4777 self->fast_nesting = 0;
4778 self->fast_memo = NULL;
4779
4780 if (self->dispatch_table != NULL) {
4781 return 0;
4782 }
4783 if (PyObject_GetOptionalAttr((PyObject *)self, &_Py_ID(dispatch_table),
4784 &self->dispatch_table) < 0) {
4785 return -1;
4786 }
4787
4788 return 0;
4789 }
4790
4791
4792 /* Define a proxy object for the Pickler's internal memo object. This is to
4793 * avoid breaking code like:
4794 * pickler.memo.clear()
4795 * and
4796 * pickler.memo = saved_memo
4797 * Is this a good idea? Not really, but we don't want to break code that uses
4798 * it. Note that we don't implement the entire mapping API here. This is
4799 * intentional, as these should be treated as black-box implementation details.
4800 */
4801
4802 /*[clinic input]
4803 _pickle.PicklerMemoProxy.clear
4804
4805 Remove all items from memo.
4806 [clinic start generated code]*/
4807
4808 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4809 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4810 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4811 {
4812 if (self->pickler->memo)
4813 PyMemoTable_Clear(self->pickler->memo);
4814 Py_RETURN_NONE;
4815 }
4816
4817 /*[clinic input]
4818 _pickle.PicklerMemoProxy.copy
4819
4820 Copy the memo to a new object.
4821 [clinic start generated code]*/
4822
4823 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4824 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4825 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4826 {
4827 PyMemoTable *memo;
4828 PyObject *new_memo = PyDict_New();
4829 if (new_memo == NULL)
4830 return NULL;
4831
4832 memo = self->pickler->memo;
4833 for (size_t i = 0; i < memo->mt_allocated; ++i) {
4834 PyMemoEntry entry = memo->mt_table[i];
4835 if (entry.me_key != NULL) {
4836 int status;
4837 PyObject *key, *value;
4838
4839 key = PyLong_FromVoidPtr(entry.me_key);
4840 if (key == NULL) {
4841 goto error;
4842 }
4843 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4844 if (value == NULL) {
4845 Py_DECREF(key);
4846 goto error;
4847 }
4848 status = PyDict_SetItem(new_memo, key, value);
4849 Py_DECREF(key);
4850 Py_DECREF(value);
4851 if (status < 0)
4852 goto error;
4853 }
4854 }
4855 return new_memo;
4856
4857 error:
4858 Py_XDECREF(new_memo);
4859 return NULL;
4860 }
4861
4862 /*[clinic input]
4863 _pickle.PicklerMemoProxy.__reduce__
4864
4865 Implement pickle support.
4866 [clinic start generated code]*/
4867
4868 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4869 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4870 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4871 {
4872 PyObject *reduce_value, *dict_args;
4873 PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4874 if (contents == NULL)
4875 return NULL;
4876
4877 reduce_value = PyTuple_New(2);
4878 if (reduce_value == NULL) {
4879 Py_DECREF(contents);
4880 return NULL;
4881 }
4882 dict_args = PyTuple_New(1);
4883 if (dict_args == NULL) {
4884 Py_DECREF(contents);
4885 Py_DECREF(reduce_value);
4886 return NULL;
4887 }
4888 PyTuple_SET_ITEM(dict_args, 0, contents);
4889 PyTuple_SET_ITEM(reduce_value, 0, Py_NewRef(&PyDict_Type));
4890 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4891 return reduce_value;
4892 }
4893
4894 static PyMethodDef picklerproxy_methods[] = {
4895 _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
4896 _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
4897 _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
4898 {NULL, NULL} /* sentinel */
4899 };
4900
4901 static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject * self)4902 PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4903 {
4904 PyTypeObject *tp = Py_TYPE(self);
4905 PyObject_GC_UnTrack(self);
4906 Py_CLEAR(self->pickler);
4907 tp->tp_free((PyObject *)self);
4908 Py_DECREF(tp);
4909 }
4910
4911 static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject * self,visitproc visit,void * arg)4912 PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4913 visitproc visit, void *arg)
4914 {
4915 Py_VISIT(Py_TYPE(self));
4916 Py_VISIT(self->pickler);
4917 return 0;
4918 }
4919
4920 static int
PicklerMemoProxy_clear(PicklerMemoProxyObject * self)4921 PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4922 {
4923 Py_CLEAR(self->pickler);
4924 return 0;
4925 }
4926
4927 static PyType_Slot memoproxy_slots[] = {
4928 {Py_tp_dealloc, PicklerMemoProxy_dealloc},
4929 {Py_tp_traverse, PicklerMemoProxy_traverse},
4930 {Py_tp_clear, PicklerMemoProxy_clear},
4931 {Py_tp_methods, picklerproxy_methods},
4932 {Py_tp_hash, PyObject_HashNotImplemented},
4933 {0, NULL},
4934 };
4935
4936 static PyType_Spec memoproxy_spec = {
4937 .name = "_pickle.PicklerMemoProxy",
4938 .basicsize = sizeof(PicklerMemoProxyObject),
4939 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
4940 Py_TPFLAGS_IMMUTABLETYPE),
4941 .slots = memoproxy_slots,
4942 };
4943
4944 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4945 PicklerMemoProxy_New(PicklerObject *pickler)
4946 {
4947 PicklerMemoProxyObject *self;
4948 PickleState *st = _Pickle_FindStateByType(Py_TYPE(pickler));
4949 self = PyObject_GC_New(PicklerMemoProxyObject, st->PicklerMemoProxyType);
4950 if (self == NULL)
4951 return NULL;
4952 self->pickler = (PicklerObject*)Py_NewRef(pickler);
4953 PyObject_GC_Track(self);
4954 return (PyObject *)self;
4955 }
4956
4957 /*****************************************************************************/
4958
4959 static PyObject *
Pickler_get_memo(PicklerObject * self,void * Py_UNUSED (ignored))4960 Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
4961 {
4962 return PicklerMemoProxy_New(self);
4963 }
4964
4965 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4966 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4967 {
4968 PyMemoTable *new_memo = NULL;
4969
4970 if (obj == NULL) {
4971 PyErr_SetString(PyExc_TypeError,
4972 "attribute deletion is not supported");
4973 return -1;
4974 }
4975
4976 PickleState *st = _Pickle_FindStateByType(Py_TYPE(self));
4977 if (Py_IS_TYPE(obj, st->PicklerMemoProxyType)) {
4978 PicklerObject *pickler =
4979 ((PicklerMemoProxyObject *)obj)->pickler;
4980
4981 new_memo = PyMemoTable_Copy(pickler->memo);
4982 if (new_memo == NULL)
4983 return -1;
4984 }
4985 else if (PyDict_Check(obj)) {
4986 Py_ssize_t i = 0;
4987 PyObject *key, *value;
4988
4989 new_memo = PyMemoTable_New();
4990 if (new_memo == NULL)
4991 return -1;
4992
4993 while (PyDict_Next(obj, &i, &key, &value)) {
4994 Py_ssize_t memo_id;
4995 PyObject *memo_obj;
4996
4997 if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
4998 PyErr_SetString(PyExc_TypeError,
4999 "'memo' values must be 2-item tuples");
5000 goto error;
5001 }
5002 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
5003 if (memo_id == -1 && PyErr_Occurred())
5004 goto error;
5005 memo_obj = PyTuple_GET_ITEM(value, 1);
5006 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
5007 goto error;
5008 }
5009 }
5010 else {
5011 PyErr_Format(PyExc_TypeError,
5012 "'memo' attribute must be a PicklerMemoProxy object "
5013 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
5014 return -1;
5015 }
5016
5017 PyMemoTable_Del(self->memo);
5018 self->memo = new_memo;
5019
5020 return 0;
5021
5022 error:
5023 if (new_memo)
5024 PyMemoTable_Del(new_memo);
5025 return -1;
5026 }
5027
5028 static PyObject *
Pickler_getattr(PyObject * self,PyObject * name)5029 Pickler_getattr(PyObject *self, PyObject *name)
5030 {
5031 if (PyUnicode_Check(name)
5032 && PyUnicode_EqualToUTF8(name, "persistent_id")
5033 && ((PicklerObject *)self)->persistent_id_attr)
5034 {
5035 return Py_NewRef(((PicklerObject *)self)->persistent_id_attr);
5036 }
5037
5038 return PyObject_GenericGetAttr(self, name);
5039 }
5040
5041 static int
Pickler_setattr(PyObject * self,PyObject * name,PyObject * value)5042 Pickler_setattr(PyObject *self, PyObject *name, PyObject *value)
5043 {
5044 if (PyUnicode_Check(name)
5045 && PyUnicode_EqualToUTF8(name, "persistent_id"))
5046 {
5047 Py_XINCREF(value);
5048 Py_XSETREF(((PicklerObject *)self)->persistent_id_attr, value);
5049 return 0;
5050 }
5051
5052 return PyObject_GenericSetAttr(self, name, value);
5053 }
5054
5055 static PyMemberDef Pickler_members[] = {
5056 {"bin", Py_T_INT, offsetof(PicklerObject, bin)},
5057 {"fast", Py_T_INT, offsetof(PicklerObject, fast)},
5058 {"dispatch_table", Py_T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
5059 {NULL}
5060 };
5061
5062 static PyGetSetDef Pickler_getsets[] = {
5063 {"memo", (getter)Pickler_get_memo,
5064 (setter)Pickler_set_memo},
5065 {NULL}
5066 };
5067
5068 static PyType_Slot pickler_type_slots[] = {
5069 {Py_tp_dealloc, Pickler_dealloc},
5070 {Py_tp_getattro, Pickler_getattr},
5071 {Py_tp_setattro, Pickler_setattr},
5072 {Py_tp_methods, Pickler_methods},
5073 {Py_tp_members, Pickler_members},
5074 {Py_tp_getset, Pickler_getsets},
5075 {Py_tp_clear, Pickler_clear},
5076 {Py_tp_doc, (char*)_pickle_Pickler___init____doc__},
5077 {Py_tp_traverse, Pickler_traverse},
5078 {Py_tp_init, _pickle_Pickler___init__},
5079 {Py_tp_new, PyType_GenericNew},
5080 {Py_tp_alloc, PyType_GenericAlloc},
5081 {Py_tp_free, PyObject_GC_Del},
5082 {0, NULL},
5083 };
5084
5085 static PyType_Spec pickler_type_spec = {
5086 .name = "_pickle.Pickler",
5087 .basicsize = sizeof(PicklerObject),
5088 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
5089 Py_TPFLAGS_IMMUTABLETYPE),
5090 .slots = pickler_type_slots,
5091 };
5092
5093 /* Temporary helper for calling self.find_class().
5094
5095 XXX: It would be nice to able to avoid Python function call overhead, by
5096 using directly the C version of find_class(), when find_class() is not
5097 overridden by a subclass. Although, this could become rather hackish. A
5098 simpler optimization would be to call the C function when self is not a
5099 subclass instance. */
5100 static PyObject *
find_class(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)5101 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
5102 {
5103 return PyObject_CallMethodObjArgs((PyObject *)self, &_Py_ID(find_class),
5104 module_name, global_name, NULL);
5105 }
5106
5107 static Py_ssize_t
marker(PickleState * st,UnpicklerObject * self)5108 marker(PickleState *st, UnpicklerObject *self)
5109 {
5110 if (self->num_marks < 1) {
5111 PyErr_SetString(st->UnpicklingError, "could not find MARK");
5112 return -1;
5113 }
5114
5115 Py_ssize_t mark = self->marks[--self->num_marks];
5116 self->stack->mark_set = self->num_marks != 0;
5117 self->stack->fence = self->num_marks ?
5118 self->marks[self->num_marks - 1] : 0;
5119 return mark;
5120 }
5121
5122 static int
load_none(PickleState * state,UnpicklerObject * self)5123 load_none(PickleState *state, UnpicklerObject *self)
5124 {
5125 PDATA_APPEND(self->stack, Py_None, -1);
5126 return 0;
5127 }
5128
5129 static int
load_int(PickleState * state,UnpicklerObject * self)5130 load_int(PickleState *state, UnpicklerObject *self)
5131 {
5132 PyObject *value;
5133 char *endptr, *s;
5134 Py_ssize_t len;
5135 long x;
5136
5137 if ((len = _Unpickler_Readline(state, self, &s)) < 0)
5138 return -1;
5139 if (len < 2)
5140 return bad_readline(state);
5141
5142 errno = 0;
5143 /* XXX: Should the base argument of strtol() be explicitly set to 10?
5144 XXX(avassalotti): Should this uses PyOS_strtol()? */
5145 x = strtol(s, &endptr, 0);
5146
5147 if (errno || (*endptr != '\n' && *endptr != '\0')) {
5148 /* Hm, maybe we've got something long. Let's try reading
5149 * it as a Python int object. */
5150 errno = 0;
5151 /* XXX: Same thing about the base here. */
5152 value = PyLong_FromString(s, NULL, 0);
5153 if (value == NULL) {
5154 PyErr_SetString(PyExc_ValueError,
5155 "could not convert string to int");
5156 return -1;
5157 }
5158 }
5159 else {
5160 if (len == 3 && (x == 0 || x == 1)) {
5161 if ((value = PyBool_FromLong(x)) == NULL)
5162 return -1;
5163 }
5164 else {
5165 if ((value = PyLong_FromLong(x)) == NULL)
5166 return -1;
5167 }
5168 }
5169
5170 PDATA_PUSH(self->stack, value, -1);
5171 return 0;
5172 }
5173
5174 static int
load_bool(PickleState * state,UnpicklerObject * self,PyObject * boolean)5175 load_bool(PickleState *state, UnpicklerObject *self, PyObject *boolean)
5176 {
5177 assert(boolean == Py_True || boolean == Py_False);
5178 PDATA_APPEND(self->stack, boolean, -1);
5179 return 0;
5180 }
5181
5182 /* s contains x bytes of an unsigned little-endian integer. Return its value
5183 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5184 */
5185 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)5186 calc_binsize(char *bytes, int nbytes)
5187 {
5188 unsigned char *s = (unsigned char *)bytes;
5189 int i;
5190 size_t x = 0;
5191
5192 if (nbytes > (int)sizeof(size_t)) {
5193 /* Check for integer overflow. BINBYTES8 and BINUNICODE8 opcodes
5194 * have 64-bit size that can't be represented on 32-bit platform.
5195 */
5196 for (i = (int)sizeof(size_t); i < nbytes; i++) {
5197 if (s[i])
5198 return -1;
5199 }
5200 nbytes = (int)sizeof(size_t);
5201 }
5202 for (i = 0; i < nbytes; i++) {
5203 x |= (size_t) s[i] << (8 * i);
5204 }
5205
5206 if (x > PY_SSIZE_T_MAX)
5207 return -1;
5208 else
5209 return (Py_ssize_t) x;
5210 }
5211
/* `bytes` holds `nbytes` bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure: when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is a
 * historical source of x-platform bugs.
 *
 * nbytes is expected to be 1, 2 or 4.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long ux = 0;

    /* Accumulate in an unsigned type: shifting a byte >= 0x80 left by 24
     * bits in a signed 32-bit long would overflow into the sign bit, which
     * is undefined behaviour.
     */
    for (int i = 0; i < nbytes; i++) {
        ux |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed, so on a box with longs bigger than 4 bytes we need
     * to extend a BININT's sign bit to the full width.  When long is
     * exactly 4 bytes the mask below is 0 and this is a no-op.
     */
    if (nbytes == 4 && (ux & 0x80000000UL) != 0) {
        ux |= ~0xFFFFFFFFUL;
    }

    /* Reinterpret the two's-complement bit pattern as a signed long. */
    return (long)ux;
}
5238
5239 static int
load_binintx(UnpicklerObject * self,char * s,int size)5240 load_binintx(UnpicklerObject *self, char *s, int size)
5241 {
5242 PyObject *value;
5243 long x;
5244
5245 x = calc_binint(s, size);
5246
5247 if ((value = PyLong_FromLong(x)) == NULL)
5248 return -1;
5249
5250 PDATA_PUSH(self->stack, value, -1);
5251 return 0;
5252 }
5253
5254 static int
load_binint(PickleState * state,UnpicklerObject * self)5255 load_binint(PickleState *state, UnpicklerObject *self)
5256 {
5257 char *s;
5258 if (_Unpickler_Read(self, state, &s, 4) < 0)
5259 return -1;
5260
5261 return load_binintx(self, s, 4);
5262 }
5263
5264 static int
load_binint1(PickleState * state,UnpicklerObject * self)5265 load_binint1(PickleState *state, UnpicklerObject *self)
5266 {
5267 char *s;
5268 if (_Unpickler_Read(self, state, &s, 1) < 0)
5269 return -1;
5270
5271 return load_binintx(self, s, 1);
5272 }
5273
5274 static int
load_binint2(PickleState * state,UnpicklerObject * self)5275 load_binint2(PickleState *state, UnpicklerObject *self)
5276 {
5277 char *s;
5278 if (_Unpickler_Read(self, state, &s, 2) < 0)
5279 return -1;
5280
5281 return load_binintx(self, s, 2);
5282 }
5283
5284 static int
load_long(PickleState * state,UnpicklerObject * self)5285 load_long(PickleState *state, UnpicklerObject *self)
5286 {
5287 PyObject *value;
5288 char *s = NULL;
5289 Py_ssize_t len;
5290
5291 if ((len = _Unpickler_Readline(state, self, &s)) < 0)
5292 return -1;
5293 if (len < 2)
5294 return bad_readline(state);
5295
5296 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5297 the 'L' before calling PyLong_FromString. In order to maintain
5298 compatibility with Python 3.0.0, we don't actually *require*
5299 the 'L' to be present. */
5300 if (s[len-2] == 'L')
5301 s[len-2] = '\0';
5302 /* XXX: Should the base argument explicitly set to 10? */
5303 value = PyLong_FromString(s, NULL, 0);
5304 if (value == NULL)
5305 return -1;
5306
5307 PDATA_PUSH(self->stack, value, -1);
5308 return 0;
5309 }
5310
5311 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
5312 * data following.
5313 */
5314 static int
load_counted_long(PickleState * st,UnpicklerObject * self,int size)5315 load_counted_long(PickleState *st, UnpicklerObject *self, int size)
5316 {
5317 PyObject *value;
5318 char *nbytes;
5319 char *pdata;
5320
5321 assert(size == 1 || size == 4);
5322 if (_Unpickler_Read(self, st, &nbytes, size) < 0)
5323 return -1;
5324
5325 size = calc_binint(nbytes, size);
5326 if (size < 0) {
5327 /* Corrupt or hostile pickle -- we never write one like this */
5328 PyErr_SetString(st->UnpicklingError,
5329 "LONG pickle has negative byte count");
5330 return -1;
5331 }
5332
5333 if (size == 0)
5334 value = PyLong_FromLong(0L);
5335 else {
5336 /* Read the raw little-endian bytes and convert. */
5337 if (_Unpickler_Read(self, st, &pdata, size) < 0)
5338 return -1;
5339 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5340 1 /* little endian */ , 1 /* signed */ );
5341 }
5342 if (value == NULL)
5343 return -1;
5344 PDATA_PUSH(self->stack, value, -1);
5345 return 0;
5346 }
5347
5348 static int
load_float(PickleState * state,UnpicklerObject * self)5349 load_float(PickleState *state, UnpicklerObject *self)
5350 {
5351 PyObject *value;
5352 char *endptr, *s;
5353 Py_ssize_t len;
5354 double d;
5355
5356 if ((len = _Unpickler_Readline(state, self, &s)) < 0)
5357 return -1;
5358 if (len < 2)
5359 return bad_readline(state);
5360
5361 errno = 0;
5362 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5363 if (d == -1.0 && PyErr_Occurred())
5364 return -1;
5365 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5366 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5367 return -1;
5368 }
5369 value = PyFloat_FromDouble(d);
5370 if (value == NULL)
5371 return -1;
5372
5373 PDATA_PUSH(self->stack, value, -1);
5374 return 0;
5375 }
5376
5377 static int
load_binfloat(PickleState * state,UnpicklerObject * self)5378 load_binfloat(PickleState *state, UnpicklerObject *self)
5379 {
5380 PyObject *value;
5381 double x;
5382 char *s;
5383
5384 if (_Unpickler_Read(self, state, &s, 8) < 0)
5385 return -1;
5386
5387 x = PyFloat_Unpack8(s, 0);
5388 if (x == -1.0 && PyErr_Occurred())
5389 return -1;
5390
5391 if ((value = PyFloat_FromDouble(x)) == NULL)
5392 return -1;
5393
5394 PDATA_PUSH(self->stack, value, -1);
5395 return 0;
5396 }
5397
5398 static int
load_string(PickleState * st,UnpicklerObject * self)5399 load_string(PickleState *st, UnpicklerObject *self)
5400 {
5401 PyObject *bytes;
5402 PyObject *obj;
5403 Py_ssize_t len;
5404 char *s, *p;
5405
5406 if ((len = _Unpickler_Readline(st, self, &s)) < 0)
5407 return -1;
5408 /* Strip the newline */
5409 len--;
5410 /* Strip outermost quotes */
5411 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5412 p = s + 1;
5413 len -= 2;
5414 }
5415 else {
5416 PyErr_SetString(st->UnpicklingError,
5417 "the STRING opcode argument must be quoted");
5418 return -1;
5419 }
5420 assert(len >= 0);
5421
5422 /* Use the PyBytes API to decode the string, since that is what is used
5423 to encode, and then coerce the result to Unicode. */
5424 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5425 if (bytes == NULL)
5426 return -1;
5427
5428 /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5429 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5430 if (strcmp(self->encoding, "bytes") == 0) {
5431 obj = bytes;
5432 }
5433 else {
5434 obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5435 Py_DECREF(bytes);
5436 if (obj == NULL) {
5437 return -1;
5438 }
5439 }
5440
5441 PDATA_PUSH(self->stack, obj, -1);
5442 return 0;
5443 }
5444
5445 static int
load_counted_binstring(PickleState * st,UnpicklerObject * self,int nbytes)5446 load_counted_binstring(PickleState *st, UnpicklerObject *self, int nbytes)
5447 {
5448 PyObject *obj;
5449 Py_ssize_t size;
5450 char *s;
5451
5452 if (_Unpickler_Read(self, st, &s, nbytes) < 0)
5453 return -1;
5454
5455 size = calc_binsize(s, nbytes);
5456 if (size < 0) {
5457 PyErr_Format(st->UnpicklingError,
5458 "BINSTRING exceeds system's maximum size of %zd bytes",
5459 PY_SSIZE_T_MAX);
5460 return -1;
5461 }
5462
5463 if (_Unpickler_Read(self, st, &s, size) < 0)
5464 return -1;
5465
5466 /* Convert Python 2.x strings to bytes if the *encoding* given to the
5467 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5468 if (strcmp(self->encoding, "bytes") == 0) {
5469 obj = PyBytes_FromStringAndSize(s, size);
5470 }
5471 else {
5472 obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5473 }
5474 if (obj == NULL) {
5475 return -1;
5476 }
5477
5478 PDATA_PUSH(self->stack, obj, -1);
5479 return 0;
5480 }
5481
5482 static int
load_counted_binbytes(PickleState * state,UnpicklerObject * self,int nbytes)5483 load_counted_binbytes(PickleState *state, UnpicklerObject *self, int nbytes)
5484 {
5485 PyObject *bytes;
5486 Py_ssize_t size;
5487 char *s;
5488
5489 if (_Unpickler_Read(self, state, &s, nbytes) < 0)
5490 return -1;
5491
5492 size = calc_binsize(s, nbytes);
5493 if (size < 0) {
5494 PyErr_Format(PyExc_OverflowError,
5495 "BINBYTES exceeds system's maximum size of %zd bytes",
5496 PY_SSIZE_T_MAX);
5497 return -1;
5498 }
5499
5500 bytes = PyBytes_FromStringAndSize(NULL, size);
5501 if (bytes == NULL)
5502 return -1;
5503 if (_Unpickler_ReadInto(state, self, PyBytes_AS_STRING(bytes), size) < 0) {
5504 Py_DECREF(bytes);
5505 return -1;
5506 }
5507
5508 PDATA_PUSH(self->stack, bytes, -1);
5509 return 0;
5510 }
5511
5512 static int
load_counted_bytearray(PickleState * state,UnpicklerObject * self)5513 load_counted_bytearray(PickleState *state, UnpicklerObject *self)
5514 {
5515 PyObject *bytearray;
5516 Py_ssize_t size;
5517 char *s;
5518
5519 if (_Unpickler_Read(self, state, &s, 8) < 0) {
5520 return -1;
5521 }
5522
5523 size = calc_binsize(s, 8);
5524 if (size < 0) {
5525 PyErr_Format(PyExc_OverflowError,
5526 "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5527 PY_SSIZE_T_MAX);
5528 return -1;
5529 }
5530
5531 bytearray = PyByteArray_FromStringAndSize(NULL, size);
5532 if (bytearray == NULL) {
5533 return -1;
5534 }
5535 char *str = PyByteArray_AS_STRING(bytearray);
5536 if (_Unpickler_ReadInto(state, self, str, size) < 0) {
5537 Py_DECREF(bytearray);
5538 return -1;
5539 }
5540
5541 PDATA_PUSH(self->stack, bytearray, -1);
5542 return 0;
5543 }
5544
5545 static int
load_next_buffer(PickleState * st,UnpicklerObject * self)5546 load_next_buffer(PickleState *st, UnpicklerObject *self)
5547 {
5548 if (self->buffers == NULL) {
5549 PyErr_SetString(st->UnpicklingError,
5550 "pickle stream refers to out-of-band data "
5551 "but no *buffers* argument was given");
5552 return -1;
5553 }
5554 PyObject *buf = PyIter_Next(self->buffers);
5555 if (buf == NULL) {
5556 if (!PyErr_Occurred()) {
5557 PyErr_SetString(st->UnpicklingError,
5558 "not enough out-of-band buffers");
5559 }
5560 return -1;
5561 }
5562
5563 PDATA_PUSH(self->stack, buf, -1);
5564 return 0;
5565 }
5566
5567 static int
load_readonly_buffer(PickleState * state,UnpicklerObject * self)5568 load_readonly_buffer(PickleState *state, UnpicklerObject *self)
5569 {
5570 Py_ssize_t len = Py_SIZE(self->stack);
5571 if (len <= self->stack->fence) {
5572 return Pdata_stack_underflow(state, self->stack);
5573 }
5574
5575 PyObject *obj = self->stack->data[len - 1];
5576 PyObject *view = PyMemoryView_FromObject(obj);
5577 if (view == NULL) {
5578 return -1;
5579 }
5580 if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5581 /* Original object is writable */
5582 PyMemoryView_GET_BUFFER(view)->readonly = 1;
5583 self->stack->data[len - 1] = view;
5584 Py_DECREF(obj);
5585 }
5586 else {
5587 /* Original object is read-only, no need to replace it */
5588 Py_DECREF(view);
5589 }
5590 return 0;
5591 }
5592
5593 static int
load_unicode(PickleState * state,UnpicklerObject * self)5594 load_unicode(PickleState *state, UnpicklerObject *self)
5595 {
5596 PyObject *str;
5597 Py_ssize_t len;
5598 char *s = NULL;
5599
5600 if ((len = _Unpickler_Readline(state, self, &s)) < 0)
5601 return -1;
5602 if (len < 1)
5603 return bad_readline(state);
5604
5605 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5606 if (str == NULL)
5607 return -1;
5608
5609 PDATA_PUSH(self->stack, str, -1);
5610 return 0;
5611 }
5612
5613 static int
load_counted_binunicode(PickleState * state,UnpicklerObject * self,int nbytes)5614 load_counted_binunicode(PickleState *state, UnpicklerObject *self, int nbytes)
5615 {
5616 PyObject *str;
5617 Py_ssize_t size;
5618 char *s;
5619
5620 if (_Unpickler_Read(self, state, &s, nbytes) < 0)
5621 return -1;
5622
5623 size = calc_binsize(s, nbytes);
5624 if (size < 0) {
5625 PyErr_Format(PyExc_OverflowError,
5626 "BINUNICODE exceeds system's maximum size of %zd bytes",
5627 PY_SSIZE_T_MAX);
5628 return -1;
5629 }
5630
5631 if (_Unpickler_Read(self, state, &s, size) < 0)
5632 return -1;
5633
5634 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5635 if (str == NULL)
5636 return -1;
5637
5638 PDATA_PUSH(self->stack, str, -1);
5639 return 0;
5640 }
5641
5642 static int
load_counted_tuple(PickleState * state,UnpicklerObject * self,Py_ssize_t len)5643 load_counted_tuple(PickleState *state, UnpicklerObject *self, Py_ssize_t len)
5644 {
5645 PyObject *tuple;
5646
5647 if (Py_SIZE(self->stack) < len)
5648 return Pdata_stack_underflow(state, self->stack);
5649
5650 tuple = Pdata_poptuple(state, self->stack, Py_SIZE(self->stack) - len);
5651 if (tuple == NULL)
5652 return -1;
5653 PDATA_PUSH(self->stack, tuple, -1);
5654 return 0;
5655 }
5656
5657 static int
load_tuple(PickleState * state,UnpicklerObject * self)5658 load_tuple(PickleState *state, UnpicklerObject *self)
5659 {
5660 Py_ssize_t i;
5661
5662 if ((i = marker(state, self)) < 0)
5663 return -1;
5664
5665 return load_counted_tuple(state, self, Py_SIZE(self->stack) - i);
5666 }
5667
5668 static int
load_empty_list(PickleState * state,UnpicklerObject * self)5669 load_empty_list(PickleState *state, UnpicklerObject *self)
5670 {
5671 PyObject *list;
5672
5673 if ((list = PyList_New(0)) == NULL)
5674 return -1;
5675 PDATA_PUSH(self->stack, list, -1);
5676 return 0;
5677 }
5678
5679 static int
load_empty_dict(PickleState * state,UnpicklerObject * self)5680 load_empty_dict(PickleState *state, UnpicklerObject *self)
5681 {
5682 PyObject *dict;
5683
5684 if ((dict = PyDict_New()) == NULL)
5685 return -1;
5686 PDATA_PUSH(self->stack, dict, -1);
5687 return 0;
5688 }
5689
5690 static int
load_empty_set(PickleState * state,UnpicklerObject * self)5691 load_empty_set(PickleState *state, UnpicklerObject *self)
5692 {
5693 PyObject *set;
5694
5695 if ((set = PySet_New(NULL)) == NULL)
5696 return -1;
5697 PDATA_PUSH(self->stack, set, -1);
5698 return 0;
5699 }
5700
5701 static int
load_list(PickleState * state,UnpicklerObject * self)5702 load_list(PickleState *state, UnpicklerObject *self)
5703 {
5704 PyObject *list;
5705 Py_ssize_t i;
5706
5707 if ((i = marker(state, self)) < 0)
5708 return -1;
5709
5710 list = Pdata_poplist(self->stack, i);
5711 if (list == NULL)
5712 return -1;
5713 PDATA_PUSH(self->stack, list, -1);
5714 return 0;
5715 }
5716
5717 static int
load_dict(PickleState * st,UnpicklerObject * self)5718 load_dict(PickleState *st, UnpicklerObject *self)
5719 {
5720 PyObject *dict, *key, *value;
5721 Py_ssize_t i, j, k;
5722
5723 if ((i = marker(st, self)) < 0)
5724 return -1;
5725 j = Py_SIZE(self->stack);
5726
5727 if ((dict = PyDict_New()) == NULL)
5728 return -1;
5729
5730 if ((j - i) % 2 != 0) {
5731 PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5732 Py_DECREF(dict);
5733 return -1;
5734 }
5735
5736 for (k = i + 1; k < j; k += 2) {
5737 key = self->stack->data[k - 1];
5738 value = self->stack->data[k];
5739 if (PyDict_SetItem(dict, key, value) < 0) {
5740 Py_DECREF(dict);
5741 return -1;
5742 }
5743 }
5744 Pdata_clear(self->stack, i);
5745 PDATA_PUSH(self->stack, dict, -1);
5746 return 0;
5747 }
5748
5749 static int
load_frozenset(PickleState * state,UnpicklerObject * self)5750 load_frozenset(PickleState *state, UnpicklerObject *self)
5751 {
5752 PyObject *items;
5753 PyObject *frozenset;
5754 Py_ssize_t i;
5755
5756 if ((i = marker(state, self)) < 0)
5757 return -1;
5758
5759 items = Pdata_poptuple(state, self->stack, i);
5760 if (items == NULL)
5761 return -1;
5762
5763 frozenset = PyFrozenSet_New(items);
5764 Py_DECREF(items);
5765 if (frozenset == NULL)
5766 return -1;
5767
5768 PDATA_PUSH(self->stack, frozenset, -1);
5769 return 0;
5770 }
5771
5772 static PyObject *
instantiate(PyObject * cls,PyObject * args)5773 instantiate(PyObject *cls, PyObject *args)
5774 {
5775 /* Caller must assure args are a tuple. Normally, args come from
5776 Pdata_poptuple which packs objects from the top of the stack
5777 into a newly created tuple. */
5778 assert(PyTuple_Check(args));
5779 if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5780 int rc = PyObject_HasAttrWithError(cls, &_Py_ID(__getinitargs__));
5781 if (rc < 0) {
5782 return NULL;
5783 }
5784 if (!rc) {
5785 return PyObject_CallMethodOneArg(cls, &_Py_ID(__new__), cls);
5786 }
5787 }
5788 return PyObject_CallObject(cls, args);
5789 }
5790
5791 static int
load_obj(PickleState * state,UnpicklerObject * self)5792 load_obj(PickleState *state, UnpicklerObject *self)
5793 {
5794 PyObject *cls, *args, *obj = NULL;
5795 Py_ssize_t i;
5796
5797 if ((i = marker(state, self)) < 0)
5798 return -1;
5799
5800 if (Py_SIZE(self->stack) - i < 1)
5801 return Pdata_stack_underflow(state, self->stack);
5802
5803 args = Pdata_poptuple(state, self->stack, i + 1);
5804 if (args == NULL)
5805 return -1;
5806
5807 PDATA_POP(state, self->stack, cls);
5808 if (cls) {
5809 obj = instantiate(cls, args);
5810 Py_DECREF(cls);
5811 }
5812 Py_DECREF(args);
5813 if (obj == NULL)
5814 return -1;
5815
5816 PDATA_PUSH(self->stack, obj, -1);
5817 return 0;
5818 }
5819
5820 static int
load_inst(PickleState * state,UnpicklerObject * self)5821 load_inst(PickleState *state, UnpicklerObject *self)
5822 {
5823 PyObject *cls = NULL;
5824 PyObject *args = NULL;
5825 PyObject *obj = NULL;
5826 PyObject *module_name;
5827 PyObject *class_name;
5828 Py_ssize_t len;
5829 Py_ssize_t i;
5830 char *s;
5831
5832 if ((i = marker(state, self)) < 0)
5833 return -1;
5834 if ((len = _Unpickler_Readline(state, self, &s)) < 0)
5835 return -1;
5836 if (len < 2)
5837 return bad_readline(state);
5838
5839 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5840 identifiers are permitted in Python 3.0, since the INST opcode is only
5841 supported by older protocols on Python 2.x. */
5842 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5843 if (module_name == NULL)
5844 return -1;
5845
5846 if ((len = _Unpickler_Readline(state, self, &s)) >= 0) {
5847 if (len < 2) {
5848 Py_DECREF(module_name);
5849 return bad_readline(state);
5850 }
5851 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5852 if (class_name != NULL) {
5853 cls = find_class(self, module_name, class_name);
5854 Py_DECREF(class_name);
5855 }
5856 }
5857 Py_DECREF(module_name);
5858
5859 if (cls == NULL)
5860 return -1;
5861
5862 if ((args = Pdata_poptuple(state, self->stack, i)) != NULL) {
5863 obj = instantiate(cls, args);
5864 Py_DECREF(args);
5865 }
5866 Py_DECREF(cls);
5867
5868 if (obj == NULL)
5869 return -1;
5870
5871 PDATA_PUSH(self->stack, obj, -1);
5872 return 0;
5873 }
5874
5875 static void
newobj_unpickling_error(PickleState * st,const char * msg,int use_kwargs,PyObject * arg)5876 newobj_unpickling_error(PickleState *st, const char *msg, int use_kwargs,
5877 PyObject *arg)
5878 {
5879 PyErr_Format(st->UnpicklingError, msg,
5880 use_kwargs ? "NEWOBJ_EX" : "NEWOBJ",
5881 Py_TYPE(arg)->tp_name);
5882 }
5883
5884 static int
load_newobj(PickleState * state,UnpicklerObject * self,int use_kwargs)5885 load_newobj(PickleState *state, UnpicklerObject *self, int use_kwargs)
5886 {
5887 PyObject *cls, *args, *kwargs = NULL;
5888 PyObject *obj;
5889
5890 /* Stack is ... cls args [kwargs], and we want to call
5891 * cls.__new__(cls, *args, **kwargs).
5892 */
5893 if (use_kwargs) {
5894 PDATA_POP(state, self->stack, kwargs);
5895 if (kwargs == NULL) {
5896 return -1;
5897 }
5898 }
5899 PDATA_POP(state, self->stack, args);
5900 if (args == NULL) {
5901 Py_XDECREF(kwargs);
5902 return -1;
5903 }
5904 PDATA_POP(state, self->stack, cls);
5905 if (cls == NULL) {
5906 Py_XDECREF(kwargs);
5907 Py_DECREF(args);
5908 return -1;
5909 }
5910
5911 if (!PyType_Check(cls)) {
5912 newobj_unpickling_error(state,
5913 "%s class argument must be a type, not %.200s",
5914 use_kwargs, cls);
5915 goto error;
5916 }
5917 if (((PyTypeObject *)cls)->tp_new == NULL) {
5918 newobj_unpickling_error(state,
5919 "%s class argument '%.200s' doesn't have __new__",
5920 use_kwargs, cls);
5921 goto error;
5922 }
5923 if (!PyTuple_Check(args)) {
5924 newobj_unpickling_error(state,
5925 "%s args argument must be a tuple, not %.200s",
5926 use_kwargs, args);
5927 goto error;
5928 }
5929 if (use_kwargs && !PyDict_Check(kwargs)) {
5930 newobj_unpickling_error(state,
5931 "%s kwargs argument must be a dict, not %.200s",
5932 use_kwargs, kwargs);
5933 goto error;
5934 }
5935
5936 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5937 if (obj == NULL) {
5938 goto error;
5939 }
5940 Py_XDECREF(kwargs);
5941 Py_DECREF(args);
5942 Py_DECREF(cls);
5943 PDATA_PUSH(self->stack, obj, -1);
5944 return 0;
5945
5946 error:
5947 Py_XDECREF(kwargs);
5948 Py_DECREF(args);
5949 Py_DECREF(cls);
5950 return -1;
5951 }
5952
5953 static int
load_global(PickleState * state,UnpicklerObject * self)5954 load_global(PickleState *state, UnpicklerObject *self)
5955 {
5956 PyObject *global = NULL;
5957 PyObject *module_name;
5958 PyObject *global_name;
5959 Py_ssize_t len;
5960 char *s;
5961
5962 if ((len = _Unpickler_Readline(state, self, &s)) < 0)
5963 return -1;
5964 if (len < 2)
5965 return bad_readline(state);
5966 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5967 if (!module_name)
5968 return -1;
5969
5970 if ((len = _Unpickler_Readline(state, self, &s)) >= 0) {
5971 if (len < 2) {
5972 Py_DECREF(module_name);
5973 return bad_readline(state);
5974 }
5975 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5976 if (global_name) {
5977 global = find_class(self, module_name, global_name);
5978 Py_DECREF(global_name);
5979 }
5980 }
5981 Py_DECREF(module_name);
5982
5983 if (global == NULL)
5984 return -1;
5985 PDATA_PUSH(self->stack, global, -1);
5986 return 0;
5987 }
5988
5989 static int
load_stack_global(PickleState * st,UnpicklerObject * self)5990 load_stack_global(PickleState *st, UnpicklerObject *self)
5991 {
5992 PyObject *global;
5993 PyObject *module_name;
5994 PyObject *global_name;
5995
5996 PDATA_POP(st, self->stack, global_name);
5997 if (global_name == NULL) {
5998 return -1;
5999 }
6000 PDATA_POP(st, self->stack, module_name);
6001 if (module_name == NULL) {
6002 Py_DECREF(global_name);
6003 return -1;
6004 }
6005 if (!PyUnicode_CheckExact(module_name) ||
6006 !PyUnicode_CheckExact(global_name))
6007 {
6008 PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
6009 Py_DECREF(global_name);
6010 Py_DECREF(module_name);
6011 return -1;
6012 }
6013 global = find_class(self, module_name, global_name);
6014 Py_DECREF(global_name);
6015 Py_DECREF(module_name);
6016 if (global == NULL)
6017 return -1;
6018 PDATA_PUSH(self->stack, global, -1);
6019 return 0;
6020 }
6021
6022 static int
load_persid(PickleState * st,UnpicklerObject * self)6023 load_persid(PickleState *st, UnpicklerObject *self)
6024 {
6025 PyObject *pid, *obj;
6026 Py_ssize_t len;
6027 char *s;
6028
6029 if ((len = _Unpickler_Readline(st, self, &s)) < 0)
6030 return -1;
6031 if (len < 1)
6032 return bad_readline(st);
6033
6034 pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6035 if (pid == NULL) {
6036 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6037 PyErr_SetString(st->UnpicklingError,
6038 "persistent IDs in protocol 0 must be "
6039 "ASCII strings");
6040 }
6041 return -1;
6042 }
6043
6044 obj = PyObject_CallOneArg(self->persistent_load, pid);
6045 Py_DECREF(pid);
6046 if (obj == NULL)
6047 return -1;
6048
6049 PDATA_PUSH(self->stack, obj, -1);
6050 return 0;
6051 }
6052
6053 static int
load_binpersid(PickleState * st,UnpicklerObject * self)6054 load_binpersid(PickleState *st, UnpicklerObject *self)
6055 {
6056 PyObject *pid, *obj;
6057
6058 PDATA_POP(st, self->stack, pid);
6059 if (pid == NULL)
6060 return -1;
6061
6062 obj = PyObject_CallOneArg(self->persistent_load, pid);
6063 Py_DECREF(pid);
6064 if (obj == NULL)
6065 return -1;
6066
6067 PDATA_PUSH(self->stack, obj, -1);
6068 return 0;
6069 }
6070
6071 static int
load_pop(PickleState * state,UnpicklerObject * self)6072 load_pop(PickleState *state, UnpicklerObject *self)
6073 {
6074 Py_ssize_t len = Py_SIZE(self->stack);
6075
6076 /* Note that we split the (pickle.py) stack into two stacks,
6077 * an object stack and a mark stack. We have to be clever and
6078 * pop the right one. We do this by looking at the top of the
6079 * mark stack first, and only signalling a stack underflow if
6080 * the object stack is empty and the mark stack doesn't match
6081 * our expectations.
6082 */
6083 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
6084 self->num_marks--;
6085 self->stack->mark_set = self->num_marks != 0;
6086 self->stack->fence = self->num_marks ?
6087 self->marks[self->num_marks - 1] : 0;
6088 } else if (len <= self->stack->fence)
6089 return Pdata_stack_underflow(state, self->stack);
6090 else {
6091 len--;
6092 Py_DECREF(self->stack->data[len]);
6093 Py_SET_SIZE(self->stack, len);
6094 }
6095 return 0;
6096 }
6097
6098 static int
load_pop_mark(PickleState * state,UnpicklerObject * self)6099 load_pop_mark(PickleState *state, UnpicklerObject *self)
6100 {
6101 Py_ssize_t i;
6102 if ((i = marker(state, self)) < 0)
6103 return -1;
6104
6105 Pdata_clear(self->stack, i);
6106
6107 return 0;
6108 }
6109
6110 static int
load_dup(PickleState * state,UnpicklerObject * self)6111 load_dup(PickleState *state, UnpicklerObject *self)
6112 {
6113 PyObject *last;
6114 Py_ssize_t len = Py_SIZE(self->stack);
6115
6116 if (len <= self->stack->fence)
6117 return Pdata_stack_underflow(state, self->stack);
6118 last = self->stack->data[len - 1];
6119 PDATA_APPEND(self->stack, last, -1);
6120 return 0;
6121 }
6122
6123 static int
load_get(PickleState * st,UnpicklerObject * self)6124 load_get(PickleState *st, UnpicklerObject *self)
6125 {
6126 PyObject *key, *value;
6127 Py_ssize_t idx;
6128 Py_ssize_t len;
6129 char *s;
6130
6131 if ((len = _Unpickler_Readline(st, self, &s)) < 0)
6132 return -1;
6133 if (len < 2)
6134 return bad_readline(st);
6135
6136 key = PyLong_FromString(s, NULL, 10);
6137 if (key == NULL)
6138 return -1;
6139 idx = PyLong_AsSsize_t(key);
6140 if (idx == -1 && PyErr_Occurred()) {
6141 Py_DECREF(key);
6142 return -1;
6143 }
6144
6145 value = _Unpickler_MemoGet(self, idx);
6146 if (value == NULL) {
6147 if (!PyErr_Occurred()) {
6148 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6149 }
6150 Py_DECREF(key);
6151 return -1;
6152 }
6153 Py_DECREF(key);
6154
6155 PDATA_APPEND(self->stack, value, -1);
6156 return 0;
6157 }
6158
6159 static int
load_binget(PickleState * st,UnpicklerObject * self)6160 load_binget(PickleState *st, UnpicklerObject *self)
6161 {
6162 PyObject *value;
6163 Py_ssize_t idx;
6164 char *s;
6165
6166 if (_Unpickler_Read(self, st, &s, 1) < 0)
6167 return -1;
6168
6169 idx = Py_CHARMASK(s[0]);
6170
6171 value = _Unpickler_MemoGet(self, idx);
6172 if (value == NULL) {
6173 PyObject *key = PyLong_FromSsize_t(idx);
6174 if (key != NULL) {
6175 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6176 Py_DECREF(key);
6177 }
6178 return -1;
6179 }
6180
6181 PDATA_APPEND(self->stack, value, -1);
6182 return 0;
6183 }
6184
6185 static int
load_long_binget(PickleState * st,UnpicklerObject * self)6186 load_long_binget(PickleState *st, UnpicklerObject *self)
6187 {
6188 PyObject *value;
6189 Py_ssize_t idx;
6190 char *s;
6191
6192 if (_Unpickler_Read(self, st, &s, 4) < 0)
6193 return -1;
6194
6195 idx = calc_binsize(s, 4);
6196
6197 value = _Unpickler_MemoGet(self, idx);
6198 if (value == NULL) {
6199 PyObject *key = PyLong_FromSsize_t(idx);
6200 if (key != NULL) {
6201 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6202 Py_DECREF(key);
6203 }
6204 return -1;
6205 }
6206
6207 PDATA_APPEND(self->stack, value, -1);
6208 return 0;
6209 }
6210
6211 /* Push an object from the extension registry (EXT[124]). nbytes is
6212 * the number of bytes following the opcode, holding the index (code) value.
6213 */
6214 static int
load_extension(PickleState * st,UnpicklerObject * self,int nbytes)6215 load_extension(PickleState *st, UnpicklerObject *self, int nbytes)
6216 {
6217 char *codebytes; /* the nbytes bytes after the opcode */
6218 long code; /* calc_binint returns long */
6219 PyObject *py_code; /* code as a Python int */
6220 PyObject *obj; /* the object to push */
6221 PyObject *pair; /* (module_name, class_name) */
6222 PyObject *module_name, *class_name;
6223
6224 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
6225 if (_Unpickler_Read(self, st, &codebytes, nbytes) < 0)
6226 return -1;
6227 code = calc_binint(codebytes, nbytes);
6228 if (code <= 0) { /* note that 0 is forbidden */
6229 /* Corrupt or hostile pickle. */
6230 PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
6231 return -1;
6232 }
6233
6234 /* Look for the code in the cache. */
6235 py_code = PyLong_FromLong(code);
6236 if (py_code == NULL)
6237 return -1;
6238 obj = PyDict_GetItemWithError(st->extension_cache, py_code);
6239 if (obj != NULL) {
6240 /* Bingo. */
6241 Py_DECREF(py_code);
6242 PDATA_APPEND(self->stack, obj, -1);
6243 return 0;
6244 }
6245 if (PyErr_Occurred()) {
6246 Py_DECREF(py_code);
6247 return -1;
6248 }
6249
6250 /* Look up the (module_name, class_name) pair. */
6251 pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
6252 if (pair == NULL) {
6253 Py_DECREF(py_code);
6254 if (!PyErr_Occurred()) {
6255 PyErr_Format(PyExc_ValueError, "unregistered extension "
6256 "code %ld", code);
6257 }
6258 return -1;
6259 }
6260 /* Since the extension registry is manipulable via Python code,
6261 * confirm that pair is really a 2-tuple of strings.
6262 */
6263 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6264 goto error;
6265 }
6266
6267 module_name = PyTuple_GET_ITEM(pair, 0);
6268 if (!PyUnicode_Check(module_name)) {
6269 goto error;
6270 }
6271
6272 class_name = PyTuple_GET_ITEM(pair, 1);
6273 if (!PyUnicode_Check(class_name)) {
6274 goto error;
6275 }
6276
6277 /* Load the object. */
6278 obj = find_class(self, module_name, class_name);
6279 if (obj == NULL) {
6280 Py_DECREF(py_code);
6281 return -1;
6282 }
6283 /* Cache code -> obj. */
6284 code = PyDict_SetItem(st->extension_cache, py_code, obj);
6285 Py_DECREF(py_code);
6286 if (code < 0) {
6287 Py_DECREF(obj);
6288 return -1;
6289 }
6290 PDATA_PUSH(self->stack, obj, -1);
6291 return 0;
6292
6293 error:
6294 Py_DECREF(py_code);
6295 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6296 "isn't a 2-tuple of strings", code);
6297 return -1;
6298 }
6299
6300 static int
load_put(PickleState * state,UnpicklerObject * self)6301 load_put(PickleState *state, UnpicklerObject *self)
6302 {
6303 PyObject *key, *value;
6304 Py_ssize_t idx;
6305 Py_ssize_t len;
6306 char *s = NULL;
6307
6308 if ((len = _Unpickler_Readline(state, self, &s)) < 0)
6309 return -1;
6310 if (len < 2)
6311 return bad_readline(state);
6312 if (Py_SIZE(self->stack) <= self->stack->fence)
6313 return Pdata_stack_underflow(state, self->stack);
6314 value = self->stack->data[Py_SIZE(self->stack) - 1];
6315
6316 key = PyLong_FromString(s, NULL, 10);
6317 if (key == NULL)
6318 return -1;
6319 idx = PyLong_AsSsize_t(key);
6320 Py_DECREF(key);
6321 if (idx < 0) {
6322 if (!PyErr_Occurred())
6323 PyErr_SetString(PyExc_ValueError,
6324 "negative PUT argument");
6325 return -1;
6326 }
6327
6328 return _Unpickler_MemoPut(self, idx, value);
6329 }
6330
6331 static int
load_binput(PickleState * state,UnpicklerObject * self)6332 load_binput(PickleState *state, UnpicklerObject *self)
6333 {
6334 PyObject *value;
6335 Py_ssize_t idx;
6336 char *s;
6337
6338 if (_Unpickler_Read(self, state, &s, 1) < 0)
6339 return -1;
6340
6341 if (Py_SIZE(self->stack) <= self->stack->fence)
6342 return Pdata_stack_underflow(state, self->stack);
6343 value = self->stack->data[Py_SIZE(self->stack) - 1];
6344
6345 idx = Py_CHARMASK(s[0]);
6346
6347 return _Unpickler_MemoPut(self, idx, value);
6348 }
6349
6350 static int
load_long_binput(PickleState * state,UnpicklerObject * self)6351 load_long_binput(PickleState *state, UnpicklerObject *self)
6352 {
6353 PyObject *value;
6354 Py_ssize_t idx;
6355 char *s;
6356
6357 if (_Unpickler_Read(self, state, &s, 4) < 0)
6358 return -1;
6359
6360 if (Py_SIZE(self->stack) <= self->stack->fence)
6361 return Pdata_stack_underflow(state, self->stack);
6362 value = self->stack->data[Py_SIZE(self->stack) - 1];
6363
6364 idx = calc_binsize(s, 4);
6365 if (idx < 0) {
6366 PyErr_SetString(PyExc_ValueError,
6367 "negative LONG_BINPUT argument");
6368 return -1;
6369 }
6370
6371 return _Unpickler_MemoPut(self, idx, value);
6372 }
6373
6374 static int
load_memoize(PickleState * state,UnpicklerObject * self)6375 load_memoize(PickleState *state, UnpicklerObject *self)
6376 {
6377 PyObject *value;
6378
6379 if (Py_SIZE(self->stack) <= self->stack->fence)
6380 return Pdata_stack_underflow(state, self->stack);
6381 value = self->stack->data[Py_SIZE(self->stack) - 1];
6382
6383 return _Unpickler_MemoPut(self, self->memo_len, value);
6384 }
6385
6386 static int
do_append(PickleState * state,UnpicklerObject * self,Py_ssize_t x)6387 do_append(PickleState *state, UnpicklerObject *self, Py_ssize_t x)
6388 {
6389 PyObject *value;
6390 PyObject *slice;
6391 PyObject *list;
6392 PyObject *result;
6393 Py_ssize_t len, i;
6394
6395 len = Py_SIZE(self->stack);
6396 if (x > len || x <= self->stack->fence)
6397 return Pdata_stack_underflow(state, self->stack);
6398 if (len == x) /* nothing to do */
6399 return 0;
6400
6401 list = self->stack->data[x - 1];
6402
6403 if (PyList_CheckExact(list)) {
6404 Py_ssize_t list_len;
6405 int ret;
6406
6407 slice = Pdata_poplist(self->stack, x);
6408 if (!slice)
6409 return -1;
6410 list_len = PyList_GET_SIZE(list);
6411 ret = PyList_SetSlice(list, list_len, list_len, slice);
6412 Py_DECREF(slice);
6413 return ret;
6414 }
6415 else {
6416 PyObject *extend_func;
6417
6418 if (PyObject_GetOptionalAttr(list, &_Py_ID(extend), &extend_func) < 0) {
6419 return -1;
6420 }
6421 if (extend_func != NULL) {
6422 slice = Pdata_poplist(self->stack, x);
6423 if (!slice) {
6424 Py_DECREF(extend_func);
6425 return -1;
6426 }
6427 result = _Pickle_FastCall(extend_func, slice);
6428 Py_DECREF(extend_func);
6429 if (result == NULL)
6430 return -1;
6431 Py_DECREF(result);
6432 }
6433 else {
6434 PyObject *append_func;
6435
6436 /* Even if the PEP 307 requires extend() and append() methods,
6437 fall back on append() if the object has no extend() method
6438 for backward compatibility. */
6439 append_func = PyObject_GetAttr(list, &_Py_ID(append));
6440 if (append_func == NULL)
6441 return -1;
6442 for (i = x; i < len; i++) {
6443 value = self->stack->data[i];
6444 result = _Pickle_FastCall(append_func, value);
6445 if (result == NULL) {
6446 Pdata_clear(self->stack, i + 1);
6447 Py_SET_SIZE(self->stack, x);
6448 Py_DECREF(append_func);
6449 return -1;
6450 }
6451 Py_DECREF(result);
6452 }
6453 Py_SET_SIZE(self->stack, x);
6454 Py_DECREF(append_func);
6455 }
6456 }
6457
6458 return 0;
6459 }
6460
6461 static int
load_append(PickleState * state,UnpicklerObject * self)6462 load_append(PickleState *state, UnpicklerObject *self)
6463 {
6464 if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6465 return Pdata_stack_underflow(state, self->stack);
6466 return do_append(state, self, Py_SIZE(self->stack) - 1);
6467 }
6468
6469 static int
load_appends(PickleState * state,UnpicklerObject * self)6470 load_appends(PickleState *state, UnpicklerObject *self)
6471 {
6472 Py_ssize_t i = marker(state, self);
6473 if (i < 0)
6474 return -1;
6475 return do_append(state, self, i);
6476 }
6477
6478 static int
do_setitems(PickleState * st,UnpicklerObject * self,Py_ssize_t x)6479 do_setitems(PickleState *st, UnpicklerObject *self, Py_ssize_t x)
6480 {
6481 PyObject *value, *key;
6482 PyObject *dict;
6483 Py_ssize_t len, i;
6484 int status = 0;
6485
6486 len = Py_SIZE(self->stack);
6487 if (x > len || x <= self->stack->fence)
6488 return Pdata_stack_underflow(st, self->stack);
6489 if (len == x) /* nothing to do */
6490 return 0;
6491 if ((len - x) % 2 != 0) {
6492 /* Corrupt or hostile pickle -- we never write one like this. */
6493 PyErr_SetString(st->UnpicklingError,
6494 "odd number of items for SETITEMS");
6495 return -1;
6496 }
6497
6498 /* Here, dict does not actually need to be a PyDict; it could be anything
6499 that supports the __setitem__ attribute. */
6500 dict = self->stack->data[x - 1];
6501
6502 for (i = x + 1; i < len; i += 2) {
6503 key = self->stack->data[i - 1];
6504 value = self->stack->data[i];
6505 if (PyObject_SetItem(dict, key, value) < 0) {
6506 status = -1;
6507 break;
6508 }
6509 }
6510
6511 Pdata_clear(self->stack, x);
6512 return status;
6513 }
6514
6515 static int
load_setitem(PickleState * state,UnpicklerObject * self)6516 load_setitem(PickleState *state, UnpicklerObject *self)
6517 {
6518 return do_setitems(state, self, Py_SIZE(self->stack) - 2);
6519 }
6520
6521 static int
load_setitems(PickleState * state,UnpicklerObject * self)6522 load_setitems(PickleState *state, UnpicklerObject *self)
6523 {
6524 Py_ssize_t i = marker(state, self);
6525 if (i < 0)
6526 return -1;
6527 return do_setitems(state, self, i);
6528 }
6529
6530 static int
load_additems(PickleState * state,UnpicklerObject * self)6531 load_additems(PickleState *state, UnpicklerObject *self)
6532 {
6533 PyObject *set;
6534 Py_ssize_t mark, len, i;
6535
6536 mark = marker(state, self);
6537 if (mark < 0)
6538 return -1;
6539 len = Py_SIZE(self->stack);
6540 if (mark > len || mark <= self->stack->fence)
6541 return Pdata_stack_underflow(state, self->stack);
6542 if (len == mark) /* nothing to do */
6543 return 0;
6544
6545 set = self->stack->data[mark - 1];
6546
6547 if (PySet_Check(set)) {
6548 PyObject *items;
6549 int status;
6550
6551 items = Pdata_poptuple(state, self->stack, mark);
6552 if (items == NULL)
6553 return -1;
6554
6555 status = _PySet_Update(set, items);
6556 Py_DECREF(items);
6557 return status;
6558 }
6559 else {
6560 PyObject *add_func;
6561
6562 add_func = PyObject_GetAttr(set, &_Py_ID(add));
6563 if (add_func == NULL)
6564 return -1;
6565 for (i = mark; i < len; i++) {
6566 PyObject *result;
6567 PyObject *item;
6568
6569 item = self->stack->data[i];
6570 result = _Pickle_FastCall(add_func, item);
6571 if (result == NULL) {
6572 Pdata_clear(self->stack, i + 1);
6573 Py_SET_SIZE(self->stack, mark);
6574 Py_DECREF(add_func);
6575 return -1;
6576 }
6577 Py_DECREF(result);
6578 }
6579 Py_SET_SIZE(self->stack, mark);
6580 Py_DECREF(add_func);
6581 }
6582
6583 return 0;
6584 }
6585
/* BUILD opcode: apply the state object on top of the stack to the instance
 * right below it.  The state is popped; the instance stays on the stack.
 *
 * If the instance exposes __setstate__, it is called with the state and is
 * responsible for everything.  Otherwise a default is applied: the state may
 * be a 2-tuple (state, slotstate); a non-None state must be a dict whose
 * items are stored into inst.__dict__, and a slotstate must be a dict whose
 * items are applied with PyObject_SetAttr().
 *
 * Returns 0 on success and -1 with an exception set on failure.
 */
static int
load_build(PickleState *st, UnpicklerObject *self)
{
    PyObject *inst, *slotstate;
    PyObject *setstate;
    int status = 0;

    /* Stack is ... instance, state.  We want to leave instance at
     * the stack top, possibly mutated via instance.__setstate__(state).
     */
    if (Py_SIZE(self->stack) - 2 < self->stack->fence)
        return Pdata_stack_underflow(st, self->stack);

    PyObject *state;
    PDATA_POP(st, self->stack, state);
    if (state == NULL)
        return -1;

    /* inst is read straight from the stack slot; no reference is taken. */
    inst = self->stack->data[Py_SIZE(self->stack) - 1];

    if (PyObject_GetOptionalAttr(inst, &_Py_ID(__setstate__), &setstate) < 0) {
        Py_DECREF(state);
        return -1;
    }
    if (setstate != NULL) {
        PyObject *result;

        /* The explicit __setstate__ is responsible for everything. */
        /* NOTE(review): `state` is not released on this path; presumably
           _Pickle_FastCall() consumes the reference -- confirm. */
        result = _Pickle_FastCall(setstate, state);
        Py_DECREF(setstate);
        if (result == NULL)
            return -1;
        Py_DECREF(result);
        return 0;
    }

    /* A default __setstate__.  First see whether state embeds a
     * slot state dict too (a proto 2 addition).
     */
    if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
        PyObject *tmp = state;

        /* Take our own references to both halves before dropping the tuple. */
        state = PyTuple_GET_ITEM(tmp, 0);
        slotstate = PyTuple_GET_ITEM(tmp, 1);
        Py_INCREF(state);
        Py_INCREF(slotstate);
        Py_DECREF(tmp);
    }
    else
        slotstate = NULL;

    /* Set inst.__dict__ from the state dict (if any). */
    if (state != Py_None) {
        PyObject *dict;
        PyObject *d_key, *d_value;
        Py_ssize_t i;

        if (!PyDict_Check(state)) {
            PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
            goto error;
        }
        dict = PyObject_GetAttr(inst, &_Py_ID(__dict__));
        if (dict == NULL)
            goto error;

        i = 0;
        while (PyDict_Next(state, &i, &d_key, &d_value)) {
            /* normally the keys for instance attributes are
               interned.  we should try to do that here. */
            /* Hold a reference to the key: interning receives &d_key and
               may replace the pointer. */
            Py_INCREF(d_key);
            if (PyUnicode_CheckExact(d_key)) {
                PyInterpreterState *interp = _PyInterpreterState_GET();
                _PyUnicode_InternMortal(interp, &d_key);
            }
            if (PyObject_SetItem(dict, d_key, d_value) < 0) {
                Py_DECREF(d_key);
                Py_DECREF(dict);
                goto error;
            }
            Py_DECREF(d_key);
        }
        Py_DECREF(dict);
    }

    /* Also set instance attributes from the slotstate dict (if any). */
    if (slotstate != NULL) {
        PyObject *d_key, *d_value;
        Py_ssize_t i;

        if (!PyDict_Check(slotstate)) {
            PyErr_SetString(st->UnpicklingError,
                            "slot state is not a dictionary");
            goto error;
        }
        i = 0;
        while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
            if (PyObject_SetAttr(inst, d_key, d_value) < 0)
                goto error;
        }
    }

    /* The error label lives inside a never-taken branch so that the success
       path skips it while both paths share the cleanup below. */
    if (0) {
  error:
        status = -1;
    }

    Py_DECREF(state);
    Py_XDECREF(slotstate);
    return status;
}
6696
6697 static int
load_mark(PickleState * state,UnpicklerObject * self)6698 load_mark(PickleState *state, UnpicklerObject *self)
6699 {
6700
6701 /* Note that we split the (pickle.py) stack into two stacks, an
6702 * object stack and a mark stack. Here we push a mark onto the
6703 * mark stack.
6704 */
6705
6706 if (self->num_marks >= self->marks_size) {
6707 size_t alloc = ((size_t)self->num_marks << 1) + 20;
6708 Py_ssize_t *marks_new = self->marks;
6709 PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6710 if (marks_new == NULL) {
6711 PyErr_NoMemory();
6712 return -1;
6713 }
6714 self->marks = marks_new;
6715 self->marks_size = (Py_ssize_t)alloc;
6716 }
6717
6718 self->stack->mark_set = 1;
6719 self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6720
6721 return 0;
6722 }
6723
6724 static int
load_reduce(PickleState * state,UnpicklerObject * self)6725 load_reduce(PickleState *state, UnpicklerObject *self)
6726 {
6727 PyObject *callable = NULL;
6728 PyObject *argtup = NULL;
6729 PyObject *obj = NULL;
6730
6731 PDATA_POP(state, self->stack, argtup);
6732 if (argtup == NULL)
6733 return -1;
6734 PDATA_POP(state, self->stack, callable);
6735 if (callable) {
6736 obj = PyObject_CallObject(callable, argtup);
6737 Py_DECREF(callable);
6738 }
6739 Py_DECREF(argtup);
6740
6741 if (obj == NULL)
6742 return -1;
6743
6744 PDATA_PUSH(self->stack, obj, -1);
6745 return 0;
6746 }
6747
6748 /* Just raises an error if we don't know the protocol specified. PROTO
6749 * is the first opcode for protocols >= 2.
6750 */
6751 static int
load_proto(PickleState * state,UnpicklerObject * self)6752 load_proto(PickleState *state, UnpicklerObject *self)
6753 {
6754 char *s;
6755 int i;
6756
6757 if (_Unpickler_Read(self, state, &s, 1) < 0)
6758 return -1;
6759
6760 i = (unsigned char)s[0];
6761 if (i <= HIGHEST_PROTOCOL) {
6762 self->proto = i;
6763 return 0;
6764 }
6765
6766 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6767 return -1;
6768 }
6769
6770 static int
load_frame(PickleState * state,UnpicklerObject * self)6771 load_frame(PickleState *state, UnpicklerObject *self)
6772 {
6773 char *s;
6774 Py_ssize_t frame_len;
6775
6776 if (_Unpickler_Read(self, state, &s, 8) < 0)
6777 return -1;
6778
6779 frame_len = calc_binsize(s, 8);
6780 if (frame_len < 0) {
6781 PyErr_Format(PyExc_OverflowError,
6782 "FRAME length exceeds system's maximum of %zd bytes",
6783 PY_SSIZE_T_MAX);
6784 return -1;
6785 }
6786
6787 if (_Unpickler_Read(self, state, &s, frame_len) < 0)
6788 return -1;
6789
6790 /* Rewind to start of frame */
6791 self->next_read_idx -= frame_len;
6792 return 0;
6793 }
6794
/* Main unpickling loop.
 *
 * Resets the per-load state (marks, fence, protocol, object stack), caches
 * the persistent_load attribute of the unpickler for the duration of the
 * load, then reads one opcode byte at a time and dispatches to the matching
 * load_*() handler until STOP is read or a handler fails.
 *
 * On success the value popped from the top of the stack is returned.  On
 * failure NULL is returned with an exception set.  The cached
 * persistent_load reference is cleared on both paths.
 */
static PyObject *
load(PickleState *st, UnpicklerObject *self)
{
    PyObject *value = NULL;
    PyObject *tmp;
    char *s = NULL;

    self->num_marks = 0;
    self->stack->mark_set = 0;
    self->stack->fence = 0;
    self->proto = 0;
    if (Py_SIZE(self->stack))
        Pdata_clear(self->stack, 0);

    /* Cache the persistent_load method. */
    tmp = PyObject_GetAttr((PyObject *)self, &_Py_ID(persistent_load));
    if (tmp == NULL) {
        goto error;
    }
    Py_XSETREF(self->persistent_load, tmp);

    /* Convenient macros for the dispatch while-switch loop just below. */
    /* In both macros a failing handler executes `break`, which leaves the
       switch and then hits the loop-level `break` below; a successful
       handler executes `continue` to fetch the next opcode. */
#define OP(opcode, load_func) \
    case opcode: if (load_func(st, self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(st, self, (arg)) < 0) break; continue;

    while (1) {
        if (_Unpickler_Read(self, st, &s, 1) < 0) {
            /* An UnpicklingError raised while fetching the opcode byte is
               replaced by EOFError. */
            if (PyErr_ExceptionMatches(st->UnpicklingError)) {
                PyErr_Format(PyExc_EOFError, "Ran out of input");
            }
            goto error;
        }

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
        OP_ARG(BINBYTES, load_counted_binbytes, 4)
        OP_ARG(BINBYTES8, load_counted_binbytes, 8)
        OP(BYTEARRAY8, load_counted_bytearray)
        OP(NEXT_BUFFER, load_next_buffer)
        OP(READONLY_BUFFER, load_readonly_buffer)
        OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
        OP_ARG(BINSTRING, load_counted_binstring, 4)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
        OP_ARG(BINUNICODE, load_counted_binunicode, 4)
        OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(EMPTY_SET, load_empty_set)
        OP(ADDITEMS, load_additems)
        OP(FROZENSET, load_frozenset)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP_ARG(NEWOBJ, load_newobj, 0)
        OP_ARG(NEWOBJ_EX, load_newobj, 1)
        OP(GLOBAL, load_global)
        OP(STACK_GLOBAL, load_stack_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(MEMOIZE, load_memoize)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP(FRAME, load_frame)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        case STOP:
            break;

        default:
            {
                /* Unknown opcode: show it literally when it is printable
                   ASCII other than a quote or backslash, else as \xNN. */
                unsigned char c = (unsigned char) *s;
                if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '%c'.", c);
                }
                else {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '\\x%02x'.", c);
                }
                goto error;
            }
        }

        break;                  /* and we are done! */
    }

    /* Reached both after STOP and after a failed handler; the pending
       exception tells the two apart. */
    if (PyErr_Occurred()) {
        goto error;
    }

    if (_Unpickler_SkipConsumed(self) < 0)
        goto error;

    Py_CLEAR(self->persistent_load);
    PDATA_POP(st, self->stack, value);
    return value;

error:
    Py_CLEAR(self->persistent_load);
    return NULL;
}
6936
6937 /*[clinic input]
6938
6939 _pickle.Unpickler.persistent_load
6940
6941 cls: defining_class
6942 pid: object
6943 /
6944
6945 [clinic start generated code]*/
6946
6947 static PyObject *
_pickle_Unpickler_persistent_load_impl(UnpicklerObject * self,PyTypeObject * cls,PyObject * pid)6948 _pickle_Unpickler_persistent_load_impl(UnpicklerObject *self,
6949 PyTypeObject *cls, PyObject *pid)
6950 /*[clinic end generated code: output=9f4706f1330cb14d input=2f9554fae051276e]*/
6951 {
6952 PickleState *st = _Pickle_GetStateByClass(cls);
6953 PyErr_SetString(st->UnpicklingError,
6954 "A load persistent id instruction was encountered, "
6955 "but no persistent_load function was specified.");
6956 return NULL;
6957 }
6958
6959 /*[clinic input]
6960
6961 _pickle.Unpickler.load
6962
6963 cls: defining_class
6964
6965 Load a pickle.
6966
6967 Read a pickled object representation from the open file object given
6968 in the constructor, and return the reconstituted object hierarchy
6969 specified therein.
6970 [clinic start generated code]*/
6971
6972 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self,PyTypeObject * cls)6973 _pickle_Unpickler_load_impl(UnpicklerObject *self, PyTypeObject *cls)
6974 /*[clinic end generated code: output=cc88168f608e3007 input=f5d2f87e61d5f07f]*/
6975 {
6976 UnpicklerObject *unpickler = (UnpicklerObject*)self;
6977
6978 PickleState *st = _Pickle_GetStateByClass(cls);
6979
6980 /* Check whether the Unpickler was initialized correctly. This prevents
6981 segfaulting if a subclass overridden __init__ with a function that does
6982 not call Unpickler.__init__(). Here, we simply ensure that self->read
6983 is not NULL. */
6984 if (unpickler->read == NULL) {
6985 PyErr_Format(st->UnpicklingError,
6986 "Unpickler.__init__() was not called by %s.__init__()",
6987 Py_TYPE(unpickler)->tp_name);
6988 return NULL;
6989 }
6990
6991 return load(st, unpickler);
6992 }
6993
6994 /* The name of find_class() is misleading. In newer pickle protocols, this
6995 function is used for loading any global (i.e., functions), not just
6996 classes. The name is kept only for backward compatibility. */
6997
6998 /*[clinic input]
6999
7000 _pickle.Unpickler.find_class
7001
7002 cls: defining_class
7003 module_name: object
7004 global_name: object
7005 /
7006
7007 Return an object from a specified module.
7008
7009 If necessary, the module will be imported. Subclasses may override
7010 this method (e.g. to restrict unpickling of arbitrary classes and
7011 functions).
7012
7013 This method is called whenever a class or a function object is
7014 needed. Both arguments passed are str objects.
7015 [clinic start generated code]*/
7016
static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject *self, PyTypeObject *cls,
                                  PyObject *module_name,
                                  PyObject *global_name)
/*[clinic end generated code: output=99577948abb0be81 input=9577745719219fc7]*/
{
    PyObject *global;
    PyObject *module;

    /* Raise the "pickle.find_class" audit event first; a failing hook
       aborts the lookup. */
    if (PySys_Audit("pickle.find_class", "OO",
                    module_name, global_name) < 0) {
        return NULL;
    }

    /* Try to map the old names used in Python 2.x to the new ones used in
       Python 3.x.  We do this only with old pickle protocols and when the
       user has not disabled the feature. */
    if (self->proto < 3 && self->fix_imports) {
        PyObject *key;
        PyObject *item;
        PickleState *st = _Pickle_GetStateByClass(cls);

        /* Check if the global (i.e., a function or a class) was renamed
           or moved to another module. */
        key = PyTuple_Pack(2, module_name, global_name);
        if (key == NULL)
            return NULL;
        /* `item` is a borrowed reference owned by the mapping dict. */
        item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
        Py_DECREF(key);
        if (item) {
            if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
                PyErr_Format(PyExc_RuntimeError,
                             "_compat_pickle.NAME_MAPPING values should be "
                             "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
                return NULL;
            }
            /* Both names now point at borrowed items of the mapped tuple. */
            module_name = PyTuple_GET_ITEM(item, 0);
            global_name = PyTuple_GET_ITEM(item, 1);
            if (!PyUnicode_Check(module_name) ||
                !PyUnicode_Check(global_name)) {
                PyErr_Format(PyExc_RuntimeError,
                             "_compat_pickle.NAME_MAPPING values should be "
                             "pairs of str, not (%.200s, %.200s)",
                             Py_TYPE(module_name)->tp_name,
                             Py_TYPE(global_name)->tp_name);
                return NULL;
            }
        }
        else if (PyErr_Occurred()) {
            /* The lookup itself failed, as opposed to "key not found". */
            return NULL;
        }
        else {
            /* Check if the module was renamed. */
            item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
            if (item) {
                if (!PyUnicode_Check(item)) {
                    PyErr_Format(PyExc_RuntimeError,
                                 "_compat_pickle.IMPORT_MAPPING values should be "
                                 "strings, not %.200s", Py_TYPE(item)->tp_name);
                    return NULL;
                }
                module_name = item;
            }
            else if (PyErr_Occurred()) {
                return NULL;
            }
        }
    }

    /*
     * we don't use PyImport_GetModule here, because it can return partially-
     * initialised modules, which then cause the getattribute to fail.
     */
    module = PyImport_Import(module_name);
    if (module == NULL) {
        return NULL;
    }
    /* The third argument is true for protocol 4 and later.
       NOTE(review): presumably it enables dotted (qualified) name lookup in
       getattribute() -- confirm against that helper. */
    global = getattribute(module, global_name, self->proto >= 4);
    Py_DECREF(module);
    return global;
}
7098
7099 /*[clinic input]
7100
7101 _pickle.Unpickler.__sizeof__ -> size_t
7102
7103 Returns size in memory, in bytes.
7104 [clinic start generated code]*/
7105
7106 static size_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)7107 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7108 /*[clinic end generated code: output=4648d84c228196df input=27180b2b6b524012]*/
7109 {
7110 size_t res = _PyObject_SIZE(Py_TYPE(self));
7111 if (self->memo != NULL)
7112 res += self->memo_size * sizeof(PyObject *);
7113 if (self->marks != NULL)
7114 res += (size_t)self->marks_size * sizeof(Py_ssize_t);
7115 if (self->input_line != NULL)
7116 res += strlen(self->input_line) + 1;
7117 if (self->encoding != NULL)
7118 res += strlen(self->encoding) + 1;
7119 if (self->errors != NULL)
7120 res += strlen(self->errors) + 1;
7121 return res;
7122 }
7123
/* Method table for _pickle.Unpickler (installed via Py_tp_methods).  The
 * *_METHODDEF entries are macros defined outside this part of the file
 * (presumably Argument Clinic output -- confirm); each expands to one
 * PyMethodDef initializer including its trailing comma.
 */
static struct PyMethodDef Unpickler_methods[] = {
    _PICKLE_UNPICKLER_PERSISTENT_LOAD_METHODDEF
    _PICKLE_UNPICKLER_LOAD_METHODDEF
    _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
    _PICKLE_UNPICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
7131
7132 static int
Unpickler_clear(UnpicklerObject * self)7133 Unpickler_clear(UnpicklerObject *self)
7134 {
7135 Py_CLEAR(self->readline);
7136 Py_CLEAR(self->readinto);
7137 Py_CLEAR(self->read);
7138 Py_CLEAR(self->peek);
7139 Py_CLEAR(self->stack);
7140 Py_CLEAR(self->persistent_load);
7141 Py_CLEAR(self->buffers);
7142 if (self->buffer.buf != NULL) {
7143 PyBuffer_Release(&self->buffer);
7144 self->buffer.buf = NULL;
7145 }
7146
7147 _Unpickler_MemoCleanup(self);
7148 PyMem_Free(self->marks);
7149 self->marks = NULL;
7150 PyMem_Free(self->input_line);
7151 self->input_line = NULL;
7152 PyMem_Free(self->encoding);
7153 self->encoding = NULL;
7154 PyMem_Free(self->errors);
7155 self->errors = NULL;
7156
7157 return 0;
7158 }
7159
7160 static void
Unpickler_dealloc(UnpicklerObject * self)7161 Unpickler_dealloc(UnpicklerObject *self)
7162 {
7163 PyTypeObject *tp = Py_TYPE(self);
7164 PyObject_GC_UnTrack((PyObject *)self);
7165 (void)Unpickler_clear(self);
7166 tp->tp_free((PyObject *)self);
7167 Py_DECREF(tp);
7168 }
7169
7170 static int
Unpickler_traverse(UnpicklerObject * self,visitproc visit,void * arg)7171 Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
7172 {
7173 Py_VISIT(Py_TYPE(self));
7174 Py_VISIT(self->readline);
7175 Py_VISIT(self->readinto);
7176 Py_VISIT(self->read);
7177 Py_VISIT(self->peek);
7178 Py_VISIT(self->stack);
7179 Py_VISIT(self->persistent_load);
7180 Py_VISIT(self->buffers);
7181 PyObject **memo = self->memo;
7182 if (memo) {
7183 Py_ssize_t i = self->memo_size;
7184 while (--i >= 0) {
7185 Py_VISIT(memo[i]);
7186 }
7187 }
7188 return 0;
7189 }
7190
7191 /*[clinic input]
7192
7193 _pickle.Unpickler.__init__
7194
7195 file: object
7196 *
7197 fix_imports: bool = True
7198 encoding: str = 'ASCII'
7199 errors: str = 'strict'
7200 buffers: object(c_default="NULL") = ()
7201
7202 This takes a binary file for reading a pickle data stream.
7203
7204 The protocol version of the pickle is detected automatically, so no
7205 protocol argument is needed. Bytes past the pickled object's
7206 representation are ignored.
7207
7208 The argument *file* must have two methods, a read() method that takes
7209 an integer argument, and a readline() method that requires no
7210 arguments. Both methods should return bytes. Thus *file* can be a
7211 binary file object opened for reading, an io.BytesIO object, or any
7212 other custom object that meets this interface.
7213
7214 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7215 which are used to control compatibility support for pickle stream
7216 generated by Python 2. If *fix_imports* is True, pickle will try to
7217 map the old Python 2 names to the new names used in Python 3. The
7218 *encoding* and *errors* tell pickle how to decode 8-bit string
7219 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7220 respectively. The *encoding* can be 'bytes' to read these 8-bit
7221 string instances as bytes objects.
7222 [clinic start generated code]*/
7223
7224 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7225 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7226 int fix_imports, const char *encoding,
7227 const char *errors, PyObject *buffers)
7228 /*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
7229 {
7230 /* In case of multiple __init__() calls, clear previous content. */
7231 if (self->read != NULL)
7232 (void)Unpickler_clear(self);
7233
7234 if (_Unpickler_SetInputStream(self, file) < 0)
7235 return -1;
7236
7237 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
7238 return -1;
7239
7240 if (_Unpickler_SetBuffers(self, buffers) < 0)
7241 return -1;
7242
7243 self->fix_imports = fix_imports;
7244
7245 PyTypeObject *tp = Py_TYPE(self);
7246 PickleState *state = _Pickle_FindStateByType(tp);
7247 self->stack = (Pdata *)Pdata_New(state);
7248 if (self->stack == NULL)
7249 return -1;
7250
7251 self->memo_size = 32;
7252 self->memo = _Unpickler_NewMemo(self->memo_size);
7253 if (self->memo == NULL)
7254 return -1;
7255
7256 self->proto = 0;
7257
7258 return 0;
7259 }
7260
7261
7262 /* Define a proxy object for the Unpickler's internal memo object. This is to
7263 * avoid breaking code like:
7264 * unpickler.memo.clear()
7265 * and
7266 * unpickler.memo = saved_memo
7267 * Is this a good idea? Not really, but we don't want to break code that uses
7268 * it. Note that we don't implement the entire mapping API here. This is
7269 * intentional, as these should be treated as black-box implementation details.
7270 *
7271 * We do, however, have to implement pickling/unpickling support because of
7272 * real-world code like cvs2svn.
7273 */
7274
7275 /*[clinic input]
7276 _pickle.UnpicklerMemoProxy.clear
7277
7278 Remove all items from memo.
7279 [clinic start generated code]*/
7280
7281 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)7282 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
7283 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
7284 {
7285 _Unpickler_MemoCleanup(self->unpickler);
7286 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7287 if (self->unpickler->memo == NULL)
7288 return NULL;
7289 Py_RETURN_NONE;
7290 }
7291
7292 /*[clinic input]
7293 _pickle.UnpicklerMemoProxy.copy
7294
7295 Copy the memo to a new object.
7296 [clinic start generated code]*/
7297
7298 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)7299 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
7300 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
7301 {
7302 size_t i;
7303 PyObject *new_memo = PyDict_New();
7304 if (new_memo == NULL)
7305 return NULL;
7306
7307 for (i = 0; i < self->unpickler->memo_size; i++) {
7308 int status;
7309 PyObject *key, *value;
7310
7311 value = self->unpickler->memo[i];
7312 if (value == NULL)
7313 continue;
7314
7315 key = PyLong_FromSsize_t(i);
7316 if (key == NULL)
7317 goto error;
7318 status = PyDict_SetItem(new_memo, key, value);
7319 Py_DECREF(key);
7320 if (status < 0)
7321 goto error;
7322 }
7323 return new_memo;
7324
7325 error:
7326 Py_DECREF(new_memo);
7327 return NULL;
7328 }
7329
7330 /*[clinic input]
7331 _pickle.UnpicklerMemoProxy.__reduce__
7332
7333 Implement pickling support.
7334 [clinic start generated code]*/
7335
7336 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)7337 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
7338 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
7339 {
7340 PyObject *reduce_value;
7341 PyObject *constructor_args;
7342 PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
7343 if (contents == NULL)
7344 return NULL;
7345
7346 reduce_value = PyTuple_New(2);
7347 if (reduce_value == NULL) {
7348 Py_DECREF(contents);
7349 return NULL;
7350 }
7351 constructor_args = PyTuple_New(1);
7352 if (constructor_args == NULL) {
7353 Py_DECREF(contents);
7354 Py_DECREF(reduce_value);
7355 return NULL;
7356 }
7357 PyTuple_SET_ITEM(constructor_args, 0, contents);
7358 PyTuple_SET_ITEM(reduce_value, 0, Py_NewRef(&PyDict_Type));
7359 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7360 return reduce_value;
7361 }
7362
/* Method table for _pickle.UnpicklerMemoProxy (installed via
 * Py_tp_methods): clear(), copy() and __reduce__().  The *_METHODDEF macros
 * are defined outside this part of the file.
 */
static PyMethodDef unpicklerproxy_methods[] = {
    _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL}    /* sentinel */
};
7369
7370 static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject * self)7371 UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
7372 {
7373 PyTypeObject *tp = Py_TYPE(self);
7374 PyObject_GC_UnTrack(self);
7375 Py_CLEAR(self->unpickler);
7376 tp->tp_free((PyObject *)self);
7377 Py_DECREF(tp);
7378 }
7379
/* tp_traverse for _pickle.UnpicklerMemoProxy: report the type and the
 * proxied unpickler to the cyclic garbage collector.  Returns 0, or the
 * first non-zero value returned by `visit`.
 */
static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
                            visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->unpickler);
    return 0;
}
7388
/* tp_clear for _pickle.UnpicklerMemoProxy: drop the reference to the
 * proxied unpickler.  Always returns 0.
 */
static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
{
    Py_CLEAR(self->unpickler);
    return 0;
}
7395
/* Type slots for _pickle.UnpicklerMemoProxy.  Hashing is routed to
 * PyObject_HashNotImplemented, so instances are unhashable.
 */
static PyType_Slot unpickler_memoproxy_slots[] = {
    {Py_tp_dealloc, UnpicklerMemoProxy_dealloc},
    {Py_tp_traverse, UnpicklerMemoProxy_traverse},
    {Py_tp_clear, UnpicklerMemoProxy_clear},
    {Py_tp_methods, unpicklerproxy_methods},
    {Py_tp_hash, PyObject_HashNotImplemented},
    {0, NULL},                  /* sentinel */
};
7404
/* Type spec for _pickle.UnpicklerMemoProxy: a GC-tracked, subclassable type
 * flagged as immutable.
 */
static PyType_Spec unpickler_memoproxy_spec = {
    .name = "_pickle.UnpicklerMemoProxy",
    .basicsize = sizeof(UnpicklerMemoProxyObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = unpickler_memoproxy_slots,
};
7412
7413 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)7414 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7415 {
7416 PickleState *state = _Pickle_FindStateByType(Py_TYPE(unpickler));
7417 UnpicklerMemoProxyObject *self;
7418 self = PyObject_GC_New(UnpicklerMemoProxyObject,
7419 state->UnpicklerMemoProxyType);
7420 if (self == NULL)
7421 return NULL;
7422 self->unpickler = (UnpicklerObject*)Py_NewRef(unpickler);
7423 PyObject_GC_Track(self);
7424 return (PyObject *)self;
7425 }
7426
7427 /*****************************************************************************/
7428
7429
/* Getter for Unpickler.memo: returns a new UnpicklerMemoProxy object bound
 * to this unpickler (a fresh proxy on every access), or NULL on failure.
 */
static PyObject *
Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
{
    return UnpicklerMemoProxy_New(self);
}
7435
/* Setter for Unpickler.memo.
 *
 * Accepts either an UnpicklerMemoProxy (exact type) or a dict with
 * non-negative integer keys.  Deleting the attribute is rejected with
 * TypeError, as is any other kind of object.
 *
 * Returns 0 on success and -1 with an exception set on failure.
 */
static int
Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
{
    PyObject **new_memo;
    size_t new_memo_size = 0;

    /* obj == NULL means `del unpickler.memo`. */
    if (obj == NULL) {
        PyErr_SetString(PyExc_TypeError,
                        "attribute deletion is not supported");
        return -1;
    }

    PickleState *state = _Pickle_FindStateByType(Py_TYPE(self));
    if (Py_IS_TYPE(obj, state->UnpicklerMemoProxyType)) {
        UnpicklerObject *unpickler =
            ((UnpicklerMemoProxyObject *)obj)->unpickler;

        /* Slot-for-slot copy of the other unpickler's memo, taking a new
           reference to each non-NULL entry. */
        new_memo_size = unpickler->memo_size;
        new_memo = _Unpickler_NewMemo(new_memo_size);
        if (new_memo == NULL)
            return -1;

        for (size_t i = 0; i < new_memo_size; i++) {
            new_memo[i] = Py_XNewRef(unpickler->memo[i]);
        }
    }
    else if (PyDict_Check(obj)) {
        Py_ssize_t i = 0;
        PyObject *key, *value;

        new_memo_size = PyDict_GET_SIZE(obj);
        new_memo = _Unpickler_NewMemo(new_memo_size);
        if (new_memo == NULL)
            return -1;

        while (PyDict_Next(obj, &i, &key, &value)) {
            Py_ssize_t idx;
            if (!PyLong_Check(key)) {
                PyErr_SetString(PyExc_TypeError,
                                "memo key must be integers");
                goto error;
            }
            idx = PyLong_AsSsize_t(key);
            if (idx == -1 && PyErr_Occurred())
                goto error;
            if (idx < 0) {
                PyErr_SetString(PyExc_ValueError,
                                "memo key must be positive integers.");
                goto error;
            }
            /* NOTE(review): entries are stored through
               _Unpickler_MemoPut(self, ...), i.e. into `self`, while
               `new_memo` -- the array installed below -- is not written on
               this path.  Confirm against _Unpickler_MemoPut() whether the
               dict contents survive the _Unpickler_MemoCleanup(self) call
               further down. */
            if (_Unpickler_MemoPut(self, idx, value) < 0)
                goto error;
        }
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "'memo' attribute must be an UnpicklerMemoProxy object "
                     "or dict, not %.200s", Py_TYPE(obj)->tp_name);
        return -1;
    }

    /* Replace the current memo with the newly built one. */
    _Unpickler_MemoCleanup(self);
    self->memo_size = new_memo_size;
    self->memo = new_memo;

    return 0;

  error:
    if (new_memo_size) {
        /* Count down with an unsigned index; the loop stops once `i` wraps
           around to SIZE_MAX after slot 0. */
        for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
            Py_XDECREF(new_memo[i]);
        }
        PyMem_Free(new_memo);
    }
    return -1;
}
7512
7513 static PyObject *
Unpickler_getattr(PyObject * self,PyObject * name)7514 Unpickler_getattr(PyObject *self, PyObject *name)
7515 {
7516 if (PyUnicode_Check(name)
7517 && PyUnicode_EqualToUTF8(name, "persistent_load")
7518 && ((UnpicklerObject *)self)->persistent_load_attr)
7519 {
7520 return Py_NewRef(((UnpicklerObject *)self)->persistent_load_attr);
7521 }
7522
7523 return PyObject_GenericGetAttr(self, name);
7524 }
7525
7526 static int
Unpickler_setattr(PyObject * self,PyObject * name,PyObject * value)7527 Unpickler_setattr(PyObject *self, PyObject *name, PyObject *value)
7528 {
7529 if (PyUnicode_Check(name)
7530 && PyUnicode_EqualToUTF8(name, "persistent_load"))
7531 {
7532 Py_XINCREF(value);
7533 Py_XSETREF(((UnpicklerObject *)self)->persistent_load_attr, value);
7534 return 0;
7535 }
7536
7537 return PyObject_GenericSetAttr(self, name, value);
7538 }
7539
/* Computed attributes of _pickle.Unpickler: "memo" is read through
 * Unpickler_get_memo() and written through Unpickler_set_memo().
 */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {NULL}                      /* sentinel */
};
7544
/* Type slots for _pickle.Unpickler.  Attribute access goes through
 * Unpickler_getattr()/Unpickler_setattr(), which special-case
 * "persistent_load".  The class docstring is the __init__ docstring.
 */
static PyType_Slot unpickler_type_slots[] = {
    {Py_tp_dealloc, Unpickler_dealloc},
    {Py_tp_doc, (char *)_pickle_Unpickler___init____doc__},
    {Py_tp_getattro, Unpickler_getattr},
    {Py_tp_setattro, Unpickler_setattr},
    {Py_tp_traverse, Unpickler_traverse},
    {Py_tp_clear, Unpickler_clear},
    {Py_tp_methods, Unpickler_methods},
    {Py_tp_getset, Unpickler_getsets},
    {Py_tp_init, _pickle_Unpickler___init__},
    {Py_tp_alloc, PyType_GenericAlloc},
    {Py_tp_new, PyType_GenericNew},
    {Py_tp_free, PyObject_GC_Del},
    {0, NULL},                  /* sentinel */
};
7560
/* Type spec for _pickle.Unpickler: a GC-tracked, subclassable type flagged
 * as immutable.
 */
static PyType_Spec unpickler_type_spec = {
    .name = "_pickle.Unpickler",
    .basicsize = sizeof(UnpicklerObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = unpickler_type_slots,
};
7568
7569 /*[clinic input]
7570
7571 _pickle.dump
7572
7573 obj: object
7574 file: object
7575 protocol: object = None
7576 *
7577 fix_imports: bool = True
7578 buffer_callback: object = None
7579
7580 Write a pickled representation of obj to the open file object file.
7581
7582 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7583 be more efficient.
7584
7585 The optional *protocol* argument tells the pickler to use the given
7586 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7587 protocol is 4. It was introduced in Python 3.4, and is incompatible
7588 with previous versions.
7589
7590 Specifying a negative protocol version selects the highest protocol
7591 version supported. The higher the protocol used, the more recent the
7592 version of Python needed to read the pickle produced.
7593
7594 The *file* argument must have a write() method that accepts a single
7595 bytes argument. It can thus be a file object opened for binary
7596 writing, an io.BytesIO instance, or any other custom object that meets
7597 this interface.
7598
7599 If *fix_imports* is True and protocol is less than 3, pickle will try
7600 to map the new Python 3 names to the old module names used in Python
7601 2, so that the pickle data stream is readable with Python 2.
7602
7603 If *buffer_callback* is None (the default), buffer views are serialized
7604 into *file* as part of the pickle stream. It is an error if
7605 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7606
7607 [clinic start generated code]*/
7608
7609 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7610 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7611 PyObject *protocol, int fix_imports,
7612 PyObject *buffer_callback)
7613 /*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
7614 {
7615 PickleState *state = _Pickle_GetState(module);
7616 PicklerObject *pickler = _Pickler_New(state);
7617
7618 if (pickler == NULL)
7619 return NULL;
7620
7621 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7622 goto error;
7623
7624 if (_Pickler_SetOutputStream(pickler, file) < 0)
7625 goto error;
7626
7627 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7628 goto error;
7629
7630 if (dump(state, pickler, obj) < 0)
7631 goto error;
7632
7633 if (_Pickler_FlushToFile(pickler) < 0)
7634 goto error;
7635
7636 Py_DECREF(pickler);
7637 Py_RETURN_NONE;
7638
7639 error:
7640 Py_XDECREF(pickler);
7641 return NULL;
7642 }
7643
7644 /*[clinic input]
7645
7646 _pickle.dumps
7647
7648 obj: object
7649 protocol: object = None
7650 *
7651 fix_imports: bool = True
7652 buffer_callback: object = None
7653
7654 Return the pickled representation of the object as a bytes object.
7655
7656 The optional *protocol* argument tells the pickler to use the given
7657 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7658 protocol is 4. It was introduced in Python 3.4, and is incompatible
7659 with previous versions.
7660
7661 Specifying a negative protocol version selects the highest protocol
7662 version supported. The higher the protocol used, the more recent the
7663 version of Python needed to read the pickle produced.
7664
7665 If *fix_imports* is True and *protocol* is less than 3, pickle will
7666 try to map the new Python 3 names to the old module names used in
7667 Python 2, so that the pickle data stream is readable with Python 2.
7668
7669 If *buffer_callback* is None (the default), buffer views are serialized
7670 into *file* as part of the pickle stream. It is an error if
7671 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7672
7673 [clinic start generated code]*/
7674
7675 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7676 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7677 int fix_imports, PyObject *buffer_callback)
7678 /*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
7679 {
7680 PyObject *result;
7681 PickleState *state = _Pickle_GetState(module);
7682 PicklerObject *pickler = _Pickler_New(state);
7683
7684 if (pickler == NULL)
7685 return NULL;
7686
7687 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7688 goto error;
7689
7690 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7691 goto error;
7692
7693 if (dump(state, pickler, obj) < 0)
7694 goto error;
7695
7696 result = _Pickler_GetString(pickler);
7697 Py_DECREF(pickler);
7698 return result;
7699
7700 error:
7701 Py_XDECREF(pickler);
7702 return NULL;
7703 }
7704
7705 /*[clinic input]
7706
7707 _pickle.load
7708
7709 file: object
7710 *
7711 fix_imports: bool = True
7712 encoding: str = 'ASCII'
7713 errors: str = 'strict'
7714 buffers: object(c_default="NULL") = ()
7715
7716 Read and return an object from the pickle data stored in a file.
7717
7718 This is equivalent to ``Unpickler(file).load()``, but may be more
7719 efficient.
7720
7721 The protocol version of the pickle is detected automatically, so no
7722 protocol argument is needed. Bytes past the pickled object's
7723 representation are ignored.
7724
7725 The argument *file* must have two methods, a read() method that takes
7726 an integer argument, and a readline() method that requires no
7727 arguments. Both methods should return bytes. Thus *file* can be a
7728 binary file object opened for reading, an io.BytesIO object, or any
7729 other custom object that meets this interface.
7730
7731 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7732 which are used to control compatibility support for pickle stream
7733 generated by Python 2. If *fix_imports* is True, pickle will try to
7734 map the old Python 2 names to the new names used in Python 3. The
7735 *encoding* and *errors* tell pickle how to decode 8-bit string
7736 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7737 respectively. The *encoding* can be 'bytes' to read these 8-bit
7738 string instances as bytes objects.
7739 [clinic start generated code]*/
7740
7741 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7742 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7743 const char *encoding, const char *errors,
7744 PyObject *buffers)
7745 /*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
7746 {
7747 PyObject *result;
7748 UnpicklerObject *unpickler = _Unpickler_New(module);
7749
7750 if (unpickler == NULL)
7751 return NULL;
7752
7753 if (_Unpickler_SetInputStream(unpickler, file) < 0)
7754 goto error;
7755
7756 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7757 goto error;
7758
7759 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7760 goto error;
7761
7762 unpickler->fix_imports = fix_imports;
7763
7764 PickleState *state = _Pickle_GetState(module);
7765 result = load(state, unpickler);
7766 Py_DECREF(unpickler);
7767 return result;
7768
7769 error:
7770 Py_XDECREF(unpickler);
7771 return NULL;
7772 }
7773
7774 /*[clinic input]
7775
7776 _pickle.loads
7777
7778 data: object
7779 /
7780 *
7781 fix_imports: bool = True
7782 encoding: str = 'ASCII'
7783 errors: str = 'strict'
7784 buffers: object(c_default="NULL") = ()
7785
7786 Read and return an object from the given pickle data.
7787
7788 The protocol version of the pickle is detected automatically, so no
7789 protocol argument is needed. Bytes past the pickled object's
7790 representation are ignored.
7791
7792 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7793 which are used to control compatibility support for pickle stream
7794 generated by Python 2. If *fix_imports* is True, pickle will try to
7795 map the old Python 2 names to the new names used in Python 3. The
7796 *encoding* and *errors* tell pickle how to decode 8-bit string
7797 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7798 respectively. The *encoding* can be 'bytes' to read these 8-bit
7799 string instances as bytes objects.
7800 [clinic start generated code]*/
7801
7802 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7803 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7804 const char *encoding, const char *errors,
7805 PyObject *buffers)
7806 /*[clinic end generated code: output=82ac1e6b588e6d02 input=b3615540d0535087]*/
7807 {
7808 PyObject *result;
7809 UnpicklerObject *unpickler = _Unpickler_New(module);
7810
7811 if (unpickler == NULL)
7812 return NULL;
7813
7814 if (_Unpickler_SetStringInput(unpickler, data) < 0)
7815 goto error;
7816
7817 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7818 goto error;
7819
7820 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7821 goto error;
7822
7823 unpickler->fix_imports = fix_imports;
7824
7825 PickleState *state = _Pickle_GetState(module);
7826 result = load(state, unpickler);
7827 Py_DECREF(unpickler);
7828 return result;
7829
7830 error:
7831 Py_XDECREF(unpickler);
7832 return NULL;
7833 }
7834
7835 static struct PyMethodDef pickle_methods[] = {
7836 _PICKLE_DUMP_METHODDEF
7837 _PICKLE_DUMPS_METHODDEF
7838 _PICKLE_LOAD_METHODDEF
7839 _PICKLE_LOADS_METHODDEF
7840 {NULL, NULL} /* sentinel */
7841 };
7842
7843 static int
pickle_clear(PyObject * m)7844 pickle_clear(PyObject *m)
7845 {
7846 _Pickle_ClearState(_Pickle_GetState(m));
7847 return 0;
7848 }
7849
7850 static void
pickle_free(PyObject * m)7851 pickle_free(PyObject *m)
7852 {
7853 _Pickle_ClearState(_Pickle_GetState(m));
7854 }
7855
7856 static int
pickle_traverse(PyObject * m,visitproc visit,void * arg)7857 pickle_traverse(PyObject *m, visitproc visit, void *arg)
7858 {
7859 PickleState *st = _Pickle_GetState(m);
7860 Py_VISIT(st->PickleError);
7861 Py_VISIT(st->PicklingError);
7862 Py_VISIT(st->UnpicklingError);
7863 Py_VISIT(st->dispatch_table);
7864 Py_VISIT(st->extension_registry);
7865 Py_VISIT(st->extension_cache);
7866 Py_VISIT(st->inverted_registry);
7867 Py_VISIT(st->name_mapping_2to3);
7868 Py_VISIT(st->import_mapping_2to3);
7869 Py_VISIT(st->name_mapping_3to2);
7870 Py_VISIT(st->import_mapping_3to2);
7871 Py_VISIT(st->codecs_encode);
7872 Py_VISIT(st->getattr);
7873 Py_VISIT(st->partial);
7874 Py_VISIT(st->Pickler_Type);
7875 Py_VISIT(st->Unpickler_Type);
7876 Py_VISIT(st->Pdata_Type);
7877 Py_VISIT(st->PicklerMemoProxyType);
7878 Py_VISIT(st->UnpicklerMemoProxyType);
7879 return 0;
7880 }
7881
7882 static int
_pickle_exec(PyObject * m)7883 _pickle_exec(PyObject *m)
7884 {
7885 PickleState *st = _Pickle_GetState(m);
7886
7887 #define CREATE_TYPE(mod, type, spec) \
7888 do { \
7889 type = (PyTypeObject *)PyType_FromMetaclass(NULL, mod, spec, NULL); \
7890 if (type == NULL) { \
7891 return -1; \
7892 } \
7893 } while (0)
7894
7895 CREATE_TYPE(m, st->Pdata_Type, &pdata_spec);
7896 CREATE_TYPE(m, st->PicklerMemoProxyType, &memoproxy_spec);
7897 CREATE_TYPE(m, st->UnpicklerMemoProxyType, &unpickler_memoproxy_spec);
7898 CREATE_TYPE(m, st->Pickler_Type, &pickler_type_spec);
7899 CREATE_TYPE(m, st->Unpickler_Type, &unpickler_type_spec);
7900
7901 #undef CREATE_TYPE
7902
7903 /* Add types */
7904 if (PyModule_AddType(m, &PyPickleBuffer_Type) < 0) {
7905 return -1;
7906 }
7907 if (PyModule_AddType(m, st->Pickler_Type) < 0) {
7908 return -1;
7909 }
7910 if (PyModule_AddType(m, st->Unpickler_Type) < 0) {
7911 return -1;
7912 }
7913
7914 /* Initialize the exceptions. */
7915 st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7916 if (st->PickleError == NULL)
7917 return -1;
7918 st->PicklingError = \
7919 PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7920 if (st->PicklingError == NULL)
7921 return -1;
7922 st->UnpicklingError = \
7923 PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7924 if (st->UnpicklingError == NULL)
7925 return -1;
7926
7927 if (PyModule_AddObjectRef(m, "PickleError", st->PickleError) < 0) {
7928 return -1;
7929 }
7930 if (PyModule_AddObjectRef(m, "PicklingError", st->PicklingError) < 0) {
7931 return -1;
7932 }
7933 if (PyModule_AddObjectRef(m, "UnpicklingError", st->UnpicklingError) < 0) {
7934 return -1;
7935 }
7936
7937 if (_Pickle_InitState(st) < 0)
7938 return -1;
7939
7940 return 0;
7941 }
7942
7943 static PyModuleDef_Slot pickle_slots[] = {
7944 {Py_mod_exec, _pickle_exec},
7945 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
7946 {Py_mod_gil, Py_MOD_GIL_NOT_USED},
7947 {0, NULL},
7948 };
7949
7950 static struct PyModuleDef _picklemodule = {
7951 PyModuleDef_HEAD_INIT,
7952 .m_name = "_pickle",
7953 .m_doc = pickle_module_doc,
7954 .m_size = sizeof(PickleState),
7955 .m_methods = pickle_methods,
7956 .m_slots = pickle_slots,
7957 .m_traverse = pickle_traverse,
7958 .m_clear = pickle_clear,
7959 .m_free = (freefunc)pickle_free,
7960 };
7961
7962 PyMODINIT_FUNC
PyInit__pickle(void)7963 PyInit__pickle(void)
7964 {
7965 return PyModuleDef_Init(&_picklemodule);
7966 }
7967