/* pickle accelerator C extension: _pickle module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
6
7 #if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8 # error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
9 #endif
10
11 #include "Python.h"
12 #include "structmember.h"
13
14 PyDoc_STRVAR(pickle_module_doc,
15 "Optimized C implementation for the Python pickle module.");
16
17 /*[clinic input]
18 module _pickle
19 class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
20 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
21 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
22 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
23 [clinic start generated code]*/
24 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
25
/* Pickle protocol version numbers known to this module.

   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol.
   Bump DEFAULT_PROTOCOL only when the oldest still supported version of Python
   already includes it. */
enum {
    HIGHEST_PROTOCOL = 5,
    DEFAULT_PROTOCOL = 4
};
33
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Every opcode is written as a single character constant; the groups
   below follow the protocol version that introduced them. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4 */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95',

    /* Protocol 5 */
    BYTEARRAY8       = '\x96',
    NEXT_BUFFER      = '\x97',
    READONLY_BUFFER  = '\x98'
};
114
/* Tuning constants shared by the pickler and the unpickler. */
enum {
    /* Keep in synch with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of synch with pickle.py, but it's unclear that would
       help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16,

    /* Frames whose payload is shorter than this are written without a
       frame header (see _Pickler_CommitFrame()). */
    FRAME_SIZE_MIN = 4,
    /* Payload size at which _Pickler_OpcodeBoundary() commits the
       current frame. */
    FRAME_SIZE_TARGET = 64 * 1024,
    /* Bytes reserved for a frame header: the FRAME opcode byte followed
       by the 8-byte length written by _write_size64(). */
    FRAME_HEADER_SIZE = 9
};
136
137 /*************************************************************************/
138
/* State of the pickle module, per PEP 3121.

   Every object field below is released by _Pickle_ClearState(). */
typedef struct {
    /* Exception classes for pickle. */
    PyObject *PickleError;
    PyObject *PicklingError;
    PyObject *UnpicklingError;

    /* copyreg.dispatch_table, {type_object: pickling_function} */
    PyObject *dispatch_table;

    /* For the extension opcodes EXT1, EXT2 and EXT4. */

    /* copyreg._extension_registry, {(module_name, function_name): code} */
    PyObject *extension_registry;
    /* copyreg._extension_cache, {code: object} */
    PyObject *extension_cache;
    /* copyreg._inverted_registry, {code: (module_name, function_name)} */
    PyObject *inverted_registry;

    /* Import mappings for compatibility with Python 2.x */

    /* _compat_pickle.NAME_MAPPING,
       {(oldmodule, oldname): (newmodule, newname)} */
    PyObject *name_mapping_2to3;
    /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
    PyObject *import_mapping_2to3;
    /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
    PyObject *name_mapping_3to2;
    PyObject *import_mapping_3to2;

    /* codecs.encode, used for saving bytes in older protocols */
    PyObject *codecs_encode;
    /* builtins.getattr, used for saving nested names with protocol < 4 */
    PyObject *getattr;
    /* functools.partial, used for implementing __newobj_ex__ with protocols
       2 and 3 */
    PyObject *partial;
} PickleState;
177
178 /* Forward declaration of the _pickle module definition. */
179 static struct PyModuleDef _picklemodule;
180
181 /* Given a module object, get its per-module state. */
182 static PickleState *
_Pickle_GetState(PyObject * module)183 _Pickle_GetState(PyObject *module)
184 {
185 return (PickleState *)PyModule_GetState(module);
186 }
187
188 /* Find the module instance imported in the currently running sub-interpreter
189 and get its state. */
190 static PickleState *
_Pickle_GetGlobalState(void)191 _Pickle_GetGlobalState(void)
192 {
193 return _Pickle_GetState(PyState_FindModule(&_picklemodule));
194 }
195
196 /* Clear the given pickle module state. */
197 static void
_Pickle_ClearState(PickleState * st)198 _Pickle_ClearState(PickleState *st)
199 {
200 Py_CLEAR(st->PickleError);
201 Py_CLEAR(st->PicklingError);
202 Py_CLEAR(st->UnpicklingError);
203 Py_CLEAR(st->dispatch_table);
204 Py_CLEAR(st->extension_registry);
205 Py_CLEAR(st->extension_cache);
206 Py_CLEAR(st->inverted_registry);
207 Py_CLEAR(st->name_mapping_2to3);
208 Py_CLEAR(st->import_mapping_2to3);
209 Py_CLEAR(st->name_mapping_3to2);
210 Py_CLEAR(st->import_mapping_3to2);
211 Py_CLEAR(st->codecs_encode);
212 Py_CLEAR(st->getattr);
213 Py_CLEAR(st->partial);
214 }
215
216 /* Initialize the given pickle module state. */
217 static int
_Pickle_InitState(PickleState * st)218 _Pickle_InitState(PickleState *st)
219 {
220 PyObject *copyreg = NULL;
221 PyObject *compat_pickle = NULL;
222 PyObject *codecs = NULL;
223 PyObject *functools = NULL;
224 _Py_IDENTIFIER(getattr);
225
226 st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
227 if (st->getattr == NULL)
228 goto error;
229
230 copyreg = PyImport_ImportModule("copyreg");
231 if (!copyreg)
232 goto error;
233 st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
234 if (!st->dispatch_table)
235 goto error;
236 if (!PyDict_CheckExact(st->dispatch_table)) {
237 PyErr_Format(PyExc_RuntimeError,
238 "copyreg.dispatch_table should be a dict, not %.200s",
239 Py_TYPE(st->dispatch_table)->tp_name);
240 goto error;
241 }
242 st->extension_registry = \
243 PyObject_GetAttrString(copyreg, "_extension_registry");
244 if (!st->extension_registry)
245 goto error;
246 if (!PyDict_CheckExact(st->extension_registry)) {
247 PyErr_Format(PyExc_RuntimeError,
248 "copyreg._extension_registry should be a dict, "
249 "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
250 goto error;
251 }
252 st->inverted_registry = \
253 PyObject_GetAttrString(copyreg, "_inverted_registry");
254 if (!st->inverted_registry)
255 goto error;
256 if (!PyDict_CheckExact(st->inverted_registry)) {
257 PyErr_Format(PyExc_RuntimeError,
258 "copyreg._inverted_registry should be a dict, "
259 "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
260 goto error;
261 }
262 st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
263 if (!st->extension_cache)
264 goto error;
265 if (!PyDict_CheckExact(st->extension_cache)) {
266 PyErr_Format(PyExc_RuntimeError,
267 "copyreg._extension_cache should be a dict, "
268 "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
269 goto error;
270 }
271 Py_CLEAR(copyreg);
272
273 /* Load the 2.x -> 3.x stdlib module mapping tables */
274 compat_pickle = PyImport_ImportModule("_compat_pickle");
275 if (!compat_pickle)
276 goto error;
277 st->name_mapping_2to3 = \
278 PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
279 if (!st->name_mapping_2to3)
280 goto error;
281 if (!PyDict_CheckExact(st->name_mapping_2to3)) {
282 PyErr_Format(PyExc_RuntimeError,
283 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
284 Py_TYPE(st->name_mapping_2to3)->tp_name);
285 goto error;
286 }
287 st->import_mapping_2to3 = \
288 PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
289 if (!st->import_mapping_2to3)
290 goto error;
291 if (!PyDict_CheckExact(st->import_mapping_2to3)) {
292 PyErr_Format(PyExc_RuntimeError,
293 "_compat_pickle.IMPORT_MAPPING should be a dict, "
294 "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
295 goto error;
296 }
297 /* ... and the 3.x -> 2.x mapping tables */
298 st->name_mapping_3to2 = \
299 PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
300 if (!st->name_mapping_3to2)
301 goto error;
302 if (!PyDict_CheckExact(st->name_mapping_3to2)) {
303 PyErr_Format(PyExc_RuntimeError,
304 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
305 "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
306 goto error;
307 }
308 st->import_mapping_3to2 = \
309 PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
310 if (!st->import_mapping_3to2)
311 goto error;
312 if (!PyDict_CheckExact(st->import_mapping_3to2)) {
313 PyErr_Format(PyExc_RuntimeError,
314 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
315 "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
316 goto error;
317 }
318 Py_CLEAR(compat_pickle);
319
320 codecs = PyImport_ImportModule("codecs");
321 if (codecs == NULL)
322 goto error;
323 st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
324 if (st->codecs_encode == NULL) {
325 goto error;
326 }
327 if (!PyCallable_Check(st->codecs_encode)) {
328 PyErr_Format(PyExc_RuntimeError,
329 "codecs.encode should be a callable, not %.200s",
330 Py_TYPE(st->codecs_encode)->tp_name);
331 goto error;
332 }
333 Py_CLEAR(codecs);
334
335 functools = PyImport_ImportModule("functools");
336 if (!functools)
337 goto error;
338 st->partial = PyObject_GetAttrString(functools, "partial");
339 if (!st->partial)
340 goto error;
341 Py_CLEAR(functools);
342
343 return 0;
344
345 error:
346 Py_CLEAR(copyreg);
347 Py_CLEAR(compat_pickle);
348 Py_CLEAR(codecs);
349 Py_CLEAR(functools);
350 _Pickle_ClearState(st);
351 return -1;
352 }
353
354 /* Helper for calling a function with a single argument quickly.
355
356 This function steals the reference of the given argument. */
357 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)358 _Pickle_FastCall(PyObject *func, PyObject *obj)
359 {
360 PyObject *result;
361
362 result = PyObject_CallFunctionObjArgs(func, obj, NULL);
363 Py_DECREF(obj);
364 return result;
365 }
366
367 /*************************************************************************/
368
369 /* Retrieve and deconstruct a method for avoiding a reference cycle
370 (pickler -> bound method of pickler -> pickler) */
371 static int
init_method_ref(PyObject * self,_Py_Identifier * name,PyObject ** method_func,PyObject ** method_self)372 init_method_ref(PyObject *self, _Py_Identifier *name,
373 PyObject **method_func, PyObject **method_self)
374 {
375 PyObject *func, *func2;
376 int ret;
377
378 /* *method_func and *method_self should be consistent. All refcount decrements
379 should be occurred after setting *method_self and *method_func. */
380 ret = _PyObject_LookupAttrId(self, name, &func);
381 if (func == NULL) {
382 *method_self = NULL;
383 Py_CLEAR(*method_func);
384 return ret;
385 }
386
387 if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
388 /* Deconstruct a bound Python method */
389 func2 = PyMethod_GET_FUNCTION(func);
390 Py_INCREF(func2);
391 *method_self = self; /* borrowed */
392 Py_XSETREF(*method_func, func2);
393 Py_DECREF(func);
394 return 0;
395 }
396 else {
397 *method_self = NULL;
398 Py_XSETREF(*method_func, func);
399 return 0;
400 }
401 }
402
403 /* Bind a method if it was deconstructed */
404 static PyObject *
reconstruct_method(PyObject * func,PyObject * self)405 reconstruct_method(PyObject *func, PyObject *self)
406 {
407 if (self) {
408 return PyMethod_New(func, self);
409 }
410 else {
411 Py_INCREF(func);
412 return func;
413 }
414 }
415
416 static PyObject *
call_method(PyObject * func,PyObject * self,PyObject * obj)417 call_method(PyObject *func, PyObject *self, PyObject *obj)
418 {
419 if (self) {
420 return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
421 }
422 else {
423 return PyObject_CallFunctionObjArgs(func, obj, NULL);
424 }
425 }
426
427 /*************************************************************************/
428
/* Internal data type used as the unpickling stack.

   The number of live items is kept in the ob_size field of the
   variable-size header (accessed with Py_SIZE()). */
typedef struct {
    PyObject_VAR_HEAD
    PyObject **data;            /* array of stacked objects */
    int mark_set;               /* is MARK set? */
    Py_ssize_t fence;           /* position of top MARK or 0 */
    Py_ssize_t allocated;       /* number of slots in data allocated */
} Pdata;
437
438 static void
Pdata_dealloc(Pdata * self)439 Pdata_dealloc(Pdata *self)
440 {
441 Py_ssize_t i = Py_SIZE(self);
442 while (--i >= 0) {
443 Py_DECREF(self->data[i]);
444 }
445 PyMem_FREE(self->data);
446 PyObject_Del(self);
447 }
448
/* Type object for Pdata.  Only the name, sizes and destructor are given;
   the remaining slots are left zero-initialized. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    sizeof(PyObject *),           /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
456
457 static PyObject *
Pdata_New(void)458 Pdata_New(void)
459 {
460 Pdata *self;
461
462 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
463 return NULL;
464 Py_SIZE(self) = 0;
465 self->mark_set = 0;
466 self->fence = 0;
467 self->allocated = 8;
468 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
469 if (self->data)
470 return (PyObject *)self;
471 Py_DECREF(self);
472 return PyErr_NoMemory();
473 }
474
475
476 /* Retain only the initial clearto items. If clearto >= the current
477 * number of items, this is a (non-erroneous) NOP.
478 */
479 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)480 Pdata_clear(Pdata *self, Py_ssize_t clearto)
481 {
482 Py_ssize_t i = Py_SIZE(self);
483
484 assert(clearto >= self->fence);
485 if (clearto >= i)
486 return 0;
487
488 while (--i >= clearto) {
489 Py_CLEAR(self->data[i]);
490 }
491 Py_SIZE(self) = clearto;
492 return 0;
493 }
494
495 static int
Pdata_grow(Pdata * self)496 Pdata_grow(Pdata *self)
497 {
498 PyObject **data = self->data;
499 size_t allocated = (size_t)self->allocated;
500 size_t new_allocated;
501
502 new_allocated = (allocated >> 3) + 6;
503 /* check for integer overflow */
504 if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
505 goto nomemory;
506 new_allocated += allocated;
507 PyMem_RESIZE(data, PyObject *, new_allocated);
508 if (data == NULL)
509 goto nomemory;
510
511 self->data = data;
512 self->allocated = (Py_ssize_t)new_allocated;
513 return 0;
514
515 nomemory:
516 PyErr_NoMemory();
517 return -1;
518 }
519
520 static int
Pdata_stack_underflow(Pdata * self)521 Pdata_stack_underflow(Pdata *self)
522 {
523 PickleState *st = _Pickle_GetGlobalState();
524 PyErr_SetString(st->UnpicklingError,
525 self->mark_set ?
526 "unexpected MARK found" :
527 "unpickling stack underflow");
528 return -1;
529 }
530
531 /* D is a Pdata*. Pop the topmost element and store it into V, which
532 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
533 * is raised and V is set to NULL.
534 */
535 static PyObject *
Pdata_pop(Pdata * self)536 Pdata_pop(Pdata *self)
537 {
538 if (Py_SIZE(self) <= self->fence) {
539 Pdata_stack_underflow(self);
540 return NULL;
541 }
542 return self->data[--Py_SIZE(self)];
543 }
544 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
545
546 static int
Pdata_push(Pdata * self,PyObject * obj)547 Pdata_push(Pdata *self, PyObject *obj)
548 {
549 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
550 return -1;
551 }
552 self->data[Py_SIZE(self)++] = obj;
553 return 0;
554 }
555
/* Push an object on stack, transferring its ownership to the stack.
   If the push fails, the enclosing function returns ER; the macro itself
   does not release O in that case. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   If the push fails, the enclosing function returns ER; the reference
   added here is not dropped by the macro in that case. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
564
565 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)566 Pdata_poptuple(Pdata *self, Py_ssize_t start)
567 {
568 PyObject *tuple;
569 Py_ssize_t len, i, j;
570
571 if (start < self->fence) {
572 Pdata_stack_underflow(self);
573 return NULL;
574 }
575 len = Py_SIZE(self) - start;
576 tuple = PyTuple_New(len);
577 if (tuple == NULL)
578 return NULL;
579 for (i = start, j = 0; j < len; i++, j++)
580 PyTuple_SET_ITEM(tuple, j, self->data[i]);
581
582 Py_SIZE(self) = start;
583 return tuple;
584 }
585
586 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)587 Pdata_poplist(Pdata *self, Py_ssize_t start)
588 {
589 PyObject *list;
590 Py_ssize_t len, i, j;
591
592 len = Py_SIZE(self) - start;
593 list = PyList_New(len);
594 if (list == NULL)
595 return NULL;
596 for (i = start, j = 0; j < len; i++, j++)
597 PyList_SET_ITEM(list, j, self->data[i]);
598
599 Py_SIZE(self) = start;
600 return list;
601 }
602
/* One slot of the pickler memo hashtable.  A NULL me_key marks an unused
   slot. */
typedef struct {
    PyObject *me_key;       /* memoized object; the table holds a reference */
    Py_ssize_t me_value;    /* integer stored for that object */
} PyMemoEntry;
607
/* Open-addressing hashtable keyed by object identity, used as the pickler
   memo. */
typedef struct {
    size_t mt_mask;         /* mt_allocated - 1, used to wrap probe indices */
    size_t mt_used;         /* number of occupied slots */
    size_t mt_allocated;    /* number of slots in mt_table */
    PyMemoEntry *mt_table;  /* the slot array */
} PyMemoTable;
614
/* Instance layout of _pickle.Pickler objects. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyMemoTable *memo;          /* Memo table, keep track of the seen
                                   objects to support self-referential objects
                                   pickling. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
                                   is an unbound method, NULL otherwise */
    PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
    PyObject *reducer_override; /* hook for invoking user-defined callbacks
                                   instead of save_global when pickling
                                   functions and classes */

    PyObject *write;            /* write() method of the output stream. */
    PyObject *output_buffer;    /* Write into a local bytearray buffer before
                                   flushing to the stream. */
    Py_ssize_t output_len;      /* Length of output_buffer. */
    Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int framing;                /* True when framing is enabled, proto >= 4 */
    Py_ssize_t frame_start;     /* Position in output_buffer where the
                                   current frame begins. -1 if there
                                   is no frame currently open. */

    Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
    int fast;                   /* Enable fast mode if set to a true value.
                                   Fast mode disables the use of the memo,
                                   thereby speeding up the pickling process by
                                   not generating superfluous PUT opcodes. It
                                   should not be used with self-referential
                                   objects. */
    int fast_nesting;
    int fix_imports;            /* Indicate whether Pickler should fix
                                   the name of globals for Python 2.x. */
    PyObject *fast_memo;
    PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
} PicklerObject;
653
/* Instance layout of _pickle.Unpickler objects. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */

    /* The unpickler memo is just an array of PyObject *s. Using a dict
       is unnecessary, since the keys are contiguous ints. */
    PyObject **memo;
    size_t memo_size;           /* Capacity of the memo array */
    size_t memo_len;            /* Number of objects in the memo */

    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
                                   is an unbound method, NULL otherwise */

    Py_buffer buffer;
    char *input_buffer;
    char *input_line;
    Py_ssize_t input_len;
    Py_ssize_t next_read_idx;
    Py_ssize_t prefetched_idx;  /* index of first prefetched byte */

    PyObject *read;             /* read() method of the input stream. */
    PyObject *readinto;         /* readinto() method of the input stream. */
    PyObject *readline;         /* readline() method of the input stream. */
    PyObject *peek;             /* peek() method of the input stream, or NULL */
    PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */

    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value is
                                   "strict". */
    Py_ssize_t *marks;          /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
    int proto;                  /* Protocol of the pickle loaded. */
    int fix_imports;            /* Indicate whether Unpickler should fix
                                   the name of globals pickled by Python 2.x. */
} UnpicklerObject;
695
/* Proxy object giving access to the memo table of a Pickler. */
typedef struct {
    PyObject_HEAD
    PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
} PicklerMemoProxyObject;
700
/* Proxy object wrapping an Unpickler; presumably the counterpart of
   PicklerMemoProxyObject for the unpickler memo. */
typedef struct {
    PyObject_HEAD
    UnpicklerObject *unpickler; /* the wrapped Unpickler */
} UnpicklerMemoProxyObject;
705
706 /* Forward declarations */
707 static int save(PicklerObject *, PyObject *, int);
708 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
709 static PyTypeObject Pickler_Type;
710 static PyTypeObject Unpickler_Type;
711
712 #include "clinic/_pickle.c.h"
713
714 /*************************************************************************
715 A custom hashtable mapping void* to Python ints. This is used by the pickler
716 for memoization. Using a custom hashtable rather than PyDict allows us to skip
717 a bunch of unnecessary object creation. This makes a huge performance
718 difference. */
719
/* Number of slots in a freshly created memo table, and the smallest size
   a resize will pick.  Slot indices are wrapped with (size - 1) as a bit
   mask. */
#define MT_MINSIZE 8
/* Number of bits the perturbation value is shifted right after each probe
   in _PyMemoTable_Lookup(). */
#define PERTURB_SHIFT 5
722
723
724 static PyMemoTable *
PyMemoTable_New(void)725 PyMemoTable_New(void)
726 {
727 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
728 if (memo == NULL) {
729 PyErr_NoMemory();
730 return NULL;
731 }
732
733 memo->mt_used = 0;
734 memo->mt_allocated = MT_MINSIZE;
735 memo->mt_mask = MT_MINSIZE - 1;
736 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
737 if (memo->mt_table == NULL) {
738 PyMem_FREE(memo);
739 PyErr_NoMemory();
740 return NULL;
741 }
742 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
743
744 return memo;
745 }
746
747 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)748 PyMemoTable_Copy(PyMemoTable *self)
749 {
750 PyMemoTable *new = PyMemoTable_New();
751 if (new == NULL)
752 return NULL;
753
754 new->mt_used = self->mt_used;
755 new->mt_allocated = self->mt_allocated;
756 new->mt_mask = self->mt_mask;
757 /* The table we get from _New() is probably smaller than we wanted.
758 Free it and allocate one that's the right size. */
759 PyMem_FREE(new->mt_table);
760 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
761 if (new->mt_table == NULL) {
762 PyMem_FREE(new);
763 PyErr_NoMemory();
764 return NULL;
765 }
766 for (size_t i = 0; i < self->mt_allocated; i++) {
767 Py_XINCREF(self->mt_table[i].me_key);
768 }
769 memcpy(new->mt_table, self->mt_table,
770 sizeof(PyMemoEntry) * self->mt_allocated);
771
772 return new;
773 }
774
775 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)776 PyMemoTable_Size(PyMemoTable *self)
777 {
778 return self->mt_used;
779 }
780
781 static int
PyMemoTable_Clear(PyMemoTable * self)782 PyMemoTable_Clear(PyMemoTable *self)
783 {
784 Py_ssize_t i = self->mt_allocated;
785
786 while (--i >= 0) {
787 Py_XDECREF(self->mt_table[i].me_key);
788 }
789 self->mt_used = 0;
790 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
791 return 0;
792 }
793
794 static void
PyMemoTable_Del(PyMemoTable * self)795 PyMemoTable_Del(PyMemoTable *self)
796 {
797 if (self == NULL)
798 return;
799 PyMemoTable_Clear(self);
800
801 PyMem_FREE(self->mt_table);
802 PyMem_FREE(self);
803 }
804
805 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
806 can be considerably simpler than dictobject.c's lookdict(). */
807 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)808 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
809 {
810 size_t i;
811 size_t perturb;
812 size_t mask = self->mt_mask;
813 PyMemoEntry *table = self->mt_table;
814 PyMemoEntry *entry;
815 Py_hash_t hash = (Py_hash_t)key >> 3;
816
817 i = hash & mask;
818 entry = &table[i];
819 if (entry->me_key == NULL || entry->me_key == key)
820 return entry;
821
822 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
823 i = (i << 2) + i + perturb + 1;
824 entry = &table[i & mask];
825 if (entry->me_key == NULL || entry->me_key == key)
826 return entry;
827 }
828 Py_UNREACHABLE();
829 }
830
831 /* Returns -1 on failure, 0 on success. */
832 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)833 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
834 {
835 PyMemoEntry *oldtable = NULL;
836 PyMemoEntry *oldentry, *newentry;
837 size_t new_size = MT_MINSIZE;
838 size_t to_process;
839
840 assert(min_size > 0);
841
842 if (min_size > PY_SSIZE_T_MAX) {
843 PyErr_NoMemory();
844 return -1;
845 }
846
847 /* Find the smallest valid table size >= min_size. */
848 while (new_size < min_size) {
849 new_size <<= 1;
850 }
851 /* new_size needs to be a power of two. */
852 assert((new_size & (new_size - 1)) == 0);
853
854 /* Allocate new table. */
855 oldtable = self->mt_table;
856 self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
857 if (self->mt_table == NULL) {
858 self->mt_table = oldtable;
859 PyErr_NoMemory();
860 return -1;
861 }
862 self->mt_allocated = new_size;
863 self->mt_mask = new_size - 1;
864 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
865
866 /* Copy entries from the old table. */
867 to_process = self->mt_used;
868 for (oldentry = oldtable; to_process > 0; oldentry++) {
869 if (oldentry->me_key != NULL) {
870 to_process--;
871 /* newentry is a pointer to a chunk of the new
872 mt_table, so we're setting the key:value pair
873 in-place. */
874 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
875 newentry->me_key = oldentry->me_key;
876 newentry->me_value = oldentry->me_value;
877 }
878 }
879
880 /* Deallocate the old table. */
881 PyMem_FREE(oldtable);
882 return 0;
883 }
884
885 /* Returns NULL on failure, a pointer to the value otherwise. */
886 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)887 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
888 {
889 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
890 if (entry->me_key == NULL)
891 return NULL;
892 return &entry->me_value;
893 }
894
895 /* Returns -1 on failure, 0 on success. */
896 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)897 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
898 {
899 PyMemoEntry *entry;
900
901 assert(key != NULL);
902
903 entry = _PyMemoTable_Lookup(self, key);
904 if (entry->me_key != NULL) {
905 entry->me_value = value;
906 return 0;
907 }
908 Py_INCREF(key);
909 entry->me_key = key;
910 entry->me_value = value;
911 self->mt_used++;
912
913 /* If we added a key, we can safely resize. Otherwise just return!
914 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
915 *
916 * Quadrupling the size improves average table sparseness
917 * (reducing collisions) at the cost of some memory. It also halves
918 * the number of expensive resize operations in a growing memo table.
919 *
920 * Very large memo tables (over 50K items) use doubling instead.
921 * This may help applications with severe memory constraints.
922 */
923 if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
924 return 0;
925 }
926 // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
927 size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
928 return _PyMemoTable_ResizeTable(self, desired_size);
929 }
930
931 #undef MT_MINSIZE
932 #undef PERTURB_SHIFT
933
934 /*************************************************************************/
935
936
937 static int
_Pickler_ClearBuffer(PicklerObject * self)938 _Pickler_ClearBuffer(PicklerObject *self)
939 {
940 Py_XSETREF(self->output_buffer,
941 PyBytes_FromStringAndSize(NULL, self->max_output_len));
942 if (self->output_buffer == NULL)
943 return -1;
944 self->output_len = 0;
945 self->frame_start = -1;
946 return 0;
947 }
948
/* Write `value` into out[0..7] as an 8-byte little-endian integer.  Bytes
   beyond sizeof(size_t) are zero-filled. */
static void
_write_size64(char *out, size_t value)
{
    size_t pos;

    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    for (pos = 0; pos < 8; pos++) {
        if (pos < sizeof(size_t)) {
            out[pos] = (unsigned char)((value >> (8 * pos)) & 0xff);
        }
        else {
            out[pos] = 0;
        }
    }
}
963
964 static int
_Pickler_CommitFrame(PicklerObject * self)965 _Pickler_CommitFrame(PicklerObject *self)
966 {
967 size_t frame_len;
968 char *qdata;
969
970 if (!self->framing || self->frame_start == -1)
971 return 0;
972 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
973 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
974 if (frame_len >= FRAME_SIZE_MIN) {
975 qdata[0] = FRAME;
976 _write_size64(qdata + 1, frame_len);
977 }
978 else {
979 memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
980 self->output_len -= FRAME_HEADER_SIZE;
981 }
982 self->frame_start = -1;
983 return 0;
984 }
985
986 static PyObject *
_Pickler_GetString(PicklerObject * self)987 _Pickler_GetString(PicklerObject *self)
988 {
989 PyObject *output_buffer = self->output_buffer;
990
991 assert(self->output_buffer != NULL);
992
993 if (_Pickler_CommitFrame(self))
994 return NULL;
995
996 self->output_buffer = NULL;
997 /* Resize down to exact size */
998 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
999 return NULL;
1000 return output_buffer;
1001 }
1002
1003 static int
_Pickler_FlushToFile(PicklerObject * self)1004 _Pickler_FlushToFile(PicklerObject *self)
1005 {
1006 PyObject *output, *result;
1007
1008 assert(self->write != NULL);
1009
1010 /* This will commit the frame first */
1011 output = _Pickler_GetString(self);
1012 if (output == NULL)
1013 return -1;
1014
1015 result = _Pickle_FastCall(self->write, output);
1016 Py_XDECREF(result);
1017 return (result == NULL) ? -1 : 0;
1018 }
1019
1020 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1021 _Pickler_OpcodeBoundary(PicklerObject *self)
1022 {
1023 Py_ssize_t frame_len;
1024
1025 if (!self->framing || self->frame_start == -1) {
1026 return 0;
1027 }
1028 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1029 if (frame_len >= FRAME_SIZE_TARGET) {
1030 if(_Pickler_CommitFrame(self)) {
1031 return -1;
1032 }
1033 /* Flush the content of the committed frame to the underlying
1034 * file and reuse the pickler buffer for the next frame so as
1035 * to limit memory usage when dumping large complex objects to
1036 * a file.
1037 *
1038 * self->write is NULL when called via dumps.
1039 */
1040 if (self->write != NULL) {
1041 if (_Pickler_FlushToFile(self) < 0) {
1042 return -1;
1043 }
1044 if (_Pickler_ClearBuffer(self) < 0) {
1045 return -1;
1046 }
1047 }
1048 }
1049 return 0;
1050 }
1051
1052 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1053 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1054 {
1055 Py_ssize_t i, n, required;
1056 char *buffer;
1057 int need_new_frame;
1058
1059 assert(s != NULL);
1060 need_new_frame = (self->framing && self->frame_start == -1);
1061
1062 if (need_new_frame)
1063 n = data_len + FRAME_HEADER_SIZE;
1064 else
1065 n = data_len;
1066
1067 required = self->output_len + n;
1068 if (required > self->max_output_len) {
1069 /* Make place in buffer for the pickle chunk */
1070 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1071 PyErr_NoMemory();
1072 return -1;
1073 }
1074 self->max_output_len = (self->output_len + n) / 2 * 3;
1075 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1076 return -1;
1077 }
1078 buffer = PyBytes_AS_STRING(self->output_buffer);
1079 if (need_new_frame) {
1080 /* Setup new frame */
1081 Py_ssize_t frame_start = self->output_len;
1082 self->frame_start = frame_start;
1083 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1084 /* Write an invalid value, for debugging */
1085 buffer[frame_start + i] = 0xFE;
1086 }
1087 self->output_len += FRAME_HEADER_SIZE;
1088 }
1089 if (data_len < 8) {
1090 /* This is faster than memcpy when the string is short. */
1091 for (i = 0; i < data_len; i++) {
1092 buffer[self->output_len + i] = s[i];
1093 }
1094 }
1095 else {
1096 memcpy(buffer + self->output_len, s, data_len);
1097 }
1098 self->output_len += data_len;
1099 return data_len;
1100 }
1101
1102 static PicklerObject *
_Pickler_New(void)1103 _Pickler_New(void)
1104 {
1105 PicklerObject *self;
1106
1107 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1108 if (self == NULL)
1109 return NULL;
1110
1111 self->pers_func = NULL;
1112 self->dispatch_table = NULL;
1113 self->buffer_callback = NULL;
1114 self->write = NULL;
1115 self->proto = 0;
1116 self->bin = 0;
1117 self->framing = 0;
1118 self->frame_start = -1;
1119 self->fast = 0;
1120 self->fast_nesting = 0;
1121 self->fix_imports = 0;
1122 self->fast_memo = NULL;
1123 self->max_output_len = WRITE_BUF_SIZE;
1124 self->output_len = 0;
1125 self->reducer_override = NULL;
1126
1127 self->memo = PyMemoTable_New();
1128 self->output_buffer = PyBytes_FromStringAndSize(NULL,
1129 self->max_output_len);
1130
1131 if (self->memo == NULL || self->output_buffer == NULL) {
1132 Py_DECREF(self);
1133 return NULL;
1134 }
1135
1136 PyObject_GC_Track(self);
1137 return self;
1138 }
1139
1140 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1141 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1142 {
1143 long proto;
1144
1145 if (protocol == Py_None) {
1146 proto = DEFAULT_PROTOCOL;
1147 }
1148 else {
1149 proto = PyLong_AsLong(protocol);
1150 if (proto < 0) {
1151 if (proto == -1 && PyErr_Occurred())
1152 return -1;
1153 proto = HIGHEST_PROTOCOL;
1154 }
1155 else if (proto > HIGHEST_PROTOCOL) {
1156 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1157 HIGHEST_PROTOCOL);
1158 return -1;
1159 }
1160 }
1161 self->proto = (int)proto;
1162 self->bin = proto > 0;
1163 self->fix_imports = fix_imports && proto < 3;
1164 return 0;
1165 }
1166
1167 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1168 be called once on a freshly created Pickler. */
1169 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1170 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1171 {
1172 _Py_IDENTIFIER(write);
1173 assert(file != NULL);
1174 if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1175 return -1;
1176 }
1177 if (self->write == NULL) {
1178 PyErr_SetString(PyExc_TypeError,
1179 "file must have a 'write' attribute");
1180 return -1;
1181 }
1182
1183 return 0;
1184 }
1185
1186 static int
_Pickler_SetBufferCallback(PicklerObject * self,PyObject * buffer_callback)1187 _Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1188 {
1189 if (buffer_callback == Py_None) {
1190 buffer_callback = NULL;
1191 }
1192 if (buffer_callback != NULL && self->proto < 5) {
1193 PyErr_SetString(PyExc_ValueError,
1194 "buffer_callback needs protocol >= 5");
1195 return -1;
1196 }
1197
1198 Py_XINCREF(buffer_callback);
1199 self->buffer_callback = buffer_callback;
1200 return 0;
1201 }
1202
1203 /* Returns the size of the input on success, -1 on failure. This takes its
1204 own reference to `input`. */
1205 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1206 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1207 {
1208 if (self->buffer.buf != NULL)
1209 PyBuffer_Release(&self->buffer);
1210 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1211 return -1;
1212 self->input_buffer = self->buffer.buf;
1213 self->input_len = self->buffer.len;
1214 self->next_read_idx = 0;
1215 self->prefetched_idx = self->input_len;
1216 return self->input_len;
1217 }
1218
1219 static int
bad_readline(void)1220 bad_readline(void)
1221 {
1222 PickleState *st = _Pickle_GetGlobalState();
1223 PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1224 return -1;
1225 }
1226
1227 /* Skip any consumed data that was only prefetched using peek() */
1228 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1229 _Unpickler_SkipConsumed(UnpicklerObject *self)
1230 {
1231 Py_ssize_t consumed;
1232 PyObject *r;
1233
1234 consumed = self->next_read_idx - self->prefetched_idx;
1235 if (consumed <= 0)
1236 return 0;
1237
1238 assert(self->peek); /* otherwise we did something wrong */
1239 /* This makes a useless copy... */
1240 r = PyObject_CallFunction(self->read, "n", consumed);
1241 if (r == NULL)
1242 return -1;
1243 Py_DECREF(r);
1244
1245 self->prefetched_idx = self->next_read_idx;
1246 return 0;
1247 }
1248
1249 static const Py_ssize_t READ_WHOLE_LINE = -1;
1250
1251 /* If reading from a file, we need to only pull the bytes we need, since there
1252 may be multiple pickle objects arranged contiguously in the same input
1253 buffer.
1254
1255 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1256 bytes from the input stream/buffer.
1257
1258 Update the unpickler's input buffer with the newly-read data. Returns -1 on
1259 failure; on success, returns the number of bytes read from the file.
1260
1261 On success, self->input_len will be 0; this is intentional so that when
1262 unpickling from a file, the "we've run out of data" code paths will trigger,
1263 causing the Unpickler to go back to the file for more data. Use the returned
1264 size to tell you how much data you can process. */
1265 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1266 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1267 {
1268 PyObject *data;
1269 Py_ssize_t read_size;
1270
1271 assert(self->read != NULL);
1272
1273 if (_Unpickler_SkipConsumed(self) < 0)
1274 return -1;
1275
1276 if (n == READ_WHOLE_LINE) {
1277 data = _PyObject_CallNoArg(self->readline);
1278 }
1279 else {
1280 PyObject *len;
1281 /* Prefetch some data without advancing the file pointer, if possible */
1282 if (self->peek && n < PREFETCH) {
1283 len = PyLong_FromSsize_t(PREFETCH);
1284 if (len == NULL)
1285 return -1;
1286 data = _Pickle_FastCall(self->peek, len);
1287 if (data == NULL) {
1288 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1289 return -1;
1290 /* peek() is probably not supported by the given file object */
1291 PyErr_Clear();
1292 Py_CLEAR(self->peek);
1293 }
1294 else {
1295 read_size = _Unpickler_SetStringInput(self, data);
1296 Py_DECREF(data);
1297 self->prefetched_idx = 0;
1298 if (n <= read_size)
1299 return n;
1300 }
1301 }
1302 len = PyLong_FromSsize_t(n);
1303 if (len == NULL)
1304 return -1;
1305 data = _Pickle_FastCall(self->read, len);
1306 }
1307 if (data == NULL)
1308 return -1;
1309
1310 read_size = _Unpickler_SetStringInput(self, data);
1311 Py_DECREF(data);
1312 return read_size;
1313 }
1314
1315 /* Don't call it directly: use _Unpickler_Read() */
1316 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1317 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1318 {
1319 Py_ssize_t num_read;
1320
1321 *s = NULL;
1322 if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1323 PickleState *st = _Pickle_GetGlobalState();
1324 PyErr_SetString(st->UnpicklingError,
1325 "read would overflow (invalid bytecode)");
1326 return -1;
1327 }
1328
1329 /* This case is handled by the _Unpickler_Read() macro for efficiency */
1330 assert(self->next_read_idx + n > self->input_len);
1331
1332 if (!self->read)
1333 return bad_readline();
1334
1335 /* Extend the buffer to satisfy desired size */
1336 num_read = _Unpickler_ReadFromFile(self, n);
1337 if (num_read < 0)
1338 return -1;
1339 if (num_read < n)
1340 return bad_readline();
1341 *s = self->input_buffer;
1342 self->next_read_idx = n;
1343 return n;
1344 }
1345
1346 /* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1347 *
1348 * This should only be used for non-small data reads where potentially
1349 * avoiding a copy is beneficial. This method does not try to prefetch
1350 * more data into the input buffer.
1351 *
1352 * _Unpickler_Read() is recommended in most cases.
1353 */
1354 static Py_ssize_t
_Unpickler_ReadInto(UnpicklerObject * self,char * buf,Py_ssize_t n)1355 _Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1356 {
1357 assert(n != READ_WHOLE_LINE);
1358
1359 /* Read from available buffer data, if any */
1360 Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1361 if (in_buffer > 0) {
1362 Py_ssize_t to_read = Py_MIN(in_buffer, n);
1363 memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1364 self->next_read_idx += to_read;
1365 buf += to_read;
1366 n -= to_read;
1367 if (n == 0) {
1368 /* Entire read was satisfied from buffer */
1369 return n;
1370 }
1371 }
1372
1373 /* Read from file */
1374 if (!self->readinto) {
1375 return bad_readline();
1376 }
1377 if (_Unpickler_SkipConsumed(self) < 0) {
1378 return -1;
1379 }
1380
1381 /* Call readinto() into user buffer */
1382 PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1383 if (buf_obj == NULL) {
1384 return -1;
1385 }
1386 PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1387 if (read_size_obj == NULL) {
1388 return -1;
1389 }
1390 Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1391 Py_DECREF(read_size_obj);
1392
1393 if (read_size < 0) {
1394 if (!PyErr_Occurred()) {
1395 PyErr_SetString(PyExc_ValueError,
1396 "readinto() returned negative size");
1397 }
1398 return -1;
1399 }
1400 if (read_size < n) {
1401 return bad_readline();
1402 }
1403 return n;
1404 }
1405
/* Read `n` bytes from the unpickler's data source, storing a pointer to them
   in `*s`.

   This should be used for all data reads, rather than accessing the
   unpickler's input buffer directly.

   Fast path: when the input buffer still holds at least `n` unread bytes,
   `*s` is pointed at the current read position and next_read_idx advances by
   `n`.  Otherwise the request is delegated to _Unpickler_ReadImpl(), which
   goes back to the underlying file.

   This is a macro: `self`, `s` and `n` are evaluated more than once, so the
   arguments must be free of side effects.

   Returns -1 (with an exception set) on failure.  On success, returns the
   number of chars read. */
#define _Unpickler_Read(self, s, n) \
    (((self)->input_len - (self)->next_read_idx >= (n)) \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
        (self)->next_read_idx += (n), \
        (n)) \
     : _Unpickler_ReadImpl(self, (s), (n)))
1425
1426 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1427 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1428 char **result)
1429 {
1430 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1431 if (input_line == NULL) {
1432 PyErr_NoMemory();
1433 return -1;
1434 }
1435
1436 memcpy(input_line, line, len);
1437 input_line[len] = '\0';
1438 self->input_line = input_line;
1439 *result = self->input_line;
1440 return len;
1441 }
1442
1443 /* Read a line from the input stream/buffer. If we run off the end of the input
1444 before hitting \n, raise an error.
1445
1446 Returns the number of chars read, or -1 on failure. */
1447 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1448 _Unpickler_Readline(UnpicklerObject *self, char **result)
1449 {
1450 Py_ssize_t i, num_read;
1451
1452 for (i = self->next_read_idx; i < self->input_len; i++) {
1453 if (self->input_buffer[i] == '\n') {
1454 char *line_start = self->input_buffer + self->next_read_idx;
1455 num_read = i - self->next_read_idx + 1;
1456 self->next_read_idx = i + 1;
1457 return _Unpickler_CopyLine(self, line_start, num_read, result);
1458 }
1459 }
1460 if (!self->read)
1461 return bad_readline();
1462
1463 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1464 if (num_read < 0)
1465 return -1;
1466 if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1467 return bad_readline();
1468 self->next_read_idx = num_read;
1469 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1470 }
1471
1472 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1473 will be modified in place. */
1474 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1475 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1476 {
1477 size_t i;
1478
1479 assert(new_size > self->memo_size);
1480
1481 PyObject **memo_new = self->memo;
1482 PyMem_RESIZE(memo_new, PyObject *, new_size);
1483 if (memo_new == NULL) {
1484 PyErr_NoMemory();
1485 return -1;
1486 }
1487 self->memo = memo_new;
1488 for (i = self->memo_size; i < new_size; i++)
1489 self->memo[i] = NULL;
1490 self->memo_size = new_size;
1491 return 0;
1492 }
1493
1494 /* Returns NULL if idx is out of bounds. */
1495 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1496 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1497 {
1498 if (idx >= self->memo_size)
1499 return NULL;
1500
1501 return self->memo[idx];
1502 }
1503
1504 /* Returns -1 (with an exception set) on failure, 0 on success.
1505 This takes its own reference to `value`. */
1506 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1507 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1508 {
1509 PyObject *old_item;
1510
1511 if (idx >= self->memo_size) {
1512 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1513 return -1;
1514 assert(idx < self->memo_size);
1515 }
1516 Py_INCREF(value);
1517 old_item = self->memo[idx];
1518 self->memo[idx] = value;
1519 if (old_item != NULL) {
1520 Py_DECREF(old_item);
1521 }
1522 else {
1523 self->memo_len++;
1524 }
1525 return 0;
1526 }
1527
1528 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1529 _Unpickler_NewMemo(Py_ssize_t new_size)
1530 {
1531 PyObject **memo = PyMem_NEW(PyObject *, new_size);
1532 if (memo == NULL) {
1533 PyErr_NoMemory();
1534 return NULL;
1535 }
1536 memset(memo, 0, new_size * sizeof(PyObject *));
1537 return memo;
1538 }
1539
1540 /* Free the unpickler's memo, taking care to decref any items left in it. */
1541 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1542 _Unpickler_MemoCleanup(UnpicklerObject *self)
1543 {
1544 Py_ssize_t i;
1545 PyObject **memo = self->memo;
1546
1547 if (self->memo == NULL)
1548 return;
1549 self->memo = NULL;
1550 i = self->memo_size;
1551 while (--i >= 0) {
1552 Py_XDECREF(memo[i]);
1553 }
1554 PyMem_FREE(memo);
1555 }
1556
1557 static UnpicklerObject *
_Unpickler_New(void)1558 _Unpickler_New(void)
1559 {
1560 UnpicklerObject *self;
1561
1562 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1563 if (self == NULL)
1564 return NULL;
1565
1566 self->pers_func = NULL;
1567 self->input_buffer = NULL;
1568 self->input_line = NULL;
1569 self->input_len = 0;
1570 self->next_read_idx = 0;
1571 self->prefetched_idx = 0;
1572 self->read = NULL;
1573 self->readinto = NULL;
1574 self->readline = NULL;
1575 self->peek = NULL;
1576 self->buffers = NULL;
1577 self->encoding = NULL;
1578 self->errors = NULL;
1579 self->marks = NULL;
1580 self->num_marks = 0;
1581 self->marks_size = 0;
1582 self->proto = 0;
1583 self->fix_imports = 0;
1584 memset(&self->buffer, 0, sizeof(Py_buffer));
1585 self->memo_size = 32;
1586 self->memo_len = 0;
1587 self->memo = _Unpickler_NewMemo(self->memo_size);
1588 self->stack = (Pdata *)Pdata_New();
1589
1590 if (self->memo == NULL || self->stack == NULL) {
1591 Py_DECREF(self);
1592 return NULL;
1593 }
1594
1595 PyObject_GC_Track(self);
1596 return self;
1597 }
1598
1599 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1600 be called once on a freshly created Unpickler. */
1601 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1602 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1603 {
1604 _Py_IDENTIFIER(peek);
1605 _Py_IDENTIFIER(read);
1606 _Py_IDENTIFIER(readinto);
1607 _Py_IDENTIFIER(readline);
1608
1609 if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1610 return -1;
1611 }
1612 (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1613 (void)_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto);
1614 (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
1615 if (!self->readline || !self->readinto || !self->read) {
1616 if (!PyErr_Occurred()) {
1617 PyErr_SetString(PyExc_TypeError,
1618 "file must have 'read', 'readinto' and "
1619 "'readline' attributes");
1620 }
1621 Py_CLEAR(self->read);
1622 Py_CLEAR(self->readinto);
1623 Py_CLEAR(self->readline);
1624 Py_CLEAR(self->peek);
1625 return -1;
1626 }
1627 return 0;
1628 }
1629
1630 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1631 be called once on a freshly created Unpickler. */
1632 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1633 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1634 const char *encoding,
1635 const char *errors)
1636 {
1637 if (encoding == NULL)
1638 encoding = "ASCII";
1639 if (errors == NULL)
1640 errors = "strict";
1641
1642 self->encoding = _PyMem_Strdup(encoding);
1643 self->errors = _PyMem_Strdup(errors);
1644 if (self->encoding == NULL || self->errors == NULL) {
1645 PyErr_NoMemory();
1646 return -1;
1647 }
1648 return 0;
1649 }
1650
1651 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1652 be called once on a freshly created Unpickler. */
1653 static int
_Unpickler_SetBuffers(UnpicklerObject * self,PyObject * buffers)1654 _Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1655 {
1656 if (buffers == NULL || buffers == Py_None) {
1657 self->buffers = NULL;
1658 }
1659 else {
1660 self->buffers = PyObject_GetIter(buffers);
1661 if (self->buffers == NULL) {
1662 return -1;
1663 }
1664 }
1665 return 0;
1666 }
1667
1668 /* Generate a GET opcode for an object stored in the memo. */
1669 static int
memo_get(PicklerObject * self,PyObject * key)1670 memo_get(PicklerObject *self, PyObject *key)
1671 {
1672 Py_ssize_t *value;
1673 char pdata[30];
1674 Py_ssize_t len;
1675
1676 value = PyMemoTable_Get(self->memo, key);
1677 if (value == NULL) {
1678 PyErr_SetObject(PyExc_KeyError, key);
1679 return -1;
1680 }
1681
1682 if (!self->bin) {
1683 pdata[0] = GET;
1684 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1685 "%" PY_FORMAT_SIZE_T "d\n", *value);
1686 len = strlen(pdata);
1687 }
1688 else {
1689 if (*value < 256) {
1690 pdata[0] = BINGET;
1691 pdata[1] = (unsigned char)(*value & 0xff);
1692 len = 2;
1693 }
1694 else if ((size_t)*value <= 0xffffffffUL) {
1695 pdata[0] = LONG_BINGET;
1696 pdata[1] = (unsigned char)(*value & 0xff);
1697 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1698 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1699 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1700 len = 5;
1701 }
1702 else { /* unlikely */
1703 PickleState *st = _Pickle_GetGlobalState();
1704 PyErr_SetString(st->PicklingError,
1705 "memo id too large for LONG_BINGET");
1706 return -1;
1707 }
1708 }
1709
1710 if (_Pickler_Write(self, pdata, len) < 0)
1711 return -1;
1712
1713 return 0;
1714 }
1715
1716 /* Store an object in the memo, assign it a new unique ID based on the number
1717 of objects currently stored in the memo and generate a PUT opcode. */
1718 static int
memo_put(PicklerObject * self,PyObject * obj)1719 memo_put(PicklerObject *self, PyObject *obj)
1720 {
1721 char pdata[30];
1722 Py_ssize_t len;
1723 Py_ssize_t idx;
1724
1725 const char memoize_op = MEMOIZE;
1726
1727 if (self->fast)
1728 return 0;
1729
1730 idx = PyMemoTable_Size(self->memo);
1731 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1732 return -1;
1733
1734 if (self->proto >= 4) {
1735 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1736 return -1;
1737 return 0;
1738 }
1739 else if (!self->bin) {
1740 pdata[0] = PUT;
1741 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1742 "%" PY_FORMAT_SIZE_T "d\n", idx);
1743 len = strlen(pdata);
1744 }
1745 else {
1746 if (idx < 256) {
1747 pdata[0] = BINPUT;
1748 pdata[1] = (unsigned char)idx;
1749 len = 2;
1750 }
1751 else if ((size_t)idx <= 0xffffffffUL) {
1752 pdata[0] = LONG_BINPUT;
1753 pdata[1] = (unsigned char)(idx & 0xff);
1754 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1755 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1756 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1757 len = 5;
1758 }
1759 else { /* unlikely */
1760 PickleState *st = _Pickle_GetGlobalState();
1761 PyErr_SetString(st->PicklingError,
1762 "memo id too large for LONG_BINPUT");
1763 return -1;
1764 }
1765 }
1766 if (_Pickler_Write(self, pdata, len) < 0)
1767 return -1;
1768
1769 return 0;
1770 }
1771
1772 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1773 get_dotted_path(PyObject *obj, PyObject *name)
1774 {
1775 _Py_static_string(PyId_dot, ".");
1776 PyObject *dotted_path;
1777 Py_ssize_t i, n;
1778
1779 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1780 if (dotted_path == NULL)
1781 return NULL;
1782 n = PyList_GET_SIZE(dotted_path);
1783 assert(n >= 1);
1784 for (i = 0; i < n; i++) {
1785 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1786 if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1787 if (obj == NULL)
1788 PyErr_Format(PyExc_AttributeError,
1789 "Can't pickle local object %R", name);
1790 else
1791 PyErr_Format(PyExc_AttributeError,
1792 "Can't pickle local attribute %R on %R", name, obj);
1793 Py_DECREF(dotted_path);
1794 return NULL;
1795 }
1796 }
1797 return dotted_path;
1798 }
1799
1800 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1801 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1802 {
1803 Py_ssize_t i, n;
1804 PyObject *parent = NULL;
1805
1806 assert(PyList_CheckExact(names));
1807 Py_INCREF(obj);
1808 n = PyList_GET_SIZE(names);
1809 for (i = 0; i < n; i++) {
1810 PyObject *name = PyList_GET_ITEM(names, i);
1811 Py_XDECREF(parent);
1812 parent = obj;
1813 (void)_PyObject_LookupAttr(parent, name, &obj);
1814 if (obj == NULL) {
1815 Py_DECREF(parent);
1816 return NULL;
1817 }
1818 }
1819 if (pparent != NULL)
1820 *pparent = parent;
1821 else
1822 Py_XDECREF(parent);
1823 return obj;
1824 }
1825
1826
1827 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1828 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1829 {
1830 PyObject *dotted_path, *attr;
1831
1832 if (allow_qualname) {
1833 dotted_path = get_dotted_path(obj, name);
1834 if (dotted_path == NULL)
1835 return NULL;
1836 attr = get_deep_attribute(obj, dotted_path, NULL);
1837 Py_DECREF(dotted_path);
1838 }
1839 else {
1840 (void)_PyObject_LookupAttr(obj, name, &attr);
1841 }
1842 if (attr == NULL && !PyErr_Occurred()) {
1843 PyErr_Format(PyExc_AttributeError,
1844 "Can't get attribute %R on %R", name, obj);
1845 }
1846 return attr;
1847 }
1848
1849 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1850 _checkmodule(PyObject *module_name, PyObject *module,
1851 PyObject *global, PyObject *dotted_path)
1852 {
1853 if (module == Py_None) {
1854 return -1;
1855 }
1856 if (PyUnicode_Check(module_name) &&
1857 _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1858 return -1;
1859 }
1860
1861 PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1862 if (candidate == NULL) {
1863 return -1;
1864 }
1865 if (candidate != global) {
1866 Py_DECREF(candidate);
1867 return -1;
1868 }
1869 Py_DECREF(candidate);
1870 return 0;
1871 }
1872
1873 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1874 whichmodule(PyObject *global, PyObject *dotted_path)
1875 {
1876 PyObject *module_name;
1877 PyObject *module = NULL;
1878 Py_ssize_t i;
1879 PyObject *modules;
1880 _Py_IDENTIFIER(__module__);
1881 _Py_IDENTIFIER(modules);
1882 _Py_IDENTIFIER(__main__);
1883
1884 if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1885 return NULL;
1886 }
1887 if (module_name) {
1888 /* In some rare cases (e.g., bound methods of extension types),
1889 __module__ can be None. If it is so, then search sys.modules for
1890 the module of global. */
1891 if (module_name != Py_None)
1892 return module_name;
1893 Py_CLEAR(module_name);
1894 }
1895 assert(module_name == NULL);
1896
1897 /* Fallback on walking sys.modules */
1898 modules = _PySys_GetObjectId(&PyId_modules);
1899 if (modules == NULL) {
1900 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1901 return NULL;
1902 }
1903 if (PyDict_CheckExact(modules)) {
1904 i = 0;
1905 while (PyDict_Next(modules, &i, &module_name, &module)) {
1906 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1907 Py_INCREF(module_name);
1908 return module_name;
1909 }
1910 if (PyErr_Occurred()) {
1911 return NULL;
1912 }
1913 }
1914 }
1915 else {
1916 PyObject *iterator = PyObject_GetIter(modules);
1917 if (iterator == NULL) {
1918 return NULL;
1919 }
1920 while ((module_name = PyIter_Next(iterator))) {
1921 module = PyObject_GetItem(modules, module_name);
1922 if (module == NULL) {
1923 Py_DECREF(module_name);
1924 Py_DECREF(iterator);
1925 return NULL;
1926 }
1927 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1928 Py_DECREF(module);
1929 Py_DECREF(iterator);
1930 return module_name;
1931 }
1932 Py_DECREF(module);
1933 Py_DECREF(module_name);
1934 if (PyErr_Occurred()) {
1935 Py_DECREF(iterator);
1936 return NULL;
1937 }
1938 }
1939 Py_DECREF(iterator);
1940 }
1941
1942 /* If no module is found, use __main__. */
1943 module_name = _PyUnicode_FromId(&PyId___main__);
1944 Py_XINCREF(module_name);
1945 return module_name;
1946 }
1947
1948 /* fast_save_enter() and fast_save_leave() are guards against recursive
1949 objects when Pickler is used with the "fast mode" (i.e., with object
1950 memoization disabled). If the nesting of a list or dict object exceed
1951 FAST_NESTING_LIMIT, these guards will start keeping an internal
1952 reference to the seen list or dict objects and check whether these objects
1953 are recursive. These are not strictly necessary, since save() has a
1954 hard-coded recursion limit, but they give a nicer error message than the
1955 typical RuntimeError. */
1956 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1957 fast_save_enter(PicklerObject *self, PyObject *obj)
1958 {
1959 /* if fast_nesting < 0, we're doing an error exit. */
1960 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1961 PyObject *key = NULL;
1962 if (self->fast_memo == NULL) {
1963 self->fast_memo = PyDict_New();
1964 if (self->fast_memo == NULL) {
1965 self->fast_nesting = -1;
1966 return 0;
1967 }
1968 }
1969 key = PyLong_FromVoidPtr(obj);
1970 if (key == NULL) {
1971 self->fast_nesting = -1;
1972 return 0;
1973 }
1974 if (PyDict_GetItemWithError(self->fast_memo, key)) {
1975 Py_DECREF(key);
1976 PyErr_Format(PyExc_ValueError,
1977 "fast mode: can't pickle cyclic objects "
1978 "including object type %.200s at %p",
1979 obj->ob_type->tp_name, obj);
1980 self->fast_nesting = -1;
1981 return 0;
1982 }
1983 if (PyErr_Occurred()) {
1984 Py_DECREF(key);
1985 self->fast_nesting = -1;
1986 return 0;
1987 }
1988 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1989 Py_DECREF(key);
1990 self->fast_nesting = -1;
1991 return 0;
1992 }
1993 Py_DECREF(key);
1994 }
1995 return 1;
1996 }
1997
1998 static int
fast_save_leave(PicklerObject * self,PyObject * obj)1999 fast_save_leave(PicklerObject *self, PyObject *obj)
2000 {
2001 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2002 PyObject *key = PyLong_FromVoidPtr(obj);
2003 if (key == NULL)
2004 return 0;
2005 if (PyDict_DelItem(self->fast_memo, key) < 0) {
2006 Py_DECREF(key);
2007 return 0;
2008 }
2009 Py_DECREF(key);
2010 }
2011 return 1;
2012 }
2013
2014 static int
save_none(PicklerObject * self,PyObject * obj)2015 save_none(PicklerObject *self, PyObject *obj)
2016 {
2017 const char none_op = NONE;
2018 if (_Pickler_Write(self, &none_op, 1) < 0)
2019 return -1;
2020
2021 return 0;
2022 }
2023
2024 static int
save_bool(PicklerObject * self,PyObject * obj)2025 save_bool(PicklerObject *self, PyObject *obj)
2026 {
2027 if (self->proto >= 2) {
2028 const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
2029 if (_Pickler_Write(self, &bool_op, 1) < 0)
2030 return -1;
2031 }
2032 else {
2033 /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2034 * so that unpicklers written before bools were introduced unpickle them
2035 * as ints, but unpicklers after can recognize that bools were intended.
2036 * Note that protocol 2 added direct ways to pickle bools.
2037 */
2038 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2039 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2040 return -1;
2041 }
2042 return 0;
2043 }
2044
2045 static int
save_long(PicklerObject * self,PyObject * obj)2046 save_long(PicklerObject *self, PyObject *obj)
2047 {
2048 PyObject *repr = NULL;
2049 Py_ssize_t size;
2050 long val;
2051 int overflow;
2052 int status = 0;
2053
2054 val= PyLong_AsLongAndOverflow(obj, &overflow);
2055 if (!overflow && (sizeof(long) <= 4 ||
2056 (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2057 {
2058 /* result fits in a signed 4-byte integer.
2059
2060 Note: we can't use -0x80000000L in the above condition because some
2061 compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2062 before applying the unary minus when sizeof(long) <= 4. The
2063 resulting value stays unsigned which is commonly not what we want,
2064 so MSVC happily warns us about it. However, that result would have
2065 been fine because we guard for sizeof(long) <= 4 which turns the
2066 condition true in that particular case. */
2067 char pdata[32];
2068 Py_ssize_t len = 0;
2069
2070 if (self->bin) {
2071 pdata[1] = (unsigned char)(val & 0xff);
2072 pdata[2] = (unsigned char)((val >> 8) & 0xff);
2073 pdata[3] = (unsigned char)((val >> 16) & 0xff);
2074 pdata[4] = (unsigned char)((val >> 24) & 0xff);
2075
2076 if ((pdata[4] != 0) || (pdata[3] != 0)) {
2077 pdata[0] = BININT;
2078 len = 5;
2079 }
2080 else if (pdata[2] != 0) {
2081 pdata[0] = BININT2;
2082 len = 3;
2083 }
2084 else {
2085 pdata[0] = BININT1;
2086 len = 2;
2087 }
2088 }
2089 else {
2090 sprintf(pdata, "%c%ld\n", INT, val);
2091 len = strlen(pdata);
2092 }
2093 if (_Pickler_Write(self, pdata, len) < 0)
2094 return -1;
2095
2096 return 0;
2097 }
2098 assert(!PyErr_Occurred());
2099
2100 if (self->proto >= 2) {
2101 /* Linear-time pickling. */
2102 size_t nbits;
2103 size_t nbytes;
2104 unsigned char *pdata;
2105 char header[5];
2106 int i;
2107 int sign = _PyLong_Sign(obj);
2108
2109 if (sign == 0) {
2110 header[0] = LONG1;
2111 header[1] = 0; /* It's 0 -- an empty bytestring. */
2112 if (_Pickler_Write(self, header, 2) < 0)
2113 goto error;
2114 return 0;
2115 }
2116 nbits = _PyLong_NumBits(obj);
2117 if (nbits == (size_t)-1 && PyErr_Occurred())
2118 goto error;
2119 /* How many bytes do we need? There are nbits >> 3 full
2120 * bytes of data, and nbits & 7 leftover bits. If there
2121 * are any leftover bits, then we clearly need another
2122 * byte. What's not so obvious is that we *probably*
2123 * need another byte even if there aren't any leftovers:
2124 * the most-significant bit of the most-significant byte
2125 * acts like a sign bit, and it's usually got a sense
2126 * opposite of the one we need. The exception is ints
2127 * of the form -(2**(8*j-1)) for j > 0. Such an int is
2128 * its own 256's-complement, so has the right sign bit
2129 * even without the extra byte. That's a pain to check
2130 * for in advance, though, so we always grab an extra
2131 * byte at the start, and cut it back later if possible.
2132 */
2133 nbytes = (nbits >> 3) + 1;
2134 if (nbytes > 0x7fffffffL) {
2135 PyErr_SetString(PyExc_OverflowError,
2136 "int too large to pickle");
2137 goto error;
2138 }
2139 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2140 if (repr == NULL)
2141 goto error;
2142 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2143 i = _PyLong_AsByteArray((PyLongObject *)obj,
2144 pdata, nbytes,
2145 1 /* little endian */ , 1 /* signed */ );
2146 if (i < 0)
2147 goto error;
2148 /* If the int is negative, this may be a byte more than
2149 * needed. This is so iff the MSB is all redundant sign
2150 * bits.
2151 */
2152 if (sign < 0 &&
2153 nbytes > 1 &&
2154 pdata[nbytes - 1] == 0xff &&
2155 (pdata[nbytes - 2] & 0x80) != 0) {
2156 nbytes--;
2157 }
2158
2159 if (nbytes < 256) {
2160 header[0] = LONG1;
2161 header[1] = (unsigned char)nbytes;
2162 size = 2;
2163 }
2164 else {
2165 header[0] = LONG4;
2166 size = (Py_ssize_t) nbytes;
2167 for (i = 1; i < 5; i++) {
2168 header[i] = (unsigned char)(size & 0xff);
2169 size >>= 8;
2170 }
2171 size = 5;
2172 }
2173 if (_Pickler_Write(self, header, size) < 0 ||
2174 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2175 goto error;
2176 }
2177 else {
2178 const char long_op = LONG;
2179 const char *string;
2180
2181 /* proto < 2: write the repr and newline. This is quadratic-time (in
2182 the number of digits), in both directions. We add a trailing 'L'
2183 to the repr, for compatibility with Python 2.x. */
2184
2185 repr = PyObject_Repr(obj);
2186 if (repr == NULL)
2187 goto error;
2188
2189 string = PyUnicode_AsUTF8AndSize(repr, &size);
2190 if (string == NULL)
2191 goto error;
2192
2193 if (_Pickler_Write(self, &long_op, 1) < 0 ||
2194 _Pickler_Write(self, string, size) < 0 ||
2195 _Pickler_Write(self, "L\n", 2) < 0)
2196 goto error;
2197 }
2198
2199 if (0) {
2200 error:
2201 status = -1;
2202 }
2203 Py_XDECREF(repr);
2204
2205 return status;
2206 }
2207
2208 static int
save_float(PicklerObject * self,PyObject * obj)2209 save_float(PicklerObject *self, PyObject *obj)
2210 {
2211 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2212
2213 if (self->bin) {
2214 char pdata[9];
2215 pdata[0] = BINFLOAT;
2216 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2217 return -1;
2218 if (_Pickler_Write(self, pdata, 9) < 0)
2219 return -1;
2220 }
2221 else {
2222 int result = -1;
2223 char *buf = NULL;
2224 char op = FLOAT;
2225
2226 if (_Pickler_Write(self, &op, 1) < 0)
2227 goto done;
2228
2229 buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2230 if (!buf) {
2231 PyErr_NoMemory();
2232 goto done;
2233 }
2234
2235 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2236 goto done;
2237
2238 if (_Pickler_Write(self, "\n", 1) < 0)
2239 goto done;
2240
2241 result = 0;
2242 done:
2243 PyMem_Free(buf);
2244 return result;
2245 }
2246
2247 return 0;
2248 }
2249
2250 /* Perform direct write of the header and payload of the binary object.
2251
2252 The large contiguous data is written directly into the underlying file
2253 object, bypassing the output_buffer of the Pickler. We intentionally
2254 do not insert a protocol 4 frame opcode to make it possible to optimize
2255 file.read calls in the loader.
2256 */
2257 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2258 _Pickler_write_bytes(PicklerObject *self,
2259 const char *header, Py_ssize_t header_size,
2260 const char *data, Py_ssize_t data_size,
2261 PyObject *payload)
2262 {
2263 int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2264 int framing = self->framing;
2265
2266 if (bypass_buffer) {
2267 assert(self->output_buffer != NULL);
2268 /* Commit the previous frame. */
2269 if (_Pickler_CommitFrame(self)) {
2270 return -1;
2271 }
2272 /* Disable framing temporarily */
2273 self->framing = 0;
2274 }
2275
2276 if (_Pickler_Write(self, header, header_size) < 0) {
2277 return -1;
2278 }
2279
2280 if (bypass_buffer && self->write != NULL) {
2281 /* Bypass the in-memory buffer to directly stream large data
2282 into the underlying file object. */
2283 PyObject *result, *mem = NULL;
2284 /* Dump the output buffer to the file. */
2285 if (_Pickler_FlushToFile(self) < 0) {
2286 return -1;
2287 }
2288
2289 /* Stream write the payload into the file without going through the
2290 output buffer. */
2291 if (payload == NULL) {
2292 /* TODO: It would be better to use a memoryview with a linked
2293 original string if this is possible. */
2294 payload = mem = PyBytes_FromStringAndSize(data, data_size);
2295 if (payload == NULL) {
2296 return -1;
2297 }
2298 }
2299 result = PyObject_CallFunctionObjArgs(self->write, payload, NULL);
2300 Py_XDECREF(mem);
2301 if (result == NULL) {
2302 return -1;
2303 }
2304 Py_DECREF(result);
2305
2306 /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2307 if (_Pickler_ClearBuffer(self) < 0) {
2308 return -1;
2309 }
2310 }
2311 else {
2312 if (_Pickler_Write(self, data, data_size) < 0) {
2313 return -1;
2314 }
2315 }
2316
2317 /* Re-enable framing for subsequent calls to _Pickler_Write. */
2318 self->framing = framing;
2319
2320 return 0;
2321 }
2322
2323 static int
_save_bytes_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2324 _save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2325 Py_ssize_t size)
2326 {
2327 assert(self->proto >= 3);
2328
2329 char header[9];
2330 Py_ssize_t len;
2331
2332 if (size < 0)
2333 return -1;
2334
2335 if (size <= 0xff) {
2336 header[0] = SHORT_BINBYTES;
2337 header[1] = (unsigned char)size;
2338 len = 2;
2339 }
2340 else if ((size_t)size <= 0xffffffffUL) {
2341 header[0] = BINBYTES;
2342 header[1] = (unsigned char)(size & 0xff);
2343 header[2] = (unsigned char)((size >> 8) & 0xff);
2344 header[3] = (unsigned char)((size >> 16) & 0xff);
2345 header[4] = (unsigned char)((size >> 24) & 0xff);
2346 len = 5;
2347 }
2348 else if (self->proto >= 4) {
2349 header[0] = BINBYTES8;
2350 _write_size64(header + 1, size);
2351 len = 9;
2352 }
2353 else {
2354 PyErr_SetString(PyExc_OverflowError,
2355 "serializing a bytes object larger than 4 GiB "
2356 "requires pickle protocol 4 or higher");
2357 return -1;
2358 }
2359
2360 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2361 return -1;
2362 }
2363
2364 if (memo_put(self, obj) < 0) {
2365 return -1;
2366 }
2367
2368 return 0;
2369 }
2370
2371 static int
save_bytes(PicklerObject * self,PyObject * obj)2372 save_bytes(PicklerObject *self, PyObject *obj)
2373 {
2374 if (self->proto < 3) {
2375 /* Older pickle protocols do not have an opcode for pickling bytes
2376 objects. Therefore, we need to fake the copy protocol (i.e.,
2377 the __reduce__ method) to permit bytes object unpickling.
2378
2379 Here we use a hack to be compatible with Python 2. Since in Python
2380 2 'bytes' is just an alias for 'str' (which has different
2381 parameters than the actual bytes object), we use codecs.encode
2382 to create the appropriate 'str' object when unpickled using
2383 Python 2 *and* the appropriate 'bytes' object when unpickled
2384 using Python 3. Again this is a hack and we don't need to do this
2385 with newer protocols. */
2386 PyObject *reduce_value;
2387 int status;
2388
2389 if (PyBytes_GET_SIZE(obj) == 0) {
2390 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2391 }
2392 else {
2393 PickleState *st = _Pickle_GetGlobalState();
2394 PyObject *unicode_str =
2395 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2396 PyBytes_GET_SIZE(obj),
2397 "strict");
2398 _Py_IDENTIFIER(latin1);
2399
2400 if (unicode_str == NULL)
2401 return -1;
2402 reduce_value = Py_BuildValue("(O(OO))",
2403 st->codecs_encode, unicode_str,
2404 _PyUnicode_FromId(&PyId_latin1));
2405 Py_DECREF(unicode_str);
2406 }
2407
2408 if (reduce_value == NULL)
2409 return -1;
2410
2411 /* save_reduce() will memoize the object automatically. */
2412 status = save_reduce(self, reduce_value, obj);
2413 Py_DECREF(reduce_value);
2414 return status;
2415 }
2416 else {
2417 return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2418 PyBytes_GET_SIZE(obj));
2419 }
2420 }
2421
2422 static int
_save_bytearray_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2423 _save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2424 Py_ssize_t size)
2425 {
2426 assert(self->proto >= 5);
2427
2428 char header[9];
2429 Py_ssize_t len;
2430
2431 if (size < 0)
2432 return -1;
2433
2434 header[0] = BYTEARRAY8;
2435 _write_size64(header + 1, size);
2436 len = 9;
2437
2438 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2439 return -1;
2440 }
2441
2442 if (memo_put(self, obj) < 0) {
2443 return -1;
2444 }
2445
2446 return 0;
2447 }
2448
2449 static int
save_bytearray(PicklerObject * self,PyObject * obj)2450 save_bytearray(PicklerObject *self, PyObject *obj)
2451 {
2452 if (self->proto < 5) {
2453 /* Older pickle protocols do not have an opcode for pickling
2454 * bytearrays. */
2455 PyObject *reduce_value = NULL;
2456 int status;
2457
2458 if (PyByteArray_GET_SIZE(obj) == 0) {
2459 reduce_value = Py_BuildValue("(O())",
2460 (PyObject *) &PyByteArray_Type);
2461 }
2462 else {
2463 PyObject *bytes_obj = PyBytes_FromObject(obj);
2464 if (bytes_obj != NULL) {
2465 reduce_value = Py_BuildValue("(O(O))",
2466 (PyObject *) &PyByteArray_Type,
2467 bytes_obj);
2468 Py_DECREF(bytes_obj);
2469 }
2470 }
2471 if (reduce_value == NULL)
2472 return -1;
2473
2474 /* save_reduce() will memoize the object automatically. */
2475 status = save_reduce(self, reduce_value, obj);
2476 Py_DECREF(reduce_value);
2477 return status;
2478 }
2479 else {
2480 return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2481 PyByteArray_GET_SIZE(obj));
2482 }
2483 }
2484
2485 static int
save_picklebuffer(PicklerObject * self,PyObject * obj)2486 save_picklebuffer(PicklerObject *self, PyObject *obj)
2487 {
2488 if (self->proto < 5) {
2489 PickleState *st = _Pickle_GetGlobalState();
2490 PyErr_SetString(st->PicklingError,
2491 "PickleBuffer can only pickled with protocol >= 5");
2492 return -1;
2493 }
2494 const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2495 if (view == NULL) {
2496 return -1;
2497 }
2498 if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2499 PickleState *st = _Pickle_GetGlobalState();
2500 PyErr_SetString(st->PicklingError,
2501 "PickleBuffer can not be pickled when "
2502 "pointing to a non-contiguous buffer");
2503 return -1;
2504 }
2505 int in_band = 1;
2506 if (self->buffer_callback != NULL) {
2507 PyObject *ret = PyObject_CallFunctionObjArgs(self->buffer_callback,
2508 obj, NULL);
2509 if (ret == NULL) {
2510 return -1;
2511 }
2512 in_band = PyObject_IsTrue(ret);
2513 Py_DECREF(ret);
2514 if (in_band == -1) {
2515 return -1;
2516 }
2517 }
2518 if (in_band) {
2519 /* Write data in-band */
2520 if (view->readonly) {
2521 return _save_bytes_data(self, obj, (const char*) view->buf,
2522 view->len);
2523 }
2524 else {
2525 return _save_bytearray_data(self, obj, (const char*) view->buf,
2526 view->len);
2527 }
2528 }
2529 else {
2530 /* Write data out-of-band */
2531 const char next_buffer_op = NEXT_BUFFER;
2532 if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2533 return -1;
2534 }
2535 if (view->readonly) {
2536 const char readonly_buffer_op = READONLY_BUFFER;
2537 if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2538 return -1;
2539 }
2540 }
2541 }
2542 return 0;
2543 }
2544
2545 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2546 backslash and newline characters to \uXXXX escapes. */
2547 static PyObject *
raw_unicode_escape(PyObject * obj)2548 raw_unicode_escape(PyObject *obj)
2549 {
2550 char *p;
2551 Py_ssize_t i, size;
2552 void *data;
2553 unsigned int kind;
2554 _PyBytesWriter writer;
2555
2556 if (PyUnicode_READY(obj))
2557 return NULL;
2558
2559 _PyBytesWriter_Init(&writer);
2560
2561 size = PyUnicode_GET_LENGTH(obj);
2562 data = PyUnicode_DATA(obj);
2563 kind = PyUnicode_KIND(obj);
2564
2565 p = _PyBytesWriter_Alloc(&writer, size);
2566 if (p == NULL)
2567 goto error;
2568 writer.overallocate = 1;
2569
2570 for (i=0; i < size; i++) {
2571 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2572 /* Map 32-bit characters to '\Uxxxxxxxx' */
2573 if (ch >= 0x10000) {
2574 /* -1: subtract 1 preallocated byte */
2575 p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2576 if (p == NULL)
2577 goto error;
2578
2579 *p++ = '\\';
2580 *p++ = 'U';
2581 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2582 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2583 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2584 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2585 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2586 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2587 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2588 *p++ = Py_hexdigits[ch & 15];
2589 }
2590 /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2591 else if (ch >= 256 ||
2592 ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2593 ch == 0x1a)
2594 {
2595 /* -1: subtract 1 preallocated byte */
2596 p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2597 if (p == NULL)
2598 goto error;
2599
2600 *p++ = '\\';
2601 *p++ = 'u';
2602 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2603 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2604 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2605 *p++ = Py_hexdigits[ch & 15];
2606 }
2607 /* Copy everything else as-is */
2608 else
2609 *p++ = (char) ch;
2610 }
2611
2612 return _PyBytesWriter_Finish(&writer, p);
2613
2614 error:
2615 _PyBytesWriter_Dealloc(&writer);
2616 return NULL;
2617 }
2618
2619 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2620 write_unicode_binary(PicklerObject *self, PyObject *obj)
2621 {
2622 char header[9];
2623 Py_ssize_t len;
2624 PyObject *encoded = NULL;
2625 Py_ssize_t size;
2626 const char *data;
2627
2628 if (PyUnicode_READY(obj))
2629 return -1;
2630
2631 data = PyUnicode_AsUTF8AndSize(obj, &size);
2632 if (data == NULL) {
2633 /* Issue #8383: for strings with lone surrogates, fallback on the
2634 "surrogatepass" error handler. */
2635 PyErr_Clear();
2636 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2637 if (encoded == NULL)
2638 return -1;
2639
2640 data = PyBytes_AS_STRING(encoded);
2641 size = PyBytes_GET_SIZE(encoded);
2642 }
2643
2644 assert(size >= 0);
2645 if (size <= 0xff && self->proto >= 4) {
2646 header[0] = SHORT_BINUNICODE;
2647 header[1] = (unsigned char)(size & 0xff);
2648 len = 2;
2649 }
2650 else if ((size_t)size <= 0xffffffffUL) {
2651 header[0] = BINUNICODE;
2652 header[1] = (unsigned char)(size & 0xff);
2653 header[2] = (unsigned char)((size >> 8) & 0xff);
2654 header[3] = (unsigned char)((size >> 16) & 0xff);
2655 header[4] = (unsigned char)((size >> 24) & 0xff);
2656 len = 5;
2657 }
2658 else if (self->proto >= 4) {
2659 header[0] = BINUNICODE8;
2660 _write_size64(header + 1, size);
2661 len = 9;
2662 }
2663 else {
2664 PyErr_SetString(PyExc_OverflowError,
2665 "serializing a string larger than 4 GiB "
2666 "requires pickle protocol 4 or higher");
2667 Py_XDECREF(encoded);
2668 return -1;
2669 }
2670
2671 if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2672 Py_XDECREF(encoded);
2673 return -1;
2674 }
2675 Py_XDECREF(encoded);
2676 return 0;
2677 }
2678
2679 static int
save_unicode(PicklerObject * self,PyObject * obj)2680 save_unicode(PicklerObject *self, PyObject *obj)
2681 {
2682 if (self->bin) {
2683 if (write_unicode_binary(self, obj) < 0)
2684 return -1;
2685 }
2686 else {
2687 PyObject *encoded;
2688 Py_ssize_t size;
2689 const char unicode_op = UNICODE;
2690
2691 encoded = raw_unicode_escape(obj);
2692 if (encoded == NULL)
2693 return -1;
2694
2695 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2696 Py_DECREF(encoded);
2697 return -1;
2698 }
2699
2700 size = PyBytes_GET_SIZE(encoded);
2701 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2702 Py_DECREF(encoded);
2703 return -1;
2704 }
2705 Py_DECREF(encoded);
2706
2707 if (_Pickler_Write(self, "\n", 1) < 0)
2708 return -1;
2709 }
2710 if (memo_put(self, obj) < 0)
2711 return -1;
2712
2713 return 0;
2714 }
2715
2716 /* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2717 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2718 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2719 {
2720 Py_ssize_t i;
2721
2722 assert(PyTuple_Size(t) == len);
2723
2724 for (i = 0; i < len; i++) {
2725 PyObject *element = PyTuple_GET_ITEM(t, i);
2726
2727 if (element == NULL)
2728 return -1;
2729 if (save(self, element, 0) < 0)
2730 return -1;
2731 }
2732
2733 return 0;
2734 }
2735
2736 /* Tuples are ubiquitous in the pickle protocols, so many techniques are
2737 * used across protocols to minimize the space needed to pickle them.
2738 * Tuples are also the only builtin immutable type that can be recursive
2739 * (a tuple can be reached from itself), and that requires some subtle
2740 * magic so that it works in all cases. IOW, this is a long routine.
2741 */
2742 static int
save_tuple(PicklerObject * self,PyObject * obj)2743 save_tuple(PicklerObject *self, PyObject *obj)
2744 {
2745 Py_ssize_t len, i;
2746
2747 const char mark_op = MARK;
2748 const char tuple_op = TUPLE;
2749 const char pop_op = POP;
2750 const char pop_mark_op = POP_MARK;
2751 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2752
2753 if ((len = PyTuple_Size(obj)) < 0)
2754 return -1;
2755
2756 if (len == 0) {
2757 char pdata[2];
2758
2759 if (self->proto) {
2760 pdata[0] = EMPTY_TUPLE;
2761 len = 1;
2762 }
2763 else {
2764 pdata[0] = MARK;
2765 pdata[1] = TUPLE;
2766 len = 2;
2767 }
2768 if (_Pickler_Write(self, pdata, len) < 0)
2769 return -1;
2770 return 0;
2771 }
2772
2773 /* The tuple isn't in the memo now. If it shows up there after
2774 * saving the tuple elements, the tuple must be recursive, in
2775 * which case we'll pop everything we put on the stack, and fetch
2776 * its value from the memo.
2777 */
2778 if (len <= 3 && self->proto >= 2) {
2779 /* Use TUPLE{1,2,3} opcodes. */
2780 if (store_tuple_elements(self, obj, len) < 0)
2781 return -1;
2782
2783 if (PyMemoTable_Get(self->memo, obj)) {
2784 /* pop the len elements */
2785 for (i = 0; i < len; i++)
2786 if (_Pickler_Write(self, &pop_op, 1) < 0)
2787 return -1;
2788 /* fetch from memo */
2789 if (memo_get(self, obj) < 0)
2790 return -1;
2791
2792 return 0;
2793 }
2794 else { /* Not recursive. */
2795 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2796 return -1;
2797 }
2798 goto memoize;
2799 }
2800
2801 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2802 * Generate MARK e1 e2 ... TUPLE
2803 */
2804 if (_Pickler_Write(self, &mark_op, 1) < 0)
2805 return -1;
2806
2807 if (store_tuple_elements(self, obj, len) < 0)
2808 return -1;
2809
2810 if (PyMemoTable_Get(self->memo, obj)) {
2811 /* pop the stack stuff we pushed */
2812 if (self->bin) {
2813 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2814 return -1;
2815 }
2816 else {
2817 /* Note that we pop one more than len, to remove
2818 * the MARK too.
2819 */
2820 for (i = 0; i <= len; i++)
2821 if (_Pickler_Write(self, &pop_op, 1) < 0)
2822 return -1;
2823 }
2824 /* fetch from memo */
2825 if (memo_get(self, obj) < 0)
2826 return -1;
2827
2828 return 0;
2829 }
2830 else { /* Not recursive. */
2831 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2832 return -1;
2833 }
2834
2835 memoize:
2836 if (memo_put(self, obj) < 0)
2837 return -1;
2838
2839 return 0;
2840 }
2841
2842 /* iter is an iterator giving items, and we batch up chunks of
2843 * MARK item item ... item APPENDS
2844 * opcode sequences. Calling code should have arranged to first create an
2845 * empty list, or list-like object, for the APPENDS to operate on.
2846 * Returns 0 on success, <0 on error.
2847 */
2848 static int
batch_list(PicklerObject * self,PyObject * iter)2849 batch_list(PicklerObject *self, PyObject *iter)
2850 {
2851 PyObject *obj = NULL;
2852 PyObject *firstitem = NULL;
2853 int i, n;
2854
2855 const char mark_op = MARK;
2856 const char append_op = APPEND;
2857 const char appends_op = APPENDS;
2858
2859 assert(iter != NULL);
2860
2861 /* XXX: I think this function could be made faster by avoiding the
2862 iterator interface and fetching objects directly from list using
2863 PyList_GET_ITEM.
2864 */
2865
2866 if (self->proto == 0) {
2867 /* APPENDS isn't available; do one at a time. */
2868 for (;;) {
2869 obj = PyIter_Next(iter);
2870 if (obj == NULL) {
2871 if (PyErr_Occurred())
2872 return -1;
2873 break;
2874 }
2875 i = save(self, obj, 0);
2876 Py_DECREF(obj);
2877 if (i < 0)
2878 return -1;
2879 if (_Pickler_Write(self, &append_op, 1) < 0)
2880 return -1;
2881 }
2882 return 0;
2883 }
2884
2885 /* proto > 0: write in batches of BATCHSIZE. */
2886 do {
2887 /* Get first item */
2888 firstitem = PyIter_Next(iter);
2889 if (firstitem == NULL) {
2890 if (PyErr_Occurred())
2891 goto error;
2892
2893 /* nothing more to add */
2894 break;
2895 }
2896
2897 /* Try to get a second item */
2898 obj = PyIter_Next(iter);
2899 if (obj == NULL) {
2900 if (PyErr_Occurred())
2901 goto error;
2902
2903 /* Only one item to write */
2904 if (save(self, firstitem, 0) < 0)
2905 goto error;
2906 if (_Pickler_Write(self, &append_op, 1) < 0)
2907 goto error;
2908 Py_CLEAR(firstitem);
2909 break;
2910 }
2911
2912 /* More than one item to write */
2913
2914 /* Pump out MARK, items, APPENDS. */
2915 if (_Pickler_Write(self, &mark_op, 1) < 0)
2916 goto error;
2917
2918 if (save(self, firstitem, 0) < 0)
2919 goto error;
2920 Py_CLEAR(firstitem);
2921 n = 1;
2922
2923 /* Fetch and save up to BATCHSIZE items */
2924 while (obj) {
2925 if (save(self, obj, 0) < 0)
2926 goto error;
2927 Py_CLEAR(obj);
2928 n += 1;
2929
2930 if (n == BATCHSIZE)
2931 break;
2932
2933 obj = PyIter_Next(iter);
2934 if (obj == NULL) {
2935 if (PyErr_Occurred())
2936 goto error;
2937 break;
2938 }
2939 }
2940
2941 if (_Pickler_Write(self, &appends_op, 1) < 0)
2942 goto error;
2943
2944 } while (n == BATCHSIZE);
2945 return 0;
2946
2947 error:
2948 Py_XDECREF(firstitem);
2949 Py_XDECREF(obj);
2950 return -1;
2951 }
2952
2953 /* This is a variant of batch_list() above, specialized for lists (with no
2954 * support for list subclasses). Like batch_list(), we batch up chunks of
2955 * MARK item item ... item APPENDS
2956 * opcode sequences. Calling code should have arranged to first create an
2957 * empty list, or list-like object, for the APPENDS to operate on.
2958 * Returns 0 on success, -1 on error.
2959 *
2960 * This version is considerably faster than batch_list(), if less general.
2961 *
2962 * Note that this only works for protocols > 0.
2963 */
2964 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2965 batch_list_exact(PicklerObject *self, PyObject *obj)
2966 {
2967 PyObject *item = NULL;
2968 Py_ssize_t this_batch, total;
2969
2970 const char append_op = APPEND;
2971 const char appends_op = APPENDS;
2972 const char mark_op = MARK;
2973
2974 assert(obj != NULL);
2975 assert(self->proto > 0);
2976 assert(PyList_CheckExact(obj));
2977
2978 if (PyList_GET_SIZE(obj) == 1) {
2979 item = PyList_GET_ITEM(obj, 0);
2980 if (save(self, item, 0) < 0)
2981 return -1;
2982 if (_Pickler_Write(self, &append_op, 1) < 0)
2983 return -1;
2984 return 0;
2985 }
2986
2987 /* Write in batches of BATCHSIZE. */
2988 total = 0;
2989 do {
2990 this_batch = 0;
2991 if (_Pickler_Write(self, &mark_op, 1) < 0)
2992 return -1;
2993 while (total < PyList_GET_SIZE(obj)) {
2994 item = PyList_GET_ITEM(obj, total);
2995 if (save(self, item, 0) < 0)
2996 return -1;
2997 total++;
2998 if (++this_batch == BATCHSIZE)
2999 break;
3000 }
3001 if (_Pickler_Write(self, &appends_op, 1) < 0)
3002 return -1;
3003
3004 } while (total < PyList_GET_SIZE(obj));
3005
3006 return 0;
3007 }
3008
3009 static int
save_list(PicklerObject * self,PyObject * obj)3010 save_list(PicklerObject *self, PyObject *obj)
3011 {
3012 char header[3];
3013 Py_ssize_t len;
3014 int status = 0;
3015
3016 if (self->fast && !fast_save_enter(self, obj))
3017 goto error;
3018
3019 /* Create an empty list. */
3020 if (self->bin) {
3021 header[0] = EMPTY_LIST;
3022 len = 1;
3023 }
3024 else {
3025 header[0] = MARK;
3026 header[1] = LIST;
3027 len = 2;
3028 }
3029
3030 if (_Pickler_Write(self, header, len) < 0)
3031 goto error;
3032
3033 /* Get list length, and bow out early if empty. */
3034 if ((len = PyList_Size(obj)) < 0)
3035 goto error;
3036
3037 if (memo_put(self, obj) < 0)
3038 goto error;
3039
3040 if (len != 0) {
3041 /* Materialize the list elements. */
3042 if (PyList_CheckExact(obj) && self->proto > 0) {
3043 if (Py_EnterRecursiveCall(" while pickling an object"))
3044 goto error;
3045 status = batch_list_exact(self, obj);
3046 Py_LeaveRecursiveCall();
3047 } else {
3048 PyObject *iter = PyObject_GetIter(obj);
3049 if (iter == NULL)
3050 goto error;
3051
3052 if (Py_EnterRecursiveCall(" while pickling an object")) {
3053 Py_DECREF(iter);
3054 goto error;
3055 }
3056 status = batch_list(self, iter);
3057 Py_LeaveRecursiveCall();
3058 Py_DECREF(iter);
3059 }
3060 }
3061 if (0) {
3062 error:
3063 status = -1;
3064 }
3065
3066 if (self->fast && !fast_save_leave(self, obj))
3067 status = -1;
3068
3069 return status;
3070 }
3071
3072 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3073 * MARK key value ... key value SETITEMS
3074 * opcode sequences. Calling code should have arranged to first create an
3075 * empty dict, or dict-like object, for the SETITEMS to operate on.
3076 * Returns 0 on success, <0 on error.
3077 *
3078 * This is very much like batch_list(). The difference between saving
3079 * elements directly, and picking apart two-tuples, is so long-winded at
3080 * the C level, though, that attempts to combine these routines were too
3081 * ugly to bear.
3082 */
3083 static int
batch_dict(PicklerObject * self,PyObject * iter)3084 batch_dict(PicklerObject *self, PyObject *iter)
3085 {
3086 PyObject *obj = NULL;
3087 PyObject *firstitem = NULL;
3088 int i, n;
3089
3090 const char mark_op = MARK;
3091 const char setitem_op = SETITEM;
3092 const char setitems_op = SETITEMS;
3093
3094 assert(iter != NULL);
3095
3096 if (self->proto == 0) {
3097 /* SETITEMS isn't available; do one at a time. */
3098 for (;;) {
3099 obj = PyIter_Next(iter);
3100 if (obj == NULL) {
3101 if (PyErr_Occurred())
3102 return -1;
3103 break;
3104 }
3105 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3106 PyErr_SetString(PyExc_TypeError, "dict items "
3107 "iterator must return 2-tuples");
3108 return -1;
3109 }
3110 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3111 if (i >= 0)
3112 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3113 Py_DECREF(obj);
3114 if (i < 0)
3115 return -1;
3116 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3117 return -1;
3118 }
3119 return 0;
3120 }
3121
3122 /* proto > 0: write in batches of BATCHSIZE. */
3123 do {
3124 /* Get first item */
3125 firstitem = PyIter_Next(iter);
3126 if (firstitem == NULL) {
3127 if (PyErr_Occurred())
3128 goto error;
3129
3130 /* nothing more to add */
3131 break;
3132 }
3133 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3134 PyErr_SetString(PyExc_TypeError, "dict items "
3135 "iterator must return 2-tuples");
3136 goto error;
3137 }
3138
3139 /* Try to get a second item */
3140 obj = PyIter_Next(iter);
3141 if (obj == NULL) {
3142 if (PyErr_Occurred())
3143 goto error;
3144
3145 /* Only one item to write */
3146 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3147 goto error;
3148 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3149 goto error;
3150 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3151 goto error;
3152 Py_CLEAR(firstitem);
3153 break;
3154 }
3155
3156 /* More than one item to write */
3157
3158 /* Pump out MARK, items, SETITEMS. */
3159 if (_Pickler_Write(self, &mark_op, 1) < 0)
3160 goto error;
3161
3162 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3163 goto error;
3164 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3165 goto error;
3166 Py_CLEAR(firstitem);
3167 n = 1;
3168
3169 /* Fetch and save up to BATCHSIZE items */
3170 while (obj) {
3171 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3172 PyErr_SetString(PyExc_TypeError, "dict items "
3173 "iterator must return 2-tuples");
3174 goto error;
3175 }
3176 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3177 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3178 goto error;
3179 Py_CLEAR(obj);
3180 n += 1;
3181
3182 if (n == BATCHSIZE)
3183 break;
3184
3185 obj = PyIter_Next(iter);
3186 if (obj == NULL) {
3187 if (PyErr_Occurred())
3188 goto error;
3189 break;
3190 }
3191 }
3192
3193 if (_Pickler_Write(self, &setitems_op, 1) < 0)
3194 goto error;
3195
3196 } while (n == BATCHSIZE);
3197 return 0;
3198
3199 error:
3200 Py_XDECREF(firstitem);
3201 Py_XDECREF(obj);
3202 return -1;
3203 }
3204
3205 /* This is a variant of batch_dict() above that specializes for dicts, with no
3206 * support for dict subclasses. Like batch_dict(), we batch up chunks of
3207 * MARK key value ... key value SETITEMS
3208 * opcode sequences. Calling code should have arranged to first create an
3209 * empty dict, or dict-like object, for the SETITEMS to operate on.
3210 * Returns 0 on success, -1 on error.
3211 *
3212 * Note that this currently doesn't work for protocol 0.
3213 */
3214 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)3215 batch_dict_exact(PicklerObject *self, PyObject *obj)
3216 {
3217 PyObject *key = NULL, *value = NULL;
3218 int i;
3219 Py_ssize_t dict_size, ppos = 0;
3220
3221 const char mark_op = MARK;
3222 const char setitem_op = SETITEM;
3223 const char setitems_op = SETITEMS;
3224
3225 assert(obj != NULL && PyDict_CheckExact(obj));
3226 assert(self->proto > 0);
3227
3228 dict_size = PyDict_GET_SIZE(obj);
3229
3230 /* Special-case len(d) == 1 to save space. */
3231 if (dict_size == 1) {
3232 PyDict_Next(obj, &ppos, &key, &value);
3233 if (save(self, key, 0) < 0)
3234 return -1;
3235 if (save(self, value, 0) < 0)
3236 return -1;
3237 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3238 return -1;
3239 return 0;
3240 }
3241
3242 /* Write in batches of BATCHSIZE. */
3243 do {
3244 i = 0;
3245 if (_Pickler_Write(self, &mark_op, 1) < 0)
3246 return -1;
3247 while (PyDict_Next(obj, &ppos, &key, &value)) {
3248 if (save(self, key, 0) < 0)
3249 return -1;
3250 if (save(self, value, 0) < 0)
3251 return -1;
3252 if (++i == BATCHSIZE)
3253 break;
3254 }
3255 if (_Pickler_Write(self, &setitems_op, 1) < 0)
3256 return -1;
3257 if (PyDict_GET_SIZE(obj) != dict_size) {
3258 PyErr_Format(
3259 PyExc_RuntimeError,
3260 "dictionary changed size during iteration");
3261 return -1;
3262 }
3263
3264 } while (i == BATCHSIZE);
3265 return 0;
3266 }
3267
3268 static int
save_dict(PicklerObject * self,PyObject * obj)3269 save_dict(PicklerObject *self, PyObject *obj)
3270 {
3271 PyObject *items, *iter;
3272 char header[3];
3273 Py_ssize_t len;
3274 int status = 0;
3275 assert(PyDict_Check(obj));
3276
3277 if (self->fast && !fast_save_enter(self, obj))
3278 goto error;
3279
3280 /* Create an empty dict. */
3281 if (self->bin) {
3282 header[0] = EMPTY_DICT;
3283 len = 1;
3284 }
3285 else {
3286 header[0] = MARK;
3287 header[1] = DICT;
3288 len = 2;
3289 }
3290
3291 if (_Pickler_Write(self, header, len) < 0)
3292 goto error;
3293
3294 if (memo_put(self, obj) < 0)
3295 goto error;
3296
3297 if (PyDict_GET_SIZE(obj)) {
3298 /* Save the dict items. */
3299 if (PyDict_CheckExact(obj) && self->proto > 0) {
3300 /* We can take certain shortcuts if we know this is a dict and
3301 not a dict subclass. */
3302 if (Py_EnterRecursiveCall(" while pickling an object"))
3303 goto error;
3304 status = batch_dict_exact(self, obj);
3305 Py_LeaveRecursiveCall();
3306 } else {
3307 _Py_IDENTIFIER(items);
3308
3309 items = _PyObject_CallMethodId(obj, &PyId_items, NULL);
3310 if (items == NULL)
3311 goto error;
3312 iter = PyObject_GetIter(items);
3313 Py_DECREF(items);
3314 if (iter == NULL)
3315 goto error;
3316 if (Py_EnterRecursiveCall(" while pickling an object")) {
3317 Py_DECREF(iter);
3318 goto error;
3319 }
3320 status = batch_dict(self, iter);
3321 Py_LeaveRecursiveCall();
3322 Py_DECREF(iter);
3323 }
3324 }
3325
3326 if (0) {
3327 error:
3328 status = -1;
3329 }
3330
3331 if (self->fast && !fast_save_leave(self, obj))
3332 status = -1;
3333
3334 return status;
3335 }
3336
3337 static int
save_set(PicklerObject * self,PyObject * obj)3338 save_set(PicklerObject *self, PyObject *obj)
3339 {
3340 PyObject *item;
3341 int i;
3342 Py_ssize_t set_size, ppos = 0;
3343 Py_hash_t hash;
3344
3345 const char empty_set_op = EMPTY_SET;
3346 const char mark_op = MARK;
3347 const char additems_op = ADDITEMS;
3348
3349 if (self->proto < 4) {
3350 PyObject *items;
3351 PyObject *reduce_value;
3352 int status;
3353
3354 items = PySequence_List(obj);
3355 if (items == NULL) {
3356 return -1;
3357 }
3358 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3359 Py_DECREF(items);
3360 if (reduce_value == NULL) {
3361 return -1;
3362 }
3363 /* save_reduce() will memoize the object automatically. */
3364 status = save_reduce(self, reduce_value, obj);
3365 Py_DECREF(reduce_value);
3366 return status;
3367 }
3368
3369 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3370 return -1;
3371
3372 if (memo_put(self, obj) < 0)
3373 return -1;
3374
3375 set_size = PySet_GET_SIZE(obj);
3376 if (set_size == 0)
3377 return 0; /* nothing to do */
3378
3379 /* Write in batches of BATCHSIZE. */
3380 do {
3381 i = 0;
3382 if (_Pickler_Write(self, &mark_op, 1) < 0)
3383 return -1;
3384 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3385 if (save(self, item, 0) < 0)
3386 return -1;
3387 if (++i == BATCHSIZE)
3388 break;
3389 }
3390 if (_Pickler_Write(self, &additems_op, 1) < 0)
3391 return -1;
3392 if (PySet_GET_SIZE(obj) != set_size) {
3393 PyErr_Format(
3394 PyExc_RuntimeError,
3395 "set changed size during iteration");
3396 return -1;
3397 }
3398 } while (i == BATCHSIZE);
3399
3400 return 0;
3401 }
3402
3403 static int
save_frozenset(PicklerObject * self,PyObject * obj)3404 save_frozenset(PicklerObject *self, PyObject *obj)
3405 {
3406 PyObject *iter;
3407
3408 const char mark_op = MARK;
3409 const char frozenset_op = FROZENSET;
3410
3411 if (self->fast && !fast_save_enter(self, obj))
3412 return -1;
3413
3414 if (self->proto < 4) {
3415 PyObject *items;
3416 PyObject *reduce_value;
3417 int status;
3418
3419 items = PySequence_List(obj);
3420 if (items == NULL) {
3421 return -1;
3422 }
3423 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3424 items);
3425 Py_DECREF(items);
3426 if (reduce_value == NULL) {
3427 return -1;
3428 }
3429 /* save_reduce() will memoize the object automatically. */
3430 status = save_reduce(self, reduce_value, obj);
3431 Py_DECREF(reduce_value);
3432 return status;
3433 }
3434
3435 if (_Pickler_Write(self, &mark_op, 1) < 0)
3436 return -1;
3437
3438 iter = PyObject_GetIter(obj);
3439 if (iter == NULL) {
3440 return -1;
3441 }
3442 for (;;) {
3443 PyObject *item;
3444
3445 item = PyIter_Next(iter);
3446 if (item == NULL) {
3447 if (PyErr_Occurred()) {
3448 Py_DECREF(iter);
3449 return -1;
3450 }
3451 break;
3452 }
3453 if (save(self, item, 0) < 0) {
3454 Py_DECREF(item);
3455 Py_DECREF(iter);
3456 return -1;
3457 }
3458 Py_DECREF(item);
3459 }
3460 Py_DECREF(iter);
3461
3462 /* If the object is already in the memo, this means it is
3463 recursive. In this case, throw away everything we put on the
3464 stack, and fetch the object back from the memo. */
3465 if (PyMemoTable_Get(self->memo, obj)) {
3466 const char pop_mark_op = POP_MARK;
3467
3468 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3469 return -1;
3470 if (memo_get(self, obj) < 0)
3471 return -1;
3472 return 0;
3473 }
3474
3475 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3476 return -1;
3477 if (memo_put(self, obj) < 0)
3478 return -1;
3479
3480 return 0;
3481 }
3482
3483 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3484 fix_imports(PyObject **module_name, PyObject **global_name)
3485 {
3486 PyObject *key;
3487 PyObject *item;
3488 PickleState *st = _Pickle_GetGlobalState();
3489
3490 key = PyTuple_Pack(2, *module_name, *global_name);
3491 if (key == NULL)
3492 return -1;
3493 item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3494 Py_DECREF(key);
3495 if (item) {
3496 PyObject *fixed_module_name;
3497 PyObject *fixed_global_name;
3498
3499 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3500 PyErr_Format(PyExc_RuntimeError,
3501 "_compat_pickle.REVERSE_NAME_MAPPING values "
3502 "should be 2-tuples, not %.200s",
3503 Py_TYPE(item)->tp_name);
3504 return -1;
3505 }
3506 fixed_module_name = PyTuple_GET_ITEM(item, 0);
3507 fixed_global_name = PyTuple_GET_ITEM(item, 1);
3508 if (!PyUnicode_Check(fixed_module_name) ||
3509 !PyUnicode_Check(fixed_global_name)) {
3510 PyErr_Format(PyExc_RuntimeError,
3511 "_compat_pickle.REVERSE_NAME_MAPPING values "
3512 "should be pairs of str, not (%.200s, %.200s)",
3513 Py_TYPE(fixed_module_name)->tp_name,
3514 Py_TYPE(fixed_global_name)->tp_name);
3515 return -1;
3516 }
3517
3518 Py_CLEAR(*module_name);
3519 Py_CLEAR(*global_name);
3520 Py_INCREF(fixed_module_name);
3521 Py_INCREF(fixed_global_name);
3522 *module_name = fixed_module_name;
3523 *global_name = fixed_global_name;
3524 return 0;
3525 }
3526 else if (PyErr_Occurred()) {
3527 return -1;
3528 }
3529
3530 item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3531 if (item) {
3532 if (!PyUnicode_Check(item)) {
3533 PyErr_Format(PyExc_RuntimeError,
3534 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3535 "should be strings, not %.200s",
3536 Py_TYPE(item)->tp_name);
3537 return -1;
3538 }
3539 Py_INCREF(item);
3540 Py_XSETREF(*module_name, item);
3541 }
3542 else if (PyErr_Occurred()) {
3543 return -1;
3544 }
3545
3546 return 0;
3547 }
3548
/* Pickle `obj` by reference to an importable name instead of by value.
 *
 * `name` is the (possibly dotted) name to record, or NULL to use
 * obj.__qualname__, falling back to obj.__name__.  The name is resolved
 * through whichmodule(), PyImport_Import() and get_deep_attribute(), and the
 * result must be `obj` itself; otherwise PicklingError is raised.
 *
 * The reference is then written in one of these forms:
 *   - EXT1/EXT2/EXT4, when the protocol is >= 2 and (module_name,
 *     global_name) is found in the extension registry (this form is not
 *     memoized here);
 *   - module name, global name, STACK_GLOBAL, when the protocol is >= 4;
 *   - a getattr(parent, lastname) reduction, when the object is nested
 *     (parent != module);
 *   - a GLOBAL opcode followed by the encoded module name and global name,
 *     each terminated by a newline, otherwise.
 *
 * Returns 0 on success, -1 on error.
 */
static int
save_global(PicklerObject *self, PyObject *obj, PyObject *name)
{
    PyObject *global_name = NULL;
    PyObject *module_name = NULL;
    PyObject *module = NULL;
    PyObject *parent = NULL;
    PyObject *dotted_path = NULL;
    PyObject *lastname = NULL;
    PyObject *cls;
    PickleState *st = _Pickle_GetGlobalState();
    int status = 0;
    _Py_IDENTIFIER(__name__);
    _Py_IDENTIFIER(__qualname__);

    const char global_op = GLOBAL;

    /* Determine the name to record; global_name ends up as an owned
       reference in either branch. */
    if (name) {
        Py_INCREF(name);
        global_name = name;
    }
    else {
        if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
            goto error;
        if (global_name == NULL) {
            global_name = _PyObject_GetAttrId(obj, &PyId___name__);
            if (global_name == NULL)
                goto error;
        }
    }

    /* Note: `module` is still NULL at this point. */
    dotted_path = get_dotted_path(module, global_name);
    if (dotted_path == NULL)
        goto error;
    module_name = whichmodule(obj, dotted_path);
    if (module_name == NULL)
        goto error;

    /* XXX: Change to use the import C API directly with level=0 to disallow
       relative imports.

       XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
       builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
       custom import functions (IMHO, this would be a nice security
       feature). The import C API would need to be extended to support the
       extra parameters of __import__ to fix that. */
    module = PyImport_Import(module_name);
    if (module == NULL) {
        PyErr_Format(st->PicklingError,
                     "Can't pickle %R: import of module %R failed",
                     obj, module_name);
        goto error;
    }
    /* Take a strong reference to the last path component before the list
       is released below. */
    lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
    Py_INCREF(lastname);
    cls = get_deep_attribute(module, dotted_path, &parent);
    Py_CLEAR(dotted_path);
    if (cls == NULL) {
        PyErr_Format(st->PicklingError,
                     "Can't pickle %R: attribute lookup %S on %S failed",
                     obj, global_name, module_name);
        goto error;
    }
    /* The name must resolve back to the very object being pickled. */
    if (cls != obj) {
        Py_DECREF(cls);
        PyErr_Format(st->PicklingError,
                     "Can't pickle %R: it's not the same object as %S.%S",
                     obj, module_name, global_name);
        goto error;
    }
    Py_DECREF(cls);

    if (self->proto >= 2) {
        /* See whether this is in the extension registry, and if
         * so generate an EXT opcode.
         */
        PyObject *extension_key;
        PyObject *code_obj;     /* extension code as Python object */
        long code;              /* extension code as C value */
        char pdata[5];
        Py_ssize_t n;

        extension_key = PyTuple_Pack(2, module_name, global_name);
        if (extension_key == NULL) {
            goto error;
        }
        code_obj = PyDict_GetItemWithError(st->extension_registry,
                                           extension_key);
        Py_DECREF(extension_key);
        /* The object is not registered in the extension registry.
           This is the most likely code path. */
        if (code_obj == NULL) {
            if (PyErr_Occurred()) {
                goto error;
            }
            /* Jumps into the else branch below. */
            goto gen_global;
        }

        /* XXX: pickle.py doesn't check neither the type, nor the range
           of the value returned by the extension_registry.  It should for
           consistency. */

        /* Verify code_obj has the right type and value. */
        if (!PyLong_Check(code_obj)) {
            PyErr_Format(st->PicklingError,
                         "Can't pickle %R: extension code %R isn't an integer",
                         obj, code_obj);
            goto error;
        }
        code = PyLong_AS_LONG(code_obj);
        if (code <= 0 || code > 0x7fffffffL) {
            /* Keep any exception that is already pending. */
            if (!PyErr_Occurred())
                PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
                             "code %ld is out of range", obj, code);
            goto error;
        }

        /* Generate an EXT opcode; the code is written least significant
           byte first, using the smallest of the 1-, 2- and 4-byte forms. */
        if (code <= 0xff) {
            pdata[0] = EXT1;
            pdata[1] = (unsigned char)code;
            n = 2;
        }
        else if (code <= 0xffff) {
            pdata[0] = EXT2;
            pdata[1] = (unsigned char)(code & 0xff);
            pdata[2] = (unsigned char)((code >> 8) & 0xff);
            n = 3;
        }
        else {
            pdata[0] = EXT4;
            pdata[1] = (unsigned char)(code & 0xff);
            pdata[2] = (unsigned char)((code >> 8) & 0xff);
            pdata[3] = (unsigned char)((code >> 16) & 0xff);
            pdata[4] = (unsigned char)((code >> 24) & 0xff);
            n = 5;
        }

        if (_Pickler_Write(self, pdata, n) < 0)
            goto error;
    }
    else {
  gen_global:
        /* For a top-level object, record only the last path component. */
        if (parent == module) {
            Py_INCREF(lastname);
            Py_DECREF(global_name);
            global_name = lastname;
        }
        if (self->proto >= 4) {
            const char stack_global_op = STACK_GLOBAL;

            if (save(self, module_name, 0) < 0)
                goto error;
            if (save(self, global_name, 0) < 0)
                goto error;

            if (_Pickler_Write(self, &stack_global_op, 1) < 0)
                goto error;
        }
        else if (parent != module) {
            /* Nested object with protocol < 4: pickle it as
               getattr(parent, lastname).  obj is passed as NULL so that
               save_reduce() does not memoize; memoization happens at the
               end of this branch. */
            PickleState *st = _Pickle_GetGlobalState();
            PyObject *reduce_value = Py_BuildValue("(O(OO))",
                                        st->getattr, parent, lastname);
            if (reduce_value == NULL)
                goto error;
            status = save_reduce(self, reduce_value, NULL);
            Py_DECREF(reduce_value);
            if (status < 0)
                goto error;
        }
        else {
            /* Generate a normal global opcode if we are using a pickle
               protocol < 4, or if the object is not registered in the
               extension registry. */
            PyObject *encoded;
            PyObject *(*unicode_encoder)(PyObject *);

            if (_Pickler_Write(self, &global_op, 1) < 0)
                goto error;

            /* For protocol < 3 and if the user didn't request against doing
               so, we convert module names to the old 2.x module names. */
            if (self->proto < 3 && self->fix_imports) {
                if (fix_imports(&module_name, &global_name) < 0) {
                    goto error;
                }
            }

            /* Since Python 3.0 now supports non-ASCII identifiers, we encode
               both the module name and the global name using UTF-8.  We do
               so only when we are using pickle protocol 3; for older
               protocols the names are encoded as ASCII.  This is to ensure
               compatibility with older Unpickler running on Python 2.x. */
            if (self->proto == 3) {
                unicode_encoder = PyUnicode_AsUTF8String;
            }
            else {
                unicode_encoder = PyUnicode_AsASCIIString;
            }
            encoded = unicode_encoder(module_name);
            if (encoded == NULL) {
                /* Replace an encoding failure with a PicklingError. */
                if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
                    PyErr_Format(st->PicklingError,
                                 "can't pickle module identifier '%S' using "
                                 "pickle protocol %i",
                                 module_name, self->proto);
                goto error;
            }
            if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
                               PyBytes_GET_SIZE(encoded)) < 0) {
                Py_DECREF(encoded);
                goto error;
            }
            Py_DECREF(encoded);
            if(_Pickler_Write(self, "\n", 1) < 0)
                goto error;

            /* Save the name of the global. */
            encoded = unicode_encoder(global_name);
            if (encoded == NULL) {
                /* Replace an encoding failure with a PicklingError. */
                if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
                    PyErr_Format(st->PicklingError,
                                 "can't pickle global identifier '%S' using "
                                 "pickle protocol %i",
                                 global_name, self->proto);
                goto error;
            }
            if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
                               PyBytes_GET_SIZE(encoded)) < 0) {
                Py_DECREF(encoded);
                goto error;
            }
            Py_DECREF(encoded);
            if (_Pickler_Write(self, "\n", 1) < 0)
                goto error;
        }
        /* Memoize the object. */
        if (memo_put(self, obj) < 0)
            goto error;
    }

    /* Shared exit: the error label only sets the status; the cleanup below
       runs on both the success and the failure path. */
    if (0) {
  error:
        status = -1;
    }
    Py_XDECREF(module_name);
    Py_XDECREF(global_name);
    Py_XDECREF(module);
    Py_XDECREF(parent);
    Py_XDECREF(dotted_path);
    Py_XDECREF(lastname);

    return status;
}
3803
3804 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3805 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3806 {
3807 PyObject *reduce_value;
3808 int status;
3809
3810 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3811 if (reduce_value == NULL) {
3812 return -1;
3813 }
3814 status = save_reduce(self, reduce_value, obj);
3815 Py_DECREF(reduce_value);
3816 return status;
3817 }
3818
3819 static int
save_type(PicklerObject * self,PyObject * obj)3820 save_type(PicklerObject *self, PyObject *obj)
3821 {
3822 if (obj == (PyObject *)&_PyNone_Type) {
3823 return save_singleton_type(self, obj, Py_None);
3824 }
3825 else if (obj == (PyObject *)&PyEllipsis_Type) {
3826 return save_singleton_type(self, obj, Py_Ellipsis);
3827 }
3828 else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3829 return save_singleton_type(self, obj, Py_NotImplemented);
3830 }
3831 return save_global(self, obj, NULL);
3832 }
3833
3834 static int
save_pers(PicklerObject * self,PyObject * obj)3835 save_pers(PicklerObject *self, PyObject *obj)
3836 {
3837 PyObject *pid = NULL;
3838 int status = 0;
3839
3840 const char persid_op = PERSID;
3841 const char binpersid_op = BINPERSID;
3842
3843 pid = call_method(self->pers_func, self->pers_func_self, obj);
3844 if (pid == NULL)
3845 return -1;
3846
3847 if (pid != Py_None) {
3848 if (self->bin) {
3849 if (save(self, pid, 1) < 0 ||
3850 _Pickler_Write(self, &binpersid_op, 1) < 0)
3851 goto error;
3852 }
3853 else {
3854 PyObject *pid_str;
3855
3856 pid_str = PyObject_Str(pid);
3857 if (pid_str == NULL)
3858 goto error;
3859
3860 /* XXX: Should it check whether the pid contains embedded
3861 newlines? */
3862 if (!PyUnicode_IS_ASCII(pid_str)) {
3863 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3864 "persistent IDs in protocol 0 must be "
3865 "ASCII strings");
3866 Py_DECREF(pid_str);
3867 goto error;
3868 }
3869
3870 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3871 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3872 PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3873 _Pickler_Write(self, "\n", 1) < 0) {
3874 Py_DECREF(pid_str);
3875 goto error;
3876 }
3877 Py_DECREF(pid_str);
3878 }
3879 status = 1;
3880 }
3881
3882 if (0) {
3883 error:
3884 status = -1;
3885 }
3886 Py_XDECREF(pid);
3887
3888 return status;
3889 }
3890
3891 static PyObject *
get_class(PyObject * obj)3892 get_class(PyObject *obj)
3893 {
3894 PyObject *cls;
3895 _Py_IDENTIFIER(__class__);
3896
3897 if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3898 cls = (PyObject *) Py_TYPE(obj);
3899 Py_INCREF(cls);
3900 }
3901 return cls;
3902 }
3903
/* We're saving obj, and args is the 2-thru-6 tuple returned by the
 * appropriate __reduce__ method for obj:
 *
 *     (callable, argtup[, state[, listitems[, dictitems[, state_setter]]]])
 *
 * Optional items may be None, which is treated like an omitted item.
 *
 * The object construction is written as NEWOBJ_EX, NEWOBJ or REDUCE
 * depending on the protocol and on the __name__ of the callable.  Then, if
 * obj is not NULL, the object is memoized (or, if it is already in the memo,
 * the freshly built value is popped and the memoized one fetched instead).
 * Finally the list items, dict items and state are written.
 *
 * Returns 0 on success, -1 on error.
 */
static int
save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
{
    PyObject *callable;
    PyObject *argtup;
    PyObject *state = NULL;
    PyObject *listitems = Py_None;
    PyObject *dictitems = Py_None;
    PyObject *state_setter = Py_None;
    PickleState *st = _Pickle_GetGlobalState();
    Py_ssize_t size;
    int use_newobj = 0, use_newobj_ex = 0;

    const char reduce_op = REDUCE;
    const char build_op = BUILD;
    const char newobj_op = NEWOBJ;
    const char newobj_ex_op = NEWOBJ_EX;

    /* Validate the shape of the reduction tuple before unpacking it. */
    size = PyTuple_Size(args);
    if (size < 2 || size > 6) {
        PyErr_SetString(st->PicklingError, "tuple returned by "
                        "__reduce__ must contain 2 through 6 elements");
        return -1;
    }

    /* The unpacked items are not DECREF'ed anywhere in this function. */
    if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
                           &callable, &argtup, &state, &listitems, &dictitems,
                           &state_setter))
        return -1;

    if (!PyCallable_Check(callable)) {
        PyErr_SetString(st->PicklingError, "first item of the tuple "
                        "returned by __reduce__ must be callable");
        return -1;
    }
    if (!PyTuple_Check(argtup)) {
        PyErr_SetString(st->PicklingError, "second item of the tuple "
                        "returned by __reduce__ must be a tuple");
        return -1;
    }

    /* Normalize None to NULL for the optional items, so that the code
       below only has to test for NULL. */
    if (state == Py_None)
        state = NULL;

    if (listitems == Py_None)
        listitems = NULL;
    else if (!PyIter_Check(listitems)) {
        PyErr_Format(st->PicklingError, "fourth element of the tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(listitems)->tp_name);
        return -1;
    }

    if (dictitems == Py_None)
        dictitems = NULL;
    else if (!PyIter_Check(dictitems)) {
        PyErr_Format(st->PicklingError, "fifth element of the tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(dictitems)->tp_name);
        return -1;
    }

    if (state_setter == Py_None)
        state_setter = NULL;
    else if (!PyCallable_Check(state_setter)) {
        PyErr_Format(st->PicklingError, "sixth element of the tuple "
                     "returned by __reduce__ must be a function, not %s",
                     Py_TYPE(state_setter)->tp_name);
        return -1;
    }

    /* For protocol >= 2, a callable whose __name__ is "__newobj_ex__" or
       "__newobj__" selects the NEWOBJ_EX or NEWOBJ handling below. */
    if (self->proto >= 2) {
        PyObject *name;
        _Py_IDENTIFIER(__name__);

        if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
            return -1;
        }
        if (name != NULL && PyUnicode_Check(name)) {
            _Py_IDENTIFIER(__newobj_ex__);
            use_newobj_ex = _PyUnicode_EqualToASCIIId(
                    name, &PyId___newobj_ex__);
            if (!use_newobj_ex) {
                _Py_IDENTIFIER(__newobj__);
                use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
            }
        }
        Py_XDECREF(name);
    }

    if (use_newobj_ex) {
        PyObject *cls;
        PyObject *args;
        PyObject *kwargs;

        /* argtup must be (cls, args, kwargs). */
        if (PyTuple_GET_SIZE(argtup) != 3) {
            PyErr_Format(st->PicklingError,
                         "length of the NEWOBJ_EX argument tuple must be "
                         "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyType_Check(cls)) {
            PyErr_Format(st->PicklingError,
                         "first item from NEWOBJ_EX argument tuple must "
                         "be a class, not %.200s", Py_TYPE(cls)->tp_name);
            return -1;
        }
        args = PyTuple_GET_ITEM(argtup, 1);
        if (!PyTuple_Check(args)) {
            PyErr_Format(st->PicklingError,
                         "second item from NEWOBJ_EX argument tuple must "
                         "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
            return -1;
        }
        kwargs = PyTuple_GET_ITEM(argtup, 2);
        if (!PyDict_Check(kwargs)) {
            PyErr_Format(st->PicklingError,
                         "third item from NEWOBJ_EX argument tuple must "
                         "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
            return -1;
        }

        if (self->proto >= 4) {
            if (save(self, cls, 0) < 0 ||
                save(self, args, 0) < 0 ||
                save(self, kwargs, 0) < 0 ||
                _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
                return -1;
            }
        }
        else {
            /* Protocols 2 and 3: instead of NEWOBJ_EX, build
               st->partial(cls.__new__, cls, *args, **kwargs) and pickle
               it as a REDUCE call with an empty argument tuple. */
            PyObject *newargs;
            PyObject *cls_new;
            Py_ssize_t i;
            _Py_IDENTIFIER(__new__);

            newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
            if (newargs == NULL)
                return -1;

            cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
            if (cls_new == NULL) {
                Py_DECREF(newargs);
                return -1;
            }
            /* PyTuple_SET_ITEM steals the reference it is given. */
            PyTuple_SET_ITEM(newargs, 0, cls_new);
            Py_INCREF(cls);
            PyTuple_SET_ITEM(newargs, 1, cls);
            for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
                PyObject *item = PyTuple_GET_ITEM(args, i);
                Py_INCREF(item);
                PyTuple_SET_ITEM(newargs, i + 2, item);
            }

            /* From here on `callable` is an owned reference that must be
               released before returning. */
            callable = PyObject_Call(st->partial, newargs, kwargs);
            Py_DECREF(newargs);
            if (callable == NULL)
                return -1;

            newargs = PyTuple_New(0);
            if (newargs == NULL) {
                Py_DECREF(callable);
                return -1;
            }

            if (save(self, callable, 0) < 0 ||
                save(self, newargs, 0) < 0 ||
                _Pickler_Write(self, &reduce_op, 1) < 0) {
                Py_DECREF(newargs);
                Py_DECREF(callable);
                return -1;
            }
            Py_DECREF(newargs);
            Py_DECREF(callable);
        }
    }
    else if (use_newobj) {
        PyObject *cls;
        PyObject *newargtup;
        PyObject *obj_class;
        int p;

        /* Sanity checks. */
        if (PyTuple_GET_SIZE(argtup) < 1) {
            PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyType_Check(cls)) {
            PyErr_SetString(st->PicklingError, "args[0] from "
                            "__newobj__ args is not a type");
            return -1;
        }

        /* When the object is known, its class must be exactly cls. */
        if (obj != NULL) {
            obj_class = get_class(obj);
            if (obj_class == NULL) {
                return -1;
            }
            p = obj_class != cls;
            Py_DECREF(obj_class);
            if (p) {
                PyErr_SetString(st->PicklingError, "args[0] from "
                                "__newobj__ args has the wrong class");
                return -1;
            }
        }
        /* XXX: These calls save() are prone to infinite recursion. Imagine
           what happen if the value returned by the __reduce__() method of
           some extension type contains another object of the same type. Ouch!

           Here is a quick example, that I ran into, to illustrate what I
           mean:

             >>> import pickle, copyreg
             >>> copyreg.dispatch_table.pop(complex)
             >>> pickle.dumps(1+2j)
             Traceback (most recent call last):
               ...
             RecursionError: maximum recursion depth exceeded

           Removing the complex class from copyreg.dispatch_table made the
           __reduce_ex__() method emit another complex object:

             >>> (1+1j).__reduce_ex__(2)
             (<function __newobj__ at 0xb7b71c3c>,
               (<class 'complex'>, (1+1j)), None, None, None)

           Thus when save() was called on newargstup (the 2nd item) recursion
           ensued. Of course, the bug was in the complex class which had a
           broken __getnewargs__() that emitted another complex object. But,
           the point, here, is it is quite easy to end up with a broken reduce
           function. */

        /* Save the class and its __new__ arguments. */
        if (save(self, cls, 0) < 0)
            return -1;

        /* Everything after the class in argtup is passed to __new__. */
        newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
        if (newargtup == NULL)
            return -1;

        p = save(self, newargtup, 0);
        Py_DECREF(newargtup);
        if (p < 0)
            return -1;

        /* Add NEWOBJ opcode. */
        if (_Pickler_Write(self, &newobj_op, 1) < 0)
            return -1;
    }
    else { /* Not using NEWOBJ. */
        if (save(self, callable, 0) < 0 ||
            save(self, argtup, 0) < 0 ||
            _Pickler_Write(self, &reduce_op, 1) < 0)
            return -1;
    }

    /* obj can be NULL when save_reduce() is used directly. A NULL obj means
       the caller do not want to memoize the object. Not particularly useful,
       but that is to mimic the behavior save_reduce() in pickle.py when
       obj is None. */
    if (obj != NULL) {
        /* If the object is already in the memo, this means it is
           recursive. In this case, throw away everything we put on the
           stack, and fetch the object back from the memo. */
        if (PyMemoTable_Get(self->memo, obj)) {
            const char pop_op = POP;

            if (_Pickler_Write(self, &pop_op, 1) < 0)
                return -1;
            if (memo_get(self, obj) < 0)
                return -1;

            return 0;
        }
        else if (memo_put(self, obj) < 0)
            return -1;
    }

    if (listitems && batch_list(self, listitems) < 0)
        return -1;

    if (dictitems && batch_dict(self, dictitems) < 0)
        return -1;

    if (state) {
        if (state_setter == NULL) {
            /* Default state handling: state followed by BUILD. */
            if (save(self, state, 0) < 0 ||
                _Pickler_Write(self, &build_op, 1) < 0)
                return -1;
        }
        else {

            /* If a state_setter is specified, call it instead of load_build to
             * update obj's with its previous state.
             * The first 4 save/write instructions push state_setter and its
             * tuple of expected arguments (obj, state) onto the stack. The
             * REDUCE opcode triggers the state_setter(obj, state) function
             * call. Finally, because state-updating routines only do in-place
             * modification, the whole operation has to be stack-transparent.
             * Thus, we finally pop the call's output from the stack.*/

            /* NOTE(review): obj is passed to save() here without a NULL
               check; confirm that no caller combines a NULL obj with a
               state_setter. */
            const char tupletwo_op = TUPLE2;
            const char pop_op = POP;
            if (save(self, state_setter, 0) < 0 ||
                save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
                _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
                _Pickler_Write(self, &reduce_op, 1) < 0 ||
                _Pickler_Write(self, &pop_op, 1) < 0)
                return -1;
        }
    }
    return 0;
}
4225
/* Pickle a single object: the central dispatcher of the Pickler.
 *
 * The checks are made in this order:
 *   1. the persistent-id hook, unless pers_save is nonzero;
 *   2. atoms that are never looked up in the memo (None, bools, exact int
 *      and float);
 *   3. the memo: an object already present is written as a memo reference;
 *   4. exact bytes and str;
 *   5. exact built-in containers and buffers (dict, set, frozenset, list,
 *      tuple, bytearray, PickleBuffer);
 *   6. the reducer_override hook, if one is set;
 *   7. exact types and functions, pickled by reference;
 *   8. a reduction function taken from the dispatch table, or the object's
 *      __reduce_ex__ / __reduce__ method.
 *
 * Returns 0 on success and -1 on error.  When the persistent-id hook
 * handles the object, the nonzero status of save_pers() is returned
 * unchanged, so 1 is also a possible (successful) return value.
 */
static int
save(PicklerObject *self, PyObject *obj, int pers_save)
{
    PyTypeObject *type;
    PyObject *reduce_func = NULL;
    PyObject *reduce_value = NULL;
    int status = 0;

    if (_Pickler_OpcodeBoundary(self) < 0)
        return -1;

    /* The extra pers_save argument is necessary to avoid calling save_pers()
       on its returned object. */
    if (!pers_save && self->pers_func) {
        /* save_pers() returns:
            -1   to signal an error;
             0   if it did nothing successfully;
             1   if a persistent id was saved.
         */
        if ((status = save_pers(self, obj)) != 0)
            return status;
    }

    type = Py_TYPE(obj);

    /* The old cPickle had an optimization that used switch-case statement
       dispatching on the first letter of the type name.  This was removed
       since benchmarks showed that this optimization was actually slowing
       things down. */

    /* Atom types; these aren't memoized, so don't check the memo. */

    if (obj == Py_None) {
        return save_none(self, obj);
    }
    else if (obj == Py_False || obj == Py_True) {
        return save_bool(self, obj);
    }
    else if (type == &PyLong_Type) {
        return save_long(self, obj);
    }
    else if (type == &PyFloat_Type) {
        return save_float(self, obj);
    }

    /* Check the memo to see if it has the object. If so, generate
       a GET (or BINGET) opcode, instead of pickling the object
       once again. */
    if (PyMemoTable_Get(self->memo, obj)) {
        return memo_get(self, obj);
    }

    if (type == &PyBytes_Type) {
        return save_bytes(self, obj);
    }
    else if (type == &PyUnicode_Type) {
        return save_unicode(self, obj);
    }

    /* We're only calling Py_EnterRecursiveCall here so that atomic
       types above are pickled faster. */
    if (Py_EnterRecursiveCall(" while pickling an object")) {
        return -1;
    }

    /* From this point on, every exit goes through the `done` label so that
       Py_LeaveRecursiveCall() is always executed. */
    if (type == &PyDict_Type) {
        status = save_dict(self, obj);
        goto done;
    }
    else if (type == &PySet_Type) {
        status = save_set(self, obj);
        goto done;
    }
    else if (type == &PyFrozenSet_Type) {
        status = save_frozenset(self, obj);
        goto done;
    }
    else if (type == &PyList_Type) {
        status = save_list(self, obj);
        goto done;
    }
    else if (type == &PyTuple_Type) {
        status = save_tuple(self, obj);
        goto done;
    }
    else if (type == &PyByteArray_Type) {
        status = save_bytearray(self, obj);
        goto done;
    }
    else if (type == &PyPickleBuffer_Type) {
        status = save_picklebuffer(self, obj);
        goto done;
    }

    /* Now, check reducer_override.  If it returns NotImplemented,
     * fallback to save_type or save_global, and then perhaps to the
     * regular reduction mechanism.
     */
    if (self->reducer_override != NULL) {
        reduce_value = PyObject_CallFunctionObjArgs(self->reducer_override,
                                                    obj, NULL);
        if (reduce_value == NULL) {
            goto error;
        }
        if (reduce_value != Py_NotImplemented) {
            goto reduce;
        }
        Py_DECREF(reduce_value);
        reduce_value = NULL;
    }

    if (type == &PyType_Type) {
        status = save_type(self, obj);
        goto done;
    }
    else if (type == &PyFunction_Type) {
        status = save_global(self, obj, NULL);
        goto done;
    }

    /* XXX: This part needs some unit tests. */

    /* Get a reduction callable, and call it.  This may come from
     * self.dispatch_table, copyreg.dispatch_table, the object's
     * __reduce_ex__ method, or the object's __reduce__ method.
     */
    if (self->dispatch_table == NULL) {
        /* No per-pickler table: use the one stored in the module state. */
        PickleState *st = _Pickle_GetGlobalState();
        reduce_func = PyDict_GetItemWithError(st->dispatch_table,
                                              (PyObject *)type);
        if (reduce_func == NULL) {
            if (PyErr_Occurred()) {
                goto error;
            }
        } else {
            /* PyDict_GetItemWithError() returns a borrowed reference.
               Increase the reference count to be consistent with
               PyObject_GetItem and _PyObject_GetAttrId used below. */
            Py_INCREF(reduce_func);
        }
    } else {
        reduce_func = PyObject_GetItem(self->dispatch_table,
                                       (PyObject *)type);
        if (reduce_func == NULL) {
            /* A missing entry is not an error; anything else is. */
            if (PyErr_ExceptionMatches(PyExc_KeyError))
                PyErr_Clear();
            else
                goto error;
        }
    }
    if (reduce_func != NULL) {
        /* The extra reference is taken for _Pickle_FastCall(), which
           presumably consumes a reference to its argument -- confirm
           against the helper. */
        Py_INCREF(obj);
        reduce_value = _Pickle_FastCall(reduce_func, obj);
    }
    else if (PyType_IsSubtype(type, &PyType_Type)) {
        /* Instances of type subclasses are pickled by reference. */
        status = save_global(self, obj, NULL);
        goto done;
    }
    else {
        _Py_IDENTIFIER(__reduce__);
        _Py_IDENTIFIER(__reduce_ex__);

        /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
           automatically defined as __reduce__. While this is convenient, this
           make it impossible to know which method was actually called. Of
           course, this is not a big deal. But still, it would be nice to let
           the user know which method was called when something go
           wrong. Incidentally, this means if __reduce_ex__ is not defined, we
           don't actually have to check for a __reduce__ method. */

        /* Check for a __reduce_ex__ method. */
        if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
            goto error;
        }
        if (reduce_func != NULL) {
            PyObject *proto;
            proto = PyLong_FromLong(self->proto);
            /* If proto is NULL, reduce_value stays NULL and the check
               after this if/else chain reports the error. */
            if (proto != NULL) {
                reduce_value = _Pickle_FastCall(reduce_func, proto);
            }
        }
        else {
            /* Check for a __reduce__ method. */
            if (_PyObject_LookupAttrId(obj, &PyId___reduce__, &reduce_func) < 0) {
                goto error;
            }
            if (reduce_func != NULL) {
                reduce_value = _PyObject_CallNoArg(reduce_func);
            }
            else {
                PickleState *st = _Pickle_GetGlobalState();
                PyErr_Format(st->PicklingError,
                             "can't pickle '%.200s' object: %R",
                             type->tp_name, obj);
                goto error;
            }
        }
    }

    if (reduce_value == NULL)
        goto error;

  reduce:
    /* A string result names a global to pickle by reference. */
    if (PyUnicode_Check(reduce_value)) {
        status = save_global(self, obj, reduce_value);
        goto done;
    }

    if (!PyTuple_Check(reduce_value)) {
        PickleState *st = _Pickle_GetGlobalState();
        PyErr_SetString(st->PicklingError,
                        "__reduce__ must return a string or tuple");
        goto error;
    }

    status = save_reduce(self, reduce_value, obj);

    /* The error label only sets the status; the cleanup below runs on both
       the success and the failure path. */
    if (0) {
  error:
        status = -1;
    }
  done:

    Py_LeaveRecursiveCall();
    Py_XDECREF(reduce_func);
    Py_XDECREF(reduce_value);

    return status;
}
4455
4456 static int
dump(PicklerObject * self,PyObject * obj)4457 dump(PicklerObject *self, PyObject *obj)
4458 {
4459 const char stop_op = STOP;
4460 PyObject *tmp;
4461 _Py_IDENTIFIER(reducer_override);
4462
4463 if (_PyObject_LookupAttrId((PyObject *)self, &PyId_reducer_override,
4464 &tmp) < 0) {
4465 return -1;
4466 }
4467 /* Cache the reducer_override method, if it exists. */
4468 if (tmp != NULL) {
4469 Py_XSETREF(self->reducer_override, tmp);
4470 }
4471 else {
4472 Py_CLEAR(self->reducer_override);
4473 }
4474
4475 if (self->proto >= 2) {
4476 char header[2];
4477
4478 header[0] = PROTO;
4479 assert(self->proto >= 0 && self->proto < 256);
4480 header[1] = (unsigned char)self->proto;
4481 if (_Pickler_Write(self, header, 2) < 0)
4482 return -1;
4483 if (self->proto >= 4)
4484 self->framing = 1;
4485 }
4486
4487 if (save(self, obj, 0) < 0 ||
4488 _Pickler_Write(self, &stop_op, 1) < 0 ||
4489 _Pickler_CommitFrame(self) < 0)
4490 return -1;
4491 self->framing = 0;
4492 return 0;
4493 }
4494
4495 /*[clinic input]
4496
4497 _pickle.Pickler.clear_memo
4498
4499 Clears the pickler's "memo".
4500
4501 The memo is the data structure that remembers which objects the
4502 pickler has already seen, so that shared or recursive objects are
4503 pickled by reference and not by value. This method is useful when
4504 re-using picklers.
4505 [clinic start generated code]*/
4506
4507 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4508 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4509 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4510 {
4511 if (self->memo)
4512 PyMemoTable_Clear(self->memo);
4513
4514 Py_RETURN_NONE;
4515 }
4516
4517 /*[clinic input]
4518
4519 _pickle.Pickler.dump
4520
4521 obj: object
4522 /
4523
4524 Write a pickled representation of the given object to the open file.
4525 [clinic start generated code]*/
4526
4527 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4528 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4529 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4530 {
4531 /* Check whether the Pickler was initialized correctly (issue3664).
4532 Developers often forget to call __init__() in their subclasses, which
4533 would trigger a segfault without this check. */
4534 if (self->write == NULL) {
4535 PickleState *st = _Pickle_GetGlobalState();
4536 PyErr_Format(st->PicklingError,
4537 "Pickler.__init__() was not called by %s.__init__()",
4538 Py_TYPE(self)->tp_name);
4539 return NULL;
4540 }
4541
4542 if (_Pickler_ClearBuffer(self) < 0)
4543 return NULL;
4544
4545 if (dump(self, obj) < 0)
4546 return NULL;
4547
4548 if (_Pickler_FlushToFile(self) < 0)
4549 return NULL;
4550
4551 Py_RETURN_NONE;
4552 }
4553
4554 /*[clinic input]
4555
4556 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4557
4558 Returns size in memory, in bytes.
4559 [clinic start generated code]*/
4560
4561 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4562 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4563 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4564 {
4565 Py_ssize_t res, s;
4566
4567 res = _PyObject_SIZE(Py_TYPE(self));
4568 if (self->memo != NULL) {
4569 res += sizeof(PyMemoTable);
4570 res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4571 }
4572 if (self->output_buffer != NULL) {
4573 s = _PySys_GetSizeOf(self->output_buffer);
4574 if (s == -1)
4575 return -1;
4576 res += s;
4577 }
4578 return res;
4579 }
4580
/* Method table for the Pickler type (installed as tp_methods in
 * Pickler_Type).  Each entry is a *_METHODDEF macro; the macros are not
 * defined in this part of the file (presumably emitted by Argument Clinic
 * next to the matching implementation functions). */
static struct PyMethodDef Pickler_methods[] = {
    _PICKLE_PICKLER_DUMP_METHODDEF
    _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
    _PICKLE_PICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
4587
/* tp_dealloc for Pickler: untrack the object from the GC, drop every owned
 * object reference, free the memo table and finally release the object's
 * memory through the type's tp_free slot. */
static void
Pickler_dealloc(PicklerObject *self)
{
    /* Untrack first so the collector never sees a half-destroyed object. */
    PyObject_GC_UnTrack(self);

    Py_XDECREF(self->output_buffer);
    Py_XDECREF(self->write);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->dispatch_table);
    Py_XDECREF(self->fast_memo);
    Py_XDECREF(self->reducer_override);
    Py_XDECREF(self->buffer_callback);

    /* The memo is a PyMemoTable, not a Python object, so it has its own
       destructor. */
    PyMemoTable_Del(self->memo);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
4605
/* tp_traverse for Pickler: report the object references listed below to the
 * cyclic garbage collector.  output_buffer and the memo table are not
 * visited here.  Returns 0 when every visit returned 0 (Py_VISIT returns
 * early with the visitor's result otherwise). */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->dispatch_table);
    Py_VISIT(self->fast_memo);
    Py_VISIT(self->reducer_override);
    Py_VISIT(self->buffer_callback);
    return 0;
}
4617
/* tp_clear for Pickler (also called by __init__ when it runs a second
 * time): release every owned reference and the memo table, leaving the
 * corresponding fields NULL.  Always returns 0. */
static int
Pickler_clear(PicklerObject *self)
{
    Py_CLEAR(self->output_buffer);
    Py_CLEAR(self->write);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->dispatch_table);
    Py_CLEAR(self->fast_memo);
    Py_CLEAR(self->reducer_override);
    Py_CLEAR(self->buffer_callback);

    if (self->memo != NULL) {
        /* Detach the table from self before destroying it, so self never
           points at a table that is being torn down. */
        PyMemoTable *memo = self->memo;
        self->memo = NULL;
        PyMemoTable_Del(memo);
    }
    return 0;
}
4636
4637
4638 /*[clinic input]
4639
4640 _pickle.Pickler.__init__
4641
4642 file: object
4643 protocol: object = None
4644 fix_imports: bool = True
4645 buffer_callback: object = None
4646
4647 This takes a binary file for writing a pickle data stream.
4648
The optional *protocol* argument tells the pickler to use the given
protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
protocol is 4.  It was introduced in Python 3.4, and is incompatible
with previous versions.
4652
4653 Specifying a negative protocol version selects the highest protocol
4654 version supported. The higher the protocol used, the more recent the
4655 version of Python needed to read the pickle produced.
4656
4657 The *file* argument must have a write() method that accepts a single
4658 bytes argument. It can thus be a file object opened for binary
4659 writing, an io.BytesIO instance, or any other custom object that meets
4660 this interface.
4661
4662 If *fix_imports* is True and protocol is less than 3, pickle will try
4663 to map the new Python 3 names to the old module names used in Python
4664 2, so that the pickle data stream is readable with Python 2.
4665
4666 If *buffer_callback* is None (the default), buffer views are
4667 serialized into *file* as part of the pickle stream.
4668
4669 If *buffer_callback* is not None, then it can be called any number
4670 of times with a buffer view. If the callback returns a false value
4671 (such as None), the given buffer is out-of-band; otherwise the
4672 buffer is serialized in-band, i.e. inside the pickle stream.
4673
4674 It is an error if *buffer_callback* is not None and *protocol*
4675 is None or smaller than 5.
4676
4677 [clinic start generated code]*/
4678
4679 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)4680 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4681 PyObject *protocol, int fix_imports,
4682 PyObject *buffer_callback)
4683 /*[clinic end generated code: output=0abedc50590d259b input=bb886e00443a7811]*/
4684 {
4685 _Py_IDENTIFIER(persistent_id);
4686 _Py_IDENTIFIER(dispatch_table);
4687
4688 /* In case of multiple __init__() calls, clear previous content. */
4689 if (self->write != NULL)
4690 (void)Pickler_clear(self);
4691
4692 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4693 return -1;
4694
4695 if (_Pickler_SetOutputStream(self, file) < 0)
4696 return -1;
4697
4698 if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4699 return -1;
4700
4701 /* memo and output_buffer may have already been created in _Pickler_New */
4702 if (self->memo == NULL) {
4703 self->memo = PyMemoTable_New();
4704 if (self->memo == NULL)
4705 return -1;
4706 }
4707 self->output_len = 0;
4708 if (self->output_buffer == NULL) {
4709 self->max_output_len = WRITE_BUF_SIZE;
4710 self->output_buffer = PyBytes_FromStringAndSize(NULL,
4711 self->max_output_len);
4712 if (self->output_buffer == NULL)
4713 return -1;
4714 }
4715
4716 self->fast = 0;
4717 self->fast_nesting = 0;
4718 self->fast_memo = NULL;
4719
4720 if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4721 &self->pers_func, &self->pers_func_self) < 0)
4722 {
4723 return -1;
4724 }
4725
4726 if (_PyObject_LookupAttrId((PyObject *)self,
4727 &PyId_dispatch_table, &self->dispatch_table) < 0) {
4728 return -1;
4729 }
4730
4731 return 0;
4732 }
4733
4734
4735 /* Define a proxy object for the Pickler's internal memo object. This is to
4736 * avoid breaking code like:
4737 * pickler.memo.clear()
4738 * and
4739 * pickler.memo = saved_memo
4740 * Is this a good idea? Not really, but we don't want to break code that uses
4741 * it. Note that we don't implement the entire mapping API here. This is
4742 * intentional, as these should be treated as black-box implementation details.
4743 */
4744
4745 /*[clinic input]
4746 _pickle.PicklerMemoProxy.clear
4747
4748 Remove all items from memo.
4749 [clinic start generated code]*/
4750
4751 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4752 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4753 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4754 {
4755 if (self->pickler->memo)
4756 PyMemoTable_Clear(self->pickler->memo);
4757 Py_RETURN_NONE;
4758 }
4759
4760 /*[clinic input]
4761 _pickle.PicklerMemoProxy.copy
4762
4763 Copy the memo to a new object.
4764 [clinic start generated code]*/
4765
4766 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4767 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4768 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4769 {
4770 PyMemoTable *memo;
4771 PyObject *new_memo = PyDict_New();
4772 if (new_memo == NULL)
4773 return NULL;
4774
4775 memo = self->pickler->memo;
4776 for (size_t i = 0; i < memo->mt_allocated; ++i) {
4777 PyMemoEntry entry = memo->mt_table[i];
4778 if (entry.me_key != NULL) {
4779 int status;
4780 PyObject *key, *value;
4781
4782 key = PyLong_FromVoidPtr(entry.me_key);
4783 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4784
4785 if (key == NULL || value == NULL) {
4786 Py_XDECREF(key);
4787 Py_XDECREF(value);
4788 goto error;
4789 }
4790 status = PyDict_SetItem(new_memo, key, value);
4791 Py_DECREF(key);
4792 Py_DECREF(value);
4793 if (status < 0)
4794 goto error;
4795 }
4796 }
4797 return new_memo;
4798
4799 error:
4800 Py_XDECREF(new_memo);
4801 return NULL;
4802 }
4803
4804 /*[clinic input]
4805 _pickle.PicklerMemoProxy.__reduce__
4806
4807 Implement pickle support.
4808 [clinic start generated code]*/
4809
4810 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4811 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4812 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4813 {
4814 PyObject *reduce_value, *dict_args;
4815 PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4816 if (contents == NULL)
4817 return NULL;
4818
4819 reduce_value = PyTuple_New(2);
4820 if (reduce_value == NULL) {
4821 Py_DECREF(contents);
4822 return NULL;
4823 }
4824 dict_args = PyTuple_New(1);
4825 if (dict_args == NULL) {
4826 Py_DECREF(contents);
4827 Py_DECREF(reduce_value);
4828 return NULL;
4829 }
4830 PyTuple_SET_ITEM(dict_args, 0, contents);
4831 Py_INCREF((PyObject *)&PyDict_Type);
4832 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4833 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4834 return reduce_value;
4835 }
4836
/* Method table for PicklerMemoProxy (installed as tp_methods in
 * PicklerMemoProxyType).  The *_METHODDEF macros are not defined in this
 * part of the file (presumably emitted by Argument Clinic). */
static PyMethodDef picklerproxy_methods[] = {
    _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL} /* sentinel */
};
4843
/* tp_dealloc for PicklerMemoProxy: untrack from the GC, drop the reference
 * to the proxied pickler and free the proxy's memory. */
static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->pickler);
    PyObject_GC_Del((PyObject *)self);
}
4851
/* tp_traverse for PicklerMemoProxy: the proxied pickler is the only object
 * reference the proxy reports to the cyclic garbage collector. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}
4859
/* tp_clear for PicklerMemoProxy: drop the reference to the proxied pickler
 * and leave the field NULL.  Always returns 0. */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);
    return 0;
}
4866
/* Type object for the memo proxy handed out by Pickler.memo.  The
 * initializer is positional: entries must stay in PyTypeObject field
 * order.  The type is GC-enabled, unhashable (PyObject_HashNotImplemented)
 * and uses the generic attribute get/set functions. */
static PyTypeObject PicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.PicklerMemoProxy",                 /*tp_name*/
    sizeof(PicklerMemoProxyObject),             /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    0,                                          /* tp_doc */
    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    picklerproxy_methods,                       /* tp_methods */
};
4897
4898 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4899 PicklerMemoProxy_New(PicklerObject *pickler)
4900 {
4901 PicklerMemoProxyObject *self;
4902
4903 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4904 if (self == NULL)
4905 return NULL;
4906 Py_INCREF(pickler);
4907 self->pickler = pickler;
4908 PyObject_GC_Track(self);
4909 return (PyObject *)self;
4910 }
4911
4912 /*****************************************************************************/
4913
4914 static PyObject *
Pickler_get_memo(PicklerObject * self,void * Py_UNUSED (ignored))4915 Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
4916 {
4917 return PicklerMemoProxy_New(self);
4918 }
4919
4920 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4921 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4922 {
4923 PyMemoTable *new_memo = NULL;
4924
4925 if (obj == NULL) {
4926 PyErr_SetString(PyExc_TypeError,
4927 "attribute deletion is not supported");
4928 return -1;
4929 }
4930
4931 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
4932 PicklerObject *pickler =
4933 ((PicklerMemoProxyObject *)obj)->pickler;
4934
4935 new_memo = PyMemoTable_Copy(pickler->memo);
4936 if (new_memo == NULL)
4937 return -1;
4938 }
4939 else if (PyDict_Check(obj)) {
4940 Py_ssize_t i = 0;
4941 PyObject *key, *value;
4942
4943 new_memo = PyMemoTable_New();
4944 if (new_memo == NULL)
4945 return -1;
4946
4947 while (PyDict_Next(obj, &i, &key, &value)) {
4948 Py_ssize_t memo_id;
4949 PyObject *memo_obj;
4950
4951 if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
4952 PyErr_SetString(PyExc_TypeError,
4953 "'memo' values must be 2-item tuples");
4954 goto error;
4955 }
4956 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
4957 if (memo_id == -1 && PyErr_Occurred())
4958 goto error;
4959 memo_obj = PyTuple_GET_ITEM(value, 1);
4960 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
4961 goto error;
4962 }
4963 }
4964 else {
4965 PyErr_Format(PyExc_TypeError,
4966 "'memo' attribute must be a PicklerMemoProxy object "
4967 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
4968 return -1;
4969 }
4970
4971 PyMemoTable_Del(self->memo);
4972 self->memo = new_memo;
4973
4974 return 0;
4975
4976 error:
4977 if (new_memo)
4978 PyMemoTable_Del(new_memo);
4979 return -1;
4980 }
4981
4982 static PyObject *
Pickler_get_persid(PicklerObject * self,void * Py_UNUSED (ignored))4983 Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
4984 {
4985 if (self->pers_func == NULL) {
4986 PyErr_SetString(PyExc_AttributeError, "persistent_id");
4987 return NULL;
4988 }
4989 return reconstruct_method(self->pers_func, self->pers_func_self);
4990 }
4991
4992 static int
Pickler_set_persid(PicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))4993 Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
4994 {
4995 if (value == NULL) {
4996 PyErr_SetString(PyExc_TypeError,
4997 "attribute deletion is not supported");
4998 return -1;
4999 }
5000 if (!PyCallable_Check(value)) {
5001 PyErr_SetString(PyExc_TypeError,
5002 "persistent_id must be a callable taking one argument");
5003 return -1;
5004 }
5005
5006 self->pers_func_self = NULL;
5007 Py_INCREF(value);
5008 Py_XSETREF(self->pers_func, value);
5009
5010 return 0;
5011 }
5012
/* Struct members of PicklerObject exposed directly as Python attributes:
 * the int fields `bin` and `fast`, and the object field `dispatch_table`
 * (declared T_OBJECT_EX). */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
    {NULL}      /* sentinel */
};
5019
/* Computed attributes of Pickler: `memo` and `persistent_id`, each with a
 * getter and a setter.  Both setters reject attribute deletion. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}      /* sentinel */
};
5027
/* Type object for _pickle.Pickler.  The initializer is positional: entries
 * must stay in PyTypeObject field order.  The type is subclassable and
 * GC-enabled.  Instances are created by PyType_GenericNew, so all the
 * pickler state is set up by tp_init (_pickle_Pickler___init__), not by
 * tp_new. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_vectorcall_offset*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_as_async*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    _pickle_Pickler___init____doc__,    /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    _pickle_Pickler___init__,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
5070
5071 /* Temporary helper for calling self.find_class().
5072
5073 XXX: It would be nice to able to avoid Python function call overhead, by
5074 using directly the C version of find_class(), when find_class() is not
5075 overridden by a subclass. Although, this could become rather hackish. A
5076 simpler optimization would be to call the C function when self is not a
5077 subclass instance. */
5078 static PyObject *
find_class(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)5079 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
5080 {
5081 _Py_IDENTIFIER(find_class);
5082
5083 return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
5084 module_name, global_name, NULL);
5085 }
5086
5087 static Py_ssize_t
marker(UnpicklerObject * self)5088 marker(UnpicklerObject *self)
5089 {
5090 Py_ssize_t mark;
5091
5092 if (self->num_marks < 1) {
5093 PickleState *st = _Pickle_GetGlobalState();
5094 PyErr_SetString(st->UnpicklingError, "could not find MARK");
5095 return -1;
5096 }
5097
5098 mark = self->marks[--self->num_marks];
5099 self->stack->mark_set = self->num_marks != 0;
5100 self->stack->fence = self->num_marks ?
5101 self->marks[self->num_marks - 1] : 0;
5102 return mark;
5103 }
5104
/* Append Py_None to the unpickler's stack (presumably the handler for the
 * NONE opcode).  Returns 0; -1 is the value handed to PDATA_APPEND as its
 * error result. */
static int
load_none(UnpicklerObject *self)
{
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
5111
5112 static int
load_int(UnpicklerObject * self)5113 load_int(UnpicklerObject *self)
5114 {
5115 PyObject *value;
5116 char *endptr, *s;
5117 Py_ssize_t len;
5118 long x;
5119
5120 if ((len = _Unpickler_Readline(self, &s)) < 0)
5121 return -1;
5122 if (len < 2)
5123 return bad_readline();
5124
5125 errno = 0;
5126 /* XXX: Should the base argument of strtol() be explicitly set to 10?
5127 XXX(avassalotti): Should this uses PyOS_strtol()? */
5128 x = strtol(s, &endptr, 0);
5129
5130 if (errno || (*endptr != '\n' && *endptr != '\0')) {
5131 /* Hm, maybe we've got something long. Let's try reading
5132 * it as a Python int object. */
5133 errno = 0;
5134 /* XXX: Same thing about the base here. */
5135 value = PyLong_FromString(s, NULL, 0);
5136 if (value == NULL) {
5137 PyErr_SetString(PyExc_ValueError,
5138 "could not convert string to int");
5139 return -1;
5140 }
5141 }
5142 else {
5143 if (len == 3 && (x == 0 || x == 1)) {
5144 if ((value = PyBool_FromLong(x)) == NULL)
5145 return -1;
5146 }
5147 else {
5148 if ((value = PyLong_FromLong(x)) == NULL)
5149 return -1;
5150 }
5151 }
5152
5153 PDATA_PUSH(self->stack, value, -1);
5154 return 0;
5155 }
5156
/* Append the given bool singleton to the unpickler's stack.  `boolean`
 * must be Py_True or Py_False (asserted).  Returns 0; -1 is the value
 * handed to PDATA_APPEND as its error result. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
5164
5165 /* s contains x bytes of an unsigned little-endian integer. Return its value
5166 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5167 */
5168 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)5169 calc_binsize(char *bytes, int nbytes)
5170 {
5171 unsigned char *s = (unsigned char *)bytes;
5172 int i;
5173 size_t x = 0;
5174
5175 if (nbytes > (int)sizeof(size_t)) {
5176 /* Check for integer overflow. BINBYTES8 and BINUNICODE8 opcodes
5177 * have 64-bit size that can't be represented on 32-bit platform.
5178 */
5179 for (i = (int)sizeof(size_t); i < nbytes; i++) {
5180 if (s[i])
5181 return -1;
5182 }
5183 nbytes = (int)sizeof(size_t);
5184 }
5185 for (i = 0; i < nbytes; i++) {
5186 x |= (size_t) s[i] << (8 * i);
5187 }
5188
5189 if (x > PY_SSIZE_T_MAX)
5190 return -1;
5191 else
5192 return (Py_ssize_t) x;
5193 }
5194
/* `bytes` holds `nbytes` bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when nbytes is 1 or 2 the integer is
 * unsigned, but when nbytes is 4 it is a signed 32-bit value.  This is a
 * historical source of cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long, so no set bit is ever
 * shifted into the sign position of a signed type (undefined behaviour
 * where long is 32 bits wide), and the sign of a 4-byte value is applied
 * arithmetically, which works for any width of long.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    for (i = 0; i < nbytes; i++) {
        ux |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) is
     * signed: bit 31 set means the value is (low 31 bits) - 2**31.  The
     * subtraction is split in two steps so that no intermediate result
     * falls outside the range of a 32-bit long.
     */
    if (nbytes == 4 && (ux & 0x80000000UL) != 0) {
        return (long)(ux & 0x7FFFFFFFUL) - 0x7FFFFFFFL - 1L;
    }

    return (long)ux;
}
5221
5222 static int
load_binintx(UnpicklerObject * self,char * s,int size)5223 load_binintx(UnpicklerObject *self, char *s, int size)
5224 {
5225 PyObject *value;
5226 long x;
5227
5228 x = calc_binint(s, size);
5229
5230 if ((value = PyLong_FromLong(x)) == NULL)
5231 return -1;
5232
5233 PDATA_PUSH(self->stack, value, -1);
5234 return 0;
5235 }
5236
5237 static int
load_binint(UnpicklerObject * self)5238 load_binint(UnpicklerObject *self)
5239 {
5240 char *s;
5241
5242 if (_Unpickler_Read(self, &s, 4) < 0)
5243 return -1;
5244
5245 return load_binintx(self, s, 4);
5246 }
5247
5248 static int
load_binint1(UnpicklerObject * self)5249 load_binint1(UnpicklerObject *self)
5250 {
5251 char *s;
5252
5253 if (_Unpickler_Read(self, &s, 1) < 0)
5254 return -1;
5255
5256 return load_binintx(self, s, 1);
5257 }
5258
5259 static int
load_binint2(UnpicklerObject * self)5260 load_binint2(UnpicklerObject *self)
5261 {
5262 char *s;
5263
5264 if (_Unpickler_Read(self, &s, 2) < 0)
5265 return -1;
5266
5267 return load_binintx(self, s, 2);
5268 }
5269
5270 static int
load_long(UnpicklerObject * self)5271 load_long(UnpicklerObject *self)
5272 {
5273 PyObject *value;
5274 char *s = NULL;
5275 Py_ssize_t len;
5276
5277 if ((len = _Unpickler_Readline(self, &s)) < 0)
5278 return -1;
5279 if (len < 2)
5280 return bad_readline();
5281
5282 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5283 the 'L' before calling PyLong_FromString. In order to maintain
5284 compatibility with Python 3.0.0, we don't actually *require*
5285 the 'L' to be present. */
5286 if (s[len-2] == 'L')
5287 s[len-2] = '\0';
5288 /* XXX: Should the base argument explicitly set to 10? */
5289 value = PyLong_FromString(s, NULL, 0);
5290 if (value == NULL)
5291 return -1;
5292
5293 PDATA_PUSH(self->stack, value, -1);
5294 return 0;
5295 }
5296
5297 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
5298 * data following.
5299 */
5300 static int
load_counted_long(UnpicklerObject * self,int size)5301 load_counted_long(UnpicklerObject *self, int size)
5302 {
5303 PyObject *value;
5304 char *nbytes;
5305 char *pdata;
5306
5307 assert(size == 1 || size == 4);
5308 if (_Unpickler_Read(self, &nbytes, size) < 0)
5309 return -1;
5310
5311 size = calc_binint(nbytes, size);
5312 if (size < 0) {
5313 PickleState *st = _Pickle_GetGlobalState();
5314 /* Corrupt or hostile pickle -- we never write one like this */
5315 PyErr_SetString(st->UnpicklingError,
5316 "LONG pickle has negative byte count");
5317 return -1;
5318 }
5319
5320 if (size == 0)
5321 value = PyLong_FromLong(0L);
5322 else {
5323 /* Read the raw little-endian bytes and convert. */
5324 if (_Unpickler_Read(self, &pdata, size) < 0)
5325 return -1;
5326 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5327 1 /* little endian */ , 1 /* signed */ );
5328 }
5329 if (value == NULL)
5330 return -1;
5331 PDATA_PUSH(self->stack, value, -1);
5332 return 0;
5333 }
5334
5335 static int
load_float(UnpicklerObject * self)5336 load_float(UnpicklerObject *self)
5337 {
5338 PyObject *value;
5339 char *endptr, *s;
5340 Py_ssize_t len;
5341 double d;
5342
5343 if ((len = _Unpickler_Readline(self, &s)) < 0)
5344 return -1;
5345 if (len < 2)
5346 return bad_readline();
5347
5348 errno = 0;
5349 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5350 if (d == -1.0 && PyErr_Occurred())
5351 return -1;
5352 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5353 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5354 return -1;
5355 }
5356 value = PyFloat_FromDouble(d);
5357 if (value == NULL)
5358 return -1;
5359
5360 PDATA_PUSH(self->stack, value, -1);
5361 return 0;
5362 }
5363
5364 static int
load_binfloat(UnpicklerObject * self)5365 load_binfloat(UnpicklerObject *self)
5366 {
5367 PyObject *value;
5368 double x;
5369 char *s;
5370
5371 if (_Unpickler_Read(self, &s, 8) < 0)
5372 return -1;
5373
5374 x = _PyFloat_Unpack8((unsigned char *)s, 0);
5375 if (x == -1.0 && PyErr_Occurred())
5376 return -1;
5377
5378 if ((value = PyFloat_FromDouble(x)) == NULL)
5379 return -1;
5380
5381 PDATA_PUSH(self->stack, value, -1);
5382 return 0;
5383 }
5384
5385 static int
load_string(UnpicklerObject * self)5386 load_string(UnpicklerObject *self)
5387 {
5388 PyObject *bytes;
5389 PyObject *obj;
5390 Py_ssize_t len;
5391 char *s, *p;
5392
5393 if ((len = _Unpickler_Readline(self, &s)) < 0)
5394 return -1;
5395 /* Strip the newline */
5396 len--;
5397 /* Strip outermost quotes */
5398 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5399 p = s + 1;
5400 len -= 2;
5401 }
5402 else {
5403 PickleState *st = _Pickle_GetGlobalState();
5404 PyErr_SetString(st->UnpicklingError,
5405 "the STRING opcode argument must be quoted");
5406 return -1;
5407 }
5408 assert(len >= 0);
5409
5410 /* Use the PyBytes API to decode the string, since that is what is used
5411 to encode, and then coerce the result to Unicode. */
5412 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5413 if (bytes == NULL)
5414 return -1;
5415
5416 /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5417 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5418 if (strcmp(self->encoding, "bytes") == 0) {
5419 obj = bytes;
5420 }
5421 else {
5422 obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5423 Py_DECREF(bytes);
5424 if (obj == NULL) {
5425 return -1;
5426 }
5427 }
5428
5429 PDATA_PUSH(self->stack, obj, -1);
5430 return 0;
5431 }
5432
5433 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)5434 load_counted_binstring(UnpicklerObject *self, int nbytes)
5435 {
5436 PyObject *obj;
5437 Py_ssize_t size;
5438 char *s;
5439
5440 if (_Unpickler_Read(self, &s, nbytes) < 0)
5441 return -1;
5442
5443 size = calc_binsize(s, nbytes);
5444 if (size < 0) {
5445 PickleState *st = _Pickle_GetGlobalState();
5446 PyErr_Format(st->UnpicklingError,
5447 "BINSTRING exceeds system's maximum size of %zd bytes",
5448 PY_SSIZE_T_MAX);
5449 return -1;
5450 }
5451
5452 if (_Unpickler_Read(self, &s, size) < 0)
5453 return -1;
5454
5455 /* Convert Python 2.x strings to bytes if the *encoding* given to the
5456 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5457 if (strcmp(self->encoding, "bytes") == 0) {
5458 obj = PyBytes_FromStringAndSize(s, size);
5459 }
5460 else {
5461 obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5462 }
5463 if (obj == NULL) {
5464 return -1;
5465 }
5466
5467 PDATA_PUSH(self->stack, obj, -1);
5468 return 0;
5469 }
5470
5471 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)5472 load_counted_binbytes(UnpicklerObject *self, int nbytes)
5473 {
5474 PyObject *bytes;
5475 Py_ssize_t size;
5476 char *s;
5477
5478 if (_Unpickler_Read(self, &s, nbytes) < 0)
5479 return -1;
5480
5481 size = calc_binsize(s, nbytes);
5482 if (size < 0) {
5483 PyErr_Format(PyExc_OverflowError,
5484 "BINBYTES exceeds system's maximum size of %zd bytes",
5485 PY_SSIZE_T_MAX);
5486 return -1;
5487 }
5488
5489 bytes = PyBytes_FromStringAndSize(NULL, size);
5490 if (bytes == NULL)
5491 return -1;
5492 if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5493 Py_DECREF(bytes);
5494 return -1;
5495 }
5496
5497 PDATA_PUSH(self->stack, bytes, -1);
5498 return 0;
5499 }
5500
5501 static int
load_counted_bytearray(UnpicklerObject * self)5502 load_counted_bytearray(UnpicklerObject *self)
5503 {
5504 PyObject *bytearray;
5505 Py_ssize_t size;
5506 char *s;
5507
5508 if (_Unpickler_Read(self, &s, 8) < 0) {
5509 return -1;
5510 }
5511
5512 size = calc_binsize(s, 8);
5513 if (size < 0) {
5514 PyErr_Format(PyExc_OverflowError,
5515 "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5516 PY_SSIZE_T_MAX);
5517 return -1;
5518 }
5519
5520 bytearray = PyByteArray_FromStringAndSize(NULL, size);
5521 if (bytearray == NULL) {
5522 return -1;
5523 }
5524 if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5525 Py_DECREF(bytearray);
5526 return -1;
5527 }
5528
5529 PDATA_PUSH(self->stack, bytearray, -1);
5530 return 0;
5531 }
5532
5533 static int
load_next_buffer(UnpicklerObject * self)5534 load_next_buffer(UnpicklerObject *self)
5535 {
5536 if (self->buffers == NULL) {
5537 PickleState *st = _Pickle_GetGlobalState();
5538 PyErr_SetString(st->UnpicklingError,
5539 "pickle stream refers to out-of-band data "
5540 "but no *buffers* argument was given");
5541 return -1;
5542 }
5543 PyObject *buf = PyIter_Next(self->buffers);
5544 if (buf == NULL) {
5545 if (!PyErr_Occurred()) {
5546 PickleState *st = _Pickle_GetGlobalState();
5547 PyErr_SetString(st->UnpicklingError,
5548 "not enough out-of-band buffers");
5549 }
5550 return -1;
5551 }
5552
5553 PDATA_PUSH(self->stack, buf, -1);
5554 return 0;
5555 }
5556
5557 static int
load_readonly_buffer(UnpicklerObject * self)5558 load_readonly_buffer(UnpicklerObject *self)
5559 {
5560 Py_ssize_t len = Py_SIZE(self->stack);
5561 if (len <= self->stack->fence) {
5562 return Pdata_stack_underflow(self->stack);
5563 }
5564
5565 PyObject *obj = self->stack->data[len - 1];
5566 PyObject *view = PyMemoryView_FromObject(obj);
5567 if (view == NULL) {
5568 return -1;
5569 }
5570 if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5571 /* Original object is writable */
5572 PyMemoryView_GET_BUFFER(view)->readonly = 1;
5573 self->stack->data[len - 1] = view;
5574 Py_DECREF(obj);
5575 }
5576 else {
5577 /* Original object is read-only, no need to replace it */
5578 Py_DECREF(view);
5579 }
5580 return 0;
5581 }
5582
5583 static int
load_unicode(UnpicklerObject * self)5584 load_unicode(UnpicklerObject *self)
5585 {
5586 PyObject *str;
5587 Py_ssize_t len;
5588 char *s = NULL;
5589
5590 if ((len = _Unpickler_Readline(self, &s)) < 0)
5591 return -1;
5592 if (len < 1)
5593 return bad_readline();
5594
5595 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5596 if (str == NULL)
5597 return -1;
5598
5599 PDATA_PUSH(self->stack, str, -1);
5600 return 0;
5601 }
5602
5603 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5604 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5605 {
5606 PyObject *str;
5607 Py_ssize_t size;
5608 char *s;
5609
5610 if (_Unpickler_Read(self, &s, nbytes) < 0)
5611 return -1;
5612
5613 size = calc_binsize(s, nbytes);
5614 if (size < 0) {
5615 PyErr_Format(PyExc_OverflowError,
5616 "BINUNICODE exceeds system's maximum size of %zd bytes",
5617 PY_SSIZE_T_MAX);
5618 return -1;
5619 }
5620
5621 if (_Unpickler_Read(self, &s, size) < 0)
5622 return -1;
5623
5624 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5625 if (str == NULL)
5626 return -1;
5627
5628 PDATA_PUSH(self->stack, str, -1);
5629 return 0;
5630 }
5631
5632 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5633 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5634 {
5635 PyObject *tuple;
5636
5637 if (Py_SIZE(self->stack) < len)
5638 return Pdata_stack_underflow(self->stack);
5639
5640 tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5641 if (tuple == NULL)
5642 return -1;
5643 PDATA_PUSH(self->stack, tuple, -1);
5644 return 0;
5645 }
5646
5647 static int
load_tuple(UnpicklerObject * self)5648 load_tuple(UnpicklerObject *self)
5649 {
5650 Py_ssize_t i;
5651
5652 if ((i = marker(self)) < 0)
5653 return -1;
5654
5655 return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5656 }
5657
5658 static int
load_empty_list(UnpicklerObject * self)5659 load_empty_list(UnpicklerObject *self)
5660 {
5661 PyObject *list;
5662
5663 if ((list = PyList_New(0)) == NULL)
5664 return -1;
5665 PDATA_PUSH(self->stack, list, -1);
5666 return 0;
5667 }
5668
5669 static int
load_empty_dict(UnpicklerObject * self)5670 load_empty_dict(UnpicklerObject *self)
5671 {
5672 PyObject *dict;
5673
5674 if ((dict = PyDict_New()) == NULL)
5675 return -1;
5676 PDATA_PUSH(self->stack, dict, -1);
5677 return 0;
5678 }
5679
5680 static int
load_empty_set(UnpicklerObject * self)5681 load_empty_set(UnpicklerObject *self)
5682 {
5683 PyObject *set;
5684
5685 if ((set = PySet_New(NULL)) == NULL)
5686 return -1;
5687 PDATA_PUSH(self->stack, set, -1);
5688 return 0;
5689 }
5690
5691 static int
load_list(UnpicklerObject * self)5692 load_list(UnpicklerObject *self)
5693 {
5694 PyObject *list;
5695 Py_ssize_t i;
5696
5697 if ((i = marker(self)) < 0)
5698 return -1;
5699
5700 list = Pdata_poplist(self->stack, i);
5701 if (list == NULL)
5702 return -1;
5703 PDATA_PUSH(self->stack, list, -1);
5704 return 0;
5705 }
5706
5707 static int
load_dict(UnpicklerObject * self)5708 load_dict(UnpicklerObject *self)
5709 {
5710 PyObject *dict, *key, *value;
5711 Py_ssize_t i, j, k;
5712
5713 if ((i = marker(self)) < 0)
5714 return -1;
5715 j = Py_SIZE(self->stack);
5716
5717 if ((dict = PyDict_New()) == NULL)
5718 return -1;
5719
5720 if ((j - i) % 2 != 0) {
5721 PickleState *st = _Pickle_GetGlobalState();
5722 PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5723 Py_DECREF(dict);
5724 return -1;
5725 }
5726
5727 for (k = i + 1; k < j; k += 2) {
5728 key = self->stack->data[k - 1];
5729 value = self->stack->data[k];
5730 if (PyDict_SetItem(dict, key, value) < 0) {
5731 Py_DECREF(dict);
5732 return -1;
5733 }
5734 }
5735 Pdata_clear(self->stack, i);
5736 PDATA_PUSH(self->stack, dict, -1);
5737 return 0;
5738 }
5739
5740 static int
load_frozenset(UnpicklerObject * self)5741 load_frozenset(UnpicklerObject *self)
5742 {
5743 PyObject *items;
5744 PyObject *frozenset;
5745 Py_ssize_t i;
5746
5747 if ((i = marker(self)) < 0)
5748 return -1;
5749
5750 items = Pdata_poptuple(self->stack, i);
5751 if (items == NULL)
5752 return -1;
5753
5754 frozenset = PyFrozenSet_New(items);
5755 Py_DECREF(items);
5756 if (frozenset == NULL)
5757 return -1;
5758
5759 PDATA_PUSH(self->stack, frozenset, -1);
5760 return 0;
5761 }
5762
5763 static PyObject *
instantiate(PyObject * cls,PyObject * args)5764 instantiate(PyObject *cls, PyObject *args)
5765 {
5766 /* Caller must assure args are a tuple. Normally, args come from
5767 Pdata_poptuple which packs objects from the top of the stack
5768 into a newly created tuple. */
5769 assert(PyTuple_Check(args));
5770 if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5771 _Py_IDENTIFIER(__getinitargs__);
5772 _Py_IDENTIFIER(__new__);
5773 PyObject *func;
5774 if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5775 return NULL;
5776 }
5777 if (func == NULL) {
5778 return _PyObject_CallMethodIdObjArgs(cls, &PyId___new__, cls, NULL);
5779 }
5780 Py_DECREF(func);
5781 }
5782 return PyObject_CallObject(cls, args);
5783 }
5784
5785 static int
load_obj(UnpicklerObject * self)5786 load_obj(UnpicklerObject *self)
5787 {
5788 PyObject *cls, *args, *obj = NULL;
5789 Py_ssize_t i;
5790
5791 if ((i = marker(self)) < 0)
5792 return -1;
5793
5794 if (Py_SIZE(self->stack) - i < 1)
5795 return Pdata_stack_underflow(self->stack);
5796
5797 args = Pdata_poptuple(self->stack, i + 1);
5798 if (args == NULL)
5799 return -1;
5800
5801 PDATA_POP(self->stack, cls);
5802 if (cls) {
5803 obj = instantiate(cls, args);
5804 Py_DECREF(cls);
5805 }
5806 Py_DECREF(args);
5807 if (obj == NULL)
5808 return -1;
5809
5810 PDATA_PUSH(self->stack, obj, -1);
5811 return 0;
5812 }
5813
5814 static int
load_inst(UnpicklerObject * self)5815 load_inst(UnpicklerObject *self)
5816 {
5817 PyObject *cls = NULL;
5818 PyObject *args = NULL;
5819 PyObject *obj = NULL;
5820 PyObject *module_name;
5821 PyObject *class_name;
5822 Py_ssize_t len;
5823 Py_ssize_t i;
5824 char *s;
5825
5826 if ((i = marker(self)) < 0)
5827 return -1;
5828 if ((len = _Unpickler_Readline(self, &s)) < 0)
5829 return -1;
5830 if (len < 2)
5831 return bad_readline();
5832
5833 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5834 identifiers are permitted in Python 3.0, since the INST opcode is only
5835 supported by older protocols on Python 2.x. */
5836 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5837 if (module_name == NULL)
5838 return -1;
5839
5840 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5841 if (len < 2) {
5842 Py_DECREF(module_name);
5843 return bad_readline();
5844 }
5845 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5846 if (class_name != NULL) {
5847 cls = find_class(self, module_name, class_name);
5848 Py_DECREF(class_name);
5849 }
5850 }
5851 Py_DECREF(module_name);
5852
5853 if (cls == NULL)
5854 return -1;
5855
5856 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5857 obj = instantiate(cls, args);
5858 Py_DECREF(args);
5859 }
5860 Py_DECREF(cls);
5861
5862 if (obj == NULL)
5863 return -1;
5864
5865 PDATA_PUSH(self->stack, obj, -1);
5866 return 0;
5867 }
5868
5869 static int
load_newobj(UnpicklerObject * self)5870 load_newobj(UnpicklerObject *self)
5871 {
5872 PyObject *args = NULL;
5873 PyObject *clsraw = NULL;
5874 PyTypeObject *cls; /* clsraw cast to its true type */
5875 PyObject *obj;
5876 PickleState *st = _Pickle_GetGlobalState();
5877
5878 /* Stack is ... cls argtuple, and we want to call
5879 * cls.__new__(cls, *argtuple).
5880 */
5881 PDATA_POP(self->stack, args);
5882 if (args == NULL)
5883 goto error;
5884 if (!PyTuple_Check(args)) {
5885 PyErr_SetString(st->UnpicklingError,
5886 "NEWOBJ expected an arg " "tuple.");
5887 goto error;
5888 }
5889
5890 PDATA_POP(self->stack, clsraw);
5891 cls = (PyTypeObject *)clsraw;
5892 if (cls == NULL)
5893 goto error;
5894 if (!PyType_Check(cls)) {
5895 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5896 "isn't a type object");
5897 goto error;
5898 }
5899 if (cls->tp_new == NULL) {
5900 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5901 "has NULL tp_new");
5902 goto error;
5903 }
5904
5905 /* Call __new__. */
5906 obj = cls->tp_new(cls, args, NULL);
5907 if (obj == NULL)
5908 goto error;
5909
5910 Py_DECREF(args);
5911 Py_DECREF(clsraw);
5912 PDATA_PUSH(self->stack, obj, -1);
5913 return 0;
5914
5915 error:
5916 Py_XDECREF(args);
5917 Py_XDECREF(clsraw);
5918 return -1;
5919 }
5920
5921 static int
load_newobj_ex(UnpicklerObject * self)5922 load_newobj_ex(UnpicklerObject *self)
5923 {
5924 PyObject *cls, *args, *kwargs;
5925 PyObject *obj;
5926 PickleState *st = _Pickle_GetGlobalState();
5927
5928 PDATA_POP(self->stack, kwargs);
5929 if (kwargs == NULL) {
5930 return -1;
5931 }
5932 PDATA_POP(self->stack, args);
5933 if (args == NULL) {
5934 Py_DECREF(kwargs);
5935 return -1;
5936 }
5937 PDATA_POP(self->stack, cls);
5938 if (cls == NULL) {
5939 Py_DECREF(kwargs);
5940 Py_DECREF(args);
5941 return -1;
5942 }
5943
5944 if (!PyType_Check(cls)) {
5945 Py_DECREF(kwargs);
5946 Py_DECREF(args);
5947 PyErr_Format(st->UnpicklingError,
5948 "NEWOBJ_EX class argument must be a type, not %.200s",
5949 Py_TYPE(cls)->tp_name);
5950 Py_DECREF(cls);
5951 return -1;
5952 }
5953
5954 if (((PyTypeObject *)cls)->tp_new == NULL) {
5955 Py_DECREF(kwargs);
5956 Py_DECREF(args);
5957 Py_DECREF(cls);
5958 PyErr_SetString(st->UnpicklingError,
5959 "NEWOBJ_EX class argument doesn't have __new__");
5960 return -1;
5961 }
5962 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5963 Py_DECREF(kwargs);
5964 Py_DECREF(args);
5965 Py_DECREF(cls);
5966 if (obj == NULL) {
5967 return -1;
5968 }
5969 PDATA_PUSH(self->stack, obj, -1);
5970 return 0;
5971 }
5972
5973 static int
load_global(UnpicklerObject * self)5974 load_global(UnpicklerObject *self)
5975 {
5976 PyObject *global = NULL;
5977 PyObject *module_name;
5978 PyObject *global_name;
5979 Py_ssize_t len;
5980 char *s;
5981
5982 if ((len = _Unpickler_Readline(self, &s)) < 0)
5983 return -1;
5984 if (len < 2)
5985 return bad_readline();
5986 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5987 if (!module_name)
5988 return -1;
5989
5990 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5991 if (len < 2) {
5992 Py_DECREF(module_name);
5993 return bad_readline();
5994 }
5995 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5996 if (global_name) {
5997 global = find_class(self, module_name, global_name);
5998 Py_DECREF(global_name);
5999 }
6000 }
6001 Py_DECREF(module_name);
6002
6003 if (global == NULL)
6004 return -1;
6005 PDATA_PUSH(self->stack, global, -1);
6006 return 0;
6007 }
6008
6009 static int
load_stack_global(UnpicklerObject * self)6010 load_stack_global(UnpicklerObject *self)
6011 {
6012 PyObject *global;
6013 PyObject *module_name;
6014 PyObject *global_name;
6015
6016 PDATA_POP(self->stack, global_name);
6017 PDATA_POP(self->stack, module_name);
6018 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6019 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
6020 PickleState *st = _Pickle_GetGlobalState();
6021 PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
6022 Py_XDECREF(global_name);
6023 Py_XDECREF(module_name);
6024 return -1;
6025 }
6026 global = find_class(self, module_name, global_name);
6027 Py_DECREF(global_name);
6028 Py_DECREF(module_name);
6029 if (global == NULL)
6030 return -1;
6031 PDATA_PUSH(self->stack, global, -1);
6032 return 0;
6033 }
6034
6035 static int
load_persid(UnpicklerObject * self)6036 load_persid(UnpicklerObject *self)
6037 {
6038 PyObject *pid, *obj;
6039 Py_ssize_t len;
6040 char *s;
6041
6042 if (self->pers_func) {
6043 if ((len = _Unpickler_Readline(self, &s)) < 0)
6044 return -1;
6045 if (len < 1)
6046 return bad_readline();
6047
6048 pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6049 if (pid == NULL) {
6050 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6051 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6052 "persistent IDs in protocol 0 must be "
6053 "ASCII strings");
6054 }
6055 return -1;
6056 }
6057
6058 obj = call_method(self->pers_func, self->pers_func_self, pid);
6059 Py_DECREF(pid);
6060 if (obj == NULL)
6061 return -1;
6062
6063 PDATA_PUSH(self->stack, obj, -1);
6064 return 0;
6065 }
6066 else {
6067 PickleState *st = _Pickle_GetGlobalState();
6068 PyErr_SetString(st->UnpicklingError,
6069 "A load persistent id instruction was encountered,\n"
6070 "but no persistent_load function was specified.");
6071 return -1;
6072 }
6073 }
6074
6075 static int
load_binpersid(UnpicklerObject * self)6076 load_binpersid(UnpicklerObject *self)
6077 {
6078 PyObject *pid, *obj;
6079
6080 if (self->pers_func) {
6081 PDATA_POP(self->stack, pid);
6082 if (pid == NULL)
6083 return -1;
6084
6085 obj = call_method(self->pers_func, self->pers_func_self, pid);
6086 Py_DECREF(pid);
6087 if (obj == NULL)
6088 return -1;
6089
6090 PDATA_PUSH(self->stack, obj, -1);
6091 return 0;
6092 }
6093 else {
6094 PickleState *st = _Pickle_GetGlobalState();
6095 PyErr_SetString(st->UnpicklingError,
6096 "A load persistent id instruction was encountered,\n"
6097 "but no persistent_load function was specified.");
6098 return -1;
6099 }
6100 }
6101
6102 static int
load_pop(UnpicklerObject * self)6103 load_pop(UnpicklerObject *self)
6104 {
6105 Py_ssize_t len = Py_SIZE(self->stack);
6106
6107 /* Note that we split the (pickle.py) stack into two stacks,
6108 * an object stack and a mark stack. We have to be clever and
6109 * pop the right one. We do this by looking at the top of the
6110 * mark stack first, and only signalling a stack underflow if
6111 * the object stack is empty and the mark stack doesn't match
6112 * our expectations.
6113 */
6114 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
6115 self->num_marks--;
6116 self->stack->mark_set = self->num_marks != 0;
6117 self->stack->fence = self->num_marks ?
6118 self->marks[self->num_marks - 1] : 0;
6119 } else if (len <= self->stack->fence)
6120 return Pdata_stack_underflow(self->stack);
6121 else {
6122 len--;
6123 Py_DECREF(self->stack->data[len]);
6124 Py_SIZE(self->stack) = len;
6125 }
6126 return 0;
6127 }
6128
6129 static int
load_pop_mark(UnpicklerObject * self)6130 load_pop_mark(UnpicklerObject *self)
6131 {
6132 Py_ssize_t i;
6133
6134 if ((i = marker(self)) < 0)
6135 return -1;
6136
6137 Pdata_clear(self->stack, i);
6138
6139 return 0;
6140 }
6141
6142 static int
load_dup(UnpicklerObject * self)6143 load_dup(UnpicklerObject *self)
6144 {
6145 PyObject *last;
6146 Py_ssize_t len = Py_SIZE(self->stack);
6147
6148 if (len <= self->stack->fence)
6149 return Pdata_stack_underflow(self->stack);
6150 last = self->stack->data[len - 1];
6151 PDATA_APPEND(self->stack, last, -1);
6152 return 0;
6153 }
6154
6155 static int
load_get(UnpicklerObject * self)6156 load_get(UnpicklerObject *self)
6157 {
6158 PyObject *key, *value;
6159 Py_ssize_t idx;
6160 Py_ssize_t len;
6161 char *s;
6162
6163 if ((len = _Unpickler_Readline(self, &s)) < 0)
6164 return -1;
6165 if (len < 2)
6166 return bad_readline();
6167
6168 key = PyLong_FromString(s, NULL, 10);
6169 if (key == NULL)
6170 return -1;
6171 idx = PyLong_AsSsize_t(key);
6172 if (idx == -1 && PyErr_Occurred()) {
6173 Py_DECREF(key);
6174 return -1;
6175 }
6176
6177 value = _Unpickler_MemoGet(self, idx);
6178 if (value == NULL) {
6179 if (!PyErr_Occurred())
6180 PyErr_SetObject(PyExc_KeyError, key);
6181 Py_DECREF(key);
6182 return -1;
6183 }
6184 Py_DECREF(key);
6185
6186 PDATA_APPEND(self->stack, value, -1);
6187 return 0;
6188 }
6189
6190 static int
load_binget(UnpicklerObject * self)6191 load_binget(UnpicklerObject *self)
6192 {
6193 PyObject *value;
6194 Py_ssize_t idx;
6195 char *s;
6196
6197 if (_Unpickler_Read(self, &s, 1) < 0)
6198 return -1;
6199
6200 idx = Py_CHARMASK(s[0]);
6201
6202 value = _Unpickler_MemoGet(self, idx);
6203 if (value == NULL) {
6204 PyObject *key = PyLong_FromSsize_t(idx);
6205 if (key != NULL) {
6206 PyErr_SetObject(PyExc_KeyError, key);
6207 Py_DECREF(key);
6208 }
6209 return -1;
6210 }
6211
6212 PDATA_APPEND(self->stack, value, -1);
6213 return 0;
6214 }
6215
6216 static int
load_long_binget(UnpicklerObject * self)6217 load_long_binget(UnpicklerObject *self)
6218 {
6219 PyObject *value;
6220 Py_ssize_t idx;
6221 char *s;
6222
6223 if (_Unpickler_Read(self, &s, 4) < 0)
6224 return -1;
6225
6226 idx = calc_binsize(s, 4);
6227
6228 value = _Unpickler_MemoGet(self, idx);
6229 if (value == NULL) {
6230 PyObject *key = PyLong_FromSsize_t(idx);
6231 if (key != NULL) {
6232 PyErr_SetObject(PyExc_KeyError, key);
6233 Py_DECREF(key);
6234 }
6235 return -1;
6236 }
6237
6238 PDATA_APPEND(self->stack, value, -1);
6239 return 0;
6240 }
6241
6242 /* Push an object from the extension registry (EXT[124]). nbytes is
6243 * the number of bytes following the opcode, holding the index (code) value.
6244 */
6245 static int
load_extension(UnpicklerObject * self,int nbytes)6246 load_extension(UnpicklerObject *self, int nbytes)
6247 {
6248 char *codebytes; /* the nbytes bytes after the opcode */
6249 long code; /* calc_binint returns long */
6250 PyObject *py_code; /* code as a Python int */
6251 PyObject *obj; /* the object to push */
6252 PyObject *pair; /* (module_name, class_name) */
6253 PyObject *module_name, *class_name;
6254 PickleState *st = _Pickle_GetGlobalState();
6255
6256 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
6257 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
6258 return -1;
6259 code = calc_binint(codebytes, nbytes);
6260 if (code <= 0) { /* note that 0 is forbidden */
6261 /* Corrupt or hostile pickle. */
6262 PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
6263 return -1;
6264 }
6265
6266 /* Look for the code in the cache. */
6267 py_code = PyLong_FromLong(code);
6268 if (py_code == NULL)
6269 return -1;
6270 obj = PyDict_GetItemWithError(st->extension_cache, py_code);
6271 if (obj != NULL) {
6272 /* Bingo. */
6273 Py_DECREF(py_code);
6274 PDATA_APPEND(self->stack, obj, -1);
6275 return 0;
6276 }
6277 if (PyErr_Occurred()) {
6278 Py_DECREF(py_code);
6279 return -1;
6280 }
6281
6282 /* Look up the (module_name, class_name) pair. */
6283 pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
6284 if (pair == NULL) {
6285 Py_DECREF(py_code);
6286 if (!PyErr_Occurred()) {
6287 PyErr_Format(PyExc_ValueError, "unregistered extension "
6288 "code %ld", code);
6289 }
6290 return -1;
6291 }
6292 /* Since the extension registry is manipulable via Python code,
6293 * confirm that pair is really a 2-tuple of strings.
6294 */
6295 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6296 goto error;
6297 }
6298
6299 module_name = PyTuple_GET_ITEM(pair, 0);
6300 if (!PyUnicode_Check(module_name)) {
6301 goto error;
6302 }
6303
6304 class_name = PyTuple_GET_ITEM(pair, 1);
6305 if (!PyUnicode_Check(class_name)) {
6306 goto error;
6307 }
6308
6309 /* Load the object. */
6310 obj = find_class(self, module_name, class_name);
6311 if (obj == NULL) {
6312 Py_DECREF(py_code);
6313 return -1;
6314 }
6315 /* Cache code -> obj. */
6316 code = PyDict_SetItem(st->extension_cache, py_code, obj);
6317 Py_DECREF(py_code);
6318 if (code < 0) {
6319 Py_DECREF(obj);
6320 return -1;
6321 }
6322 PDATA_PUSH(self->stack, obj, -1);
6323 return 0;
6324
6325 error:
6326 Py_DECREF(py_code);
6327 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6328 "isn't a 2-tuple of strings", code);
6329 return -1;
6330 }
6331
6332 static int
load_put(UnpicklerObject * self)6333 load_put(UnpicklerObject *self)
6334 {
6335 PyObject *key, *value;
6336 Py_ssize_t idx;
6337 Py_ssize_t len;
6338 char *s = NULL;
6339
6340 if ((len = _Unpickler_Readline(self, &s)) < 0)
6341 return -1;
6342 if (len < 2)
6343 return bad_readline();
6344 if (Py_SIZE(self->stack) <= self->stack->fence)
6345 return Pdata_stack_underflow(self->stack);
6346 value = self->stack->data[Py_SIZE(self->stack) - 1];
6347
6348 key = PyLong_FromString(s, NULL, 10);
6349 if (key == NULL)
6350 return -1;
6351 idx = PyLong_AsSsize_t(key);
6352 Py_DECREF(key);
6353 if (idx < 0) {
6354 if (!PyErr_Occurred())
6355 PyErr_SetString(PyExc_ValueError,
6356 "negative PUT argument");
6357 return -1;
6358 }
6359
6360 return _Unpickler_MemoPut(self, idx, value);
6361 }
6362
6363 static int
load_binput(UnpicklerObject * self)6364 load_binput(UnpicklerObject *self)
6365 {
6366 PyObject *value;
6367 Py_ssize_t idx;
6368 char *s;
6369
6370 if (_Unpickler_Read(self, &s, 1) < 0)
6371 return -1;
6372
6373 if (Py_SIZE(self->stack) <= self->stack->fence)
6374 return Pdata_stack_underflow(self->stack);
6375 value = self->stack->data[Py_SIZE(self->stack) - 1];
6376
6377 idx = Py_CHARMASK(s[0]);
6378
6379 return _Unpickler_MemoPut(self, idx, value);
6380 }
6381
6382 static int
load_long_binput(UnpicklerObject * self)6383 load_long_binput(UnpicklerObject *self)
6384 {
6385 PyObject *value;
6386 Py_ssize_t idx;
6387 char *s;
6388
6389 if (_Unpickler_Read(self, &s, 4) < 0)
6390 return -1;
6391
6392 if (Py_SIZE(self->stack) <= self->stack->fence)
6393 return Pdata_stack_underflow(self->stack);
6394 value = self->stack->data[Py_SIZE(self->stack) - 1];
6395
6396 idx = calc_binsize(s, 4);
6397 if (idx < 0) {
6398 PyErr_SetString(PyExc_ValueError,
6399 "negative LONG_BINPUT argument");
6400 return -1;
6401 }
6402
6403 return _Unpickler_MemoPut(self, idx, value);
6404 }
6405
6406 static int
load_memoize(UnpicklerObject * self)6407 load_memoize(UnpicklerObject *self)
6408 {
6409 PyObject *value;
6410
6411 if (Py_SIZE(self->stack) <= self->stack->fence)
6412 return Pdata_stack_underflow(self->stack);
6413 value = self->stack->data[Py_SIZE(self->stack) - 1];
6414
6415 return _Unpickler_MemoPut(self, self->memo_len, value);
6416 }
6417
6418 static int
do_append(UnpicklerObject * self,Py_ssize_t x)6419 do_append(UnpicklerObject *self, Py_ssize_t x)
6420 {
6421 PyObject *value;
6422 PyObject *slice;
6423 PyObject *list;
6424 PyObject *result;
6425 Py_ssize_t len, i;
6426
6427 len = Py_SIZE(self->stack);
6428 if (x > len || x <= self->stack->fence)
6429 return Pdata_stack_underflow(self->stack);
6430 if (len == x) /* nothing to do */
6431 return 0;
6432
6433 list = self->stack->data[x - 1];
6434
6435 if (PyList_CheckExact(list)) {
6436 Py_ssize_t list_len;
6437 int ret;
6438
6439 slice = Pdata_poplist(self->stack, x);
6440 if (!slice)
6441 return -1;
6442 list_len = PyList_GET_SIZE(list);
6443 ret = PyList_SetSlice(list, list_len, list_len, slice);
6444 Py_DECREF(slice);
6445 return ret;
6446 }
6447 else {
6448 PyObject *extend_func;
6449 _Py_IDENTIFIER(extend);
6450
6451 if (_PyObject_LookupAttrId(list, &PyId_extend, &extend_func) < 0) {
6452 return -1;
6453 }
6454 if (extend_func != NULL) {
6455 slice = Pdata_poplist(self->stack, x);
6456 if (!slice) {
6457 Py_DECREF(extend_func);
6458 return -1;
6459 }
6460 result = _Pickle_FastCall(extend_func, slice);
6461 Py_DECREF(extend_func);
6462 if (result == NULL)
6463 return -1;
6464 Py_DECREF(result);
6465 }
6466 else {
6467 PyObject *append_func;
6468 _Py_IDENTIFIER(append);
6469
6470 /* Even if the PEP 307 requires extend() and append() methods,
6471 fall back on append() if the object has no extend() method
6472 for backward compatibility. */
6473 append_func = _PyObject_GetAttrId(list, &PyId_append);
6474 if (append_func == NULL)
6475 return -1;
6476 for (i = x; i < len; i++) {
6477 value = self->stack->data[i];
6478 result = _Pickle_FastCall(append_func, value);
6479 if (result == NULL) {
6480 Pdata_clear(self->stack, i + 1);
6481 Py_SIZE(self->stack) = x;
6482 Py_DECREF(append_func);
6483 return -1;
6484 }
6485 Py_DECREF(result);
6486 }
6487 Py_SIZE(self->stack) = x;
6488 Py_DECREF(append_func);
6489 }
6490 }
6491
6492 return 0;
6493 }
6494
6495 static int
load_append(UnpicklerObject * self)6496 load_append(UnpicklerObject *self)
6497 {
6498 if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6499 return Pdata_stack_underflow(self->stack);
6500 return do_append(self, Py_SIZE(self->stack) - 1);
6501 }
6502
6503 static int
load_appends(UnpicklerObject * self)6504 load_appends(UnpicklerObject *self)
6505 {
6506 Py_ssize_t i = marker(self);
6507 if (i < 0)
6508 return -1;
6509 return do_append(self, i);
6510 }
6511
6512 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)6513 do_setitems(UnpicklerObject *self, Py_ssize_t x)
6514 {
6515 PyObject *value, *key;
6516 PyObject *dict;
6517 Py_ssize_t len, i;
6518 int status = 0;
6519
6520 len = Py_SIZE(self->stack);
6521 if (x > len || x <= self->stack->fence)
6522 return Pdata_stack_underflow(self->stack);
6523 if (len == x) /* nothing to do */
6524 return 0;
6525 if ((len - x) % 2 != 0) {
6526 PickleState *st = _Pickle_GetGlobalState();
6527 /* Currupt or hostile pickle -- we never write one like this. */
6528 PyErr_SetString(st->UnpicklingError,
6529 "odd number of items for SETITEMS");
6530 return -1;
6531 }
6532
6533 /* Here, dict does not actually need to be a PyDict; it could be anything
6534 that supports the __setitem__ attribute. */
6535 dict = self->stack->data[x - 1];
6536
6537 for (i = x + 1; i < len; i += 2) {
6538 key = self->stack->data[i - 1];
6539 value = self->stack->data[i];
6540 if (PyObject_SetItem(dict, key, value) < 0) {
6541 status = -1;
6542 break;
6543 }
6544 }
6545
6546 Pdata_clear(self->stack, x);
6547 return status;
6548 }
6549
6550 static int
load_setitem(UnpicklerObject * self)6551 load_setitem(UnpicklerObject *self)
6552 {
6553 return do_setitems(self, Py_SIZE(self->stack) - 2);
6554 }
6555
6556 static int
load_setitems(UnpicklerObject * self)6557 load_setitems(UnpicklerObject *self)
6558 {
6559 Py_ssize_t i = marker(self);
6560 if (i < 0)
6561 return -1;
6562 return do_setitems(self, i);
6563 }
6564
6565 static int
load_additems(UnpicklerObject * self)6566 load_additems(UnpicklerObject *self)
6567 {
6568 PyObject *set;
6569 Py_ssize_t mark, len, i;
6570
6571 mark = marker(self);
6572 if (mark < 0)
6573 return -1;
6574 len = Py_SIZE(self->stack);
6575 if (mark > len || mark <= self->stack->fence)
6576 return Pdata_stack_underflow(self->stack);
6577 if (len == mark) /* nothing to do */
6578 return 0;
6579
6580 set = self->stack->data[mark - 1];
6581
6582 if (PySet_Check(set)) {
6583 PyObject *items;
6584 int status;
6585
6586 items = Pdata_poptuple(self->stack, mark);
6587 if (items == NULL)
6588 return -1;
6589
6590 status = _PySet_Update(set, items);
6591 Py_DECREF(items);
6592 return status;
6593 }
6594 else {
6595 PyObject *add_func;
6596 _Py_IDENTIFIER(add);
6597
6598 add_func = _PyObject_GetAttrId(set, &PyId_add);
6599 if (add_func == NULL)
6600 return -1;
6601 for (i = mark; i < len; i++) {
6602 PyObject *result;
6603 PyObject *item;
6604
6605 item = self->stack->data[i];
6606 result = _Pickle_FastCall(add_func, item);
6607 if (result == NULL) {
6608 Pdata_clear(self->stack, i + 1);
6609 Py_SIZE(self->stack) = mark;
6610 return -1;
6611 }
6612 Py_DECREF(result);
6613 }
6614 Py_SIZE(self->stack) = mark;
6615 }
6616
6617 return 0;
6618 }
6619
6620 static int
load_build(UnpicklerObject * self)6621 load_build(UnpicklerObject *self)
6622 {
6623 PyObject *state, *inst, *slotstate;
6624 PyObject *setstate;
6625 int status = 0;
6626 _Py_IDENTIFIER(__setstate__);
6627
6628 /* Stack is ... instance, state. We want to leave instance at
6629 * the stack top, possibly mutated via instance.__setstate__(state).
6630 */
6631 if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6632 return Pdata_stack_underflow(self->stack);
6633
6634 PDATA_POP(self->stack, state);
6635 if (state == NULL)
6636 return -1;
6637
6638 inst = self->stack->data[Py_SIZE(self->stack) - 1];
6639
6640 if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6641 Py_DECREF(state);
6642 return -1;
6643 }
6644 if (setstate != NULL) {
6645 PyObject *result;
6646
6647 /* The explicit __setstate__ is responsible for everything. */
6648 result = _Pickle_FastCall(setstate, state);
6649 Py_DECREF(setstate);
6650 if (result == NULL)
6651 return -1;
6652 Py_DECREF(result);
6653 return 0;
6654 }
6655
6656 /* A default __setstate__. First see whether state embeds a
6657 * slot state dict too (a proto 2 addition).
6658 */
6659 if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6660 PyObject *tmp = state;
6661
6662 state = PyTuple_GET_ITEM(tmp, 0);
6663 slotstate = PyTuple_GET_ITEM(tmp, 1);
6664 Py_INCREF(state);
6665 Py_INCREF(slotstate);
6666 Py_DECREF(tmp);
6667 }
6668 else
6669 slotstate = NULL;
6670
6671 /* Set inst.__dict__ from the state dict (if any). */
6672 if (state != Py_None) {
6673 PyObject *dict;
6674 PyObject *d_key, *d_value;
6675 Py_ssize_t i;
6676 _Py_IDENTIFIER(__dict__);
6677
6678 if (!PyDict_Check(state)) {
6679 PickleState *st = _Pickle_GetGlobalState();
6680 PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6681 goto error;
6682 }
6683 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
6684 if (dict == NULL)
6685 goto error;
6686
6687 i = 0;
6688 while (PyDict_Next(state, &i, &d_key, &d_value)) {
6689 /* normally the keys for instance attributes are
6690 interned. we should try to do that here. */
6691 Py_INCREF(d_key);
6692 if (PyUnicode_CheckExact(d_key))
6693 PyUnicode_InternInPlace(&d_key);
6694 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6695 Py_DECREF(d_key);
6696 goto error;
6697 }
6698 Py_DECREF(d_key);
6699 }
6700 Py_DECREF(dict);
6701 }
6702
6703 /* Also set instance attributes from the slotstate dict (if any). */
6704 if (slotstate != NULL) {
6705 PyObject *d_key, *d_value;
6706 Py_ssize_t i;
6707
6708 if (!PyDict_Check(slotstate)) {
6709 PickleState *st = _Pickle_GetGlobalState();
6710 PyErr_SetString(st->UnpicklingError,
6711 "slot state is not a dictionary");
6712 goto error;
6713 }
6714 i = 0;
6715 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6716 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6717 goto error;
6718 }
6719 }
6720
6721 if (0) {
6722 error:
6723 status = -1;
6724 }
6725
6726 Py_DECREF(state);
6727 Py_XDECREF(slotstate);
6728 return status;
6729 }
6730
6731 static int
load_mark(UnpicklerObject * self)6732 load_mark(UnpicklerObject *self)
6733 {
6734
6735 /* Note that we split the (pickle.py) stack into two stacks, an
6736 * object stack and a mark stack. Here we push a mark onto the
6737 * mark stack.
6738 */
6739
6740 if (self->num_marks >= self->marks_size) {
6741 size_t alloc = ((size_t)self->num_marks << 1) + 20;
6742 Py_ssize_t *marks_new = self->marks;
6743 PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6744 if (marks_new == NULL) {
6745 PyErr_NoMemory();
6746 return -1;
6747 }
6748 self->marks = marks_new;
6749 self->marks_size = (Py_ssize_t)alloc;
6750 }
6751
6752 self->stack->mark_set = 1;
6753 self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6754
6755 return 0;
6756 }
6757
6758 static int
load_reduce(UnpicklerObject * self)6759 load_reduce(UnpicklerObject *self)
6760 {
6761 PyObject *callable = NULL;
6762 PyObject *argtup = NULL;
6763 PyObject *obj = NULL;
6764
6765 PDATA_POP(self->stack, argtup);
6766 if (argtup == NULL)
6767 return -1;
6768 PDATA_POP(self->stack, callable);
6769 if (callable) {
6770 obj = PyObject_CallObject(callable, argtup);
6771 Py_DECREF(callable);
6772 }
6773 Py_DECREF(argtup);
6774
6775 if (obj == NULL)
6776 return -1;
6777
6778 PDATA_PUSH(self->stack, obj, -1);
6779 return 0;
6780 }
6781
6782 /* Just raises an error if we don't know the protocol specified. PROTO
6783 * is the first opcode for protocols >= 2.
6784 */
6785 static int
load_proto(UnpicklerObject * self)6786 load_proto(UnpicklerObject *self)
6787 {
6788 char *s;
6789 int i;
6790
6791 if (_Unpickler_Read(self, &s, 1) < 0)
6792 return -1;
6793
6794 i = (unsigned char)s[0];
6795 if (i <= HIGHEST_PROTOCOL) {
6796 self->proto = i;
6797 return 0;
6798 }
6799
6800 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6801 return -1;
6802 }
6803
6804 static int
load_frame(UnpicklerObject * self)6805 load_frame(UnpicklerObject *self)
6806 {
6807 char *s;
6808 Py_ssize_t frame_len;
6809
6810 if (_Unpickler_Read(self, &s, 8) < 0)
6811 return -1;
6812
6813 frame_len = calc_binsize(s, 8);
6814 if (frame_len < 0) {
6815 PyErr_Format(PyExc_OverflowError,
6816 "FRAME length exceeds system's maximum of %zd bytes",
6817 PY_SSIZE_T_MAX);
6818 return -1;
6819 }
6820
6821 if (_Unpickler_Read(self, &s, frame_len) < 0)
6822 return -1;
6823
6824 /* Rewind to start of frame */
6825 self->next_read_idx -= frame_len;
6826 return 0;
6827 }
6828
6829 static PyObject *
load(UnpicklerObject * self)6830 load(UnpicklerObject *self)
6831 {
6832 PyObject *value = NULL;
6833 char *s = NULL;
6834
6835 self->num_marks = 0;
6836 self->stack->mark_set = 0;
6837 self->stack->fence = 0;
6838 self->proto = 0;
6839 if (Py_SIZE(self->stack))
6840 Pdata_clear(self->stack, 0);
6841
6842 /* Convenient macros for the dispatch while-switch loop just below. */
6843 #define OP(opcode, load_func) \
6844 case opcode: if (load_func(self) < 0) break; continue;
6845
6846 #define OP_ARG(opcode, load_func, arg) \
6847 case opcode: if (load_func(self, (arg)) < 0) break; continue;
6848
6849 while (1) {
6850 if (_Unpickler_Read(self, &s, 1) < 0) {
6851 PickleState *st = _Pickle_GetGlobalState();
6852 if (PyErr_ExceptionMatches(st->UnpicklingError)) {
6853 PyErr_Format(PyExc_EOFError, "Ran out of input");
6854 }
6855 return NULL;
6856 }
6857
6858 switch ((enum opcode)s[0]) {
6859 OP(NONE, load_none)
6860 OP(BININT, load_binint)
6861 OP(BININT1, load_binint1)
6862 OP(BININT2, load_binint2)
6863 OP(INT, load_int)
6864 OP(LONG, load_long)
6865 OP_ARG(LONG1, load_counted_long, 1)
6866 OP_ARG(LONG4, load_counted_long, 4)
6867 OP(FLOAT, load_float)
6868 OP(BINFLOAT, load_binfloat)
6869 OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6870 OP_ARG(BINBYTES, load_counted_binbytes, 4)
6871 OP_ARG(BINBYTES8, load_counted_binbytes, 8)
6872 OP(BYTEARRAY8, load_counted_bytearray)
6873 OP(NEXT_BUFFER, load_next_buffer)
6874 OP(READONLY_BUFFER, load_readonly_buffer)
6875 OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6876 OP_ARG(BINSTRING, load_counted_binstring, 4)
6877 OP(STRING, load_string)
6878 OP(UNICODE, load_unicode)
6879 OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6880 OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6881 OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
6882 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6883 OP_ARG(TUPLE1, load_counted_tuple, 1)
6884 OP_ARG(TUPLE2, load_counted_tuple, 2)
6885 OP_ARG(TUPLE3, load_counted_tuple, 3)
6886 OP(TUPLE, load_tuple)
6887 OP(EMPTY_LIST, load_empty_list)
6888 OP(LIST, load_list)
6889 OP(EMPTY_DICT, load_empty_dict)
6890 OP(DICT, load_dict)
6891 OP(EMPTY_SET, load_empty_set)
6892 OP(ADDITEMS, load_additems)
6893 OP(FROZENSET, load_frozenset)
6894 OP(OBJ, load_obj)
6895 OP(INST, load_inst)
6896 OP(NEWOBJ, load_newobj)
6897 OP(NEWOBJ_EX, load_newobj_ex)
6898 OP(GLOBAL, load_global)
6899 OP(STACK_GLOBAL, load_stack_global)
6900 OP(APPEND, load_append)
6901 OP(APPENDS, load_appends)
6902 OP(BUILD, load_build)
6903 OP(DUP, load_dup)
6904 OP(BINGET, load_binget)
6905 OP(LONG_BINGET, load_long_binget)
6906 OP(GET, load_get)
6907 OP(MARK, load_mark)
6908 OP(BINPUT, load_binput)
6909 OP(LONG_BINPUT, load_long_binput)
6910 OP(PUT, load_put)
6911 OP(MEMOIZE, load_memoize)
6912 OP(POP, load_pop)
6913 OP(POP_MARK, load_pop_mark)
6914 OP(SETITEM, load_setitem)
6915 OP(SETITEMS, load_setitems)
6916 OP(PERSID, load_persid)
6917 OP(BINPERSID, load_binpersid)
6918 OP(REDUCE, load_reduce)
6919 OP(PROTO, load_proto)
6920 OP(FRAME, load_frame)
6921 OP_ARG(EXT1, load_extension, 1)
6922 OP_ARG(EXT2, load_extension, 2)
6923 OP_ARG(EXT4, load_extension, 4)
6924 OP_ARG(NEWTRUE, load_bool, Py_True)
6925 OP_ARG(NEWFALSE, load_bool, Py_False)
6926
6927 case STOP:
6928 break;
6929
6930 default:
6931 {
6932 PickleState *st = _Pickle_GetGlobalState();
6933 unsigned char c = (unsigned char) *s;
6934 if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
6935 PyErr_Format(st->UnpicklingError,
6936 "invalid load key, '%c'.", c);
6937 }
6938 else {
6939 PyErr_Format(st->UnpicklingError,
6940 "invalid load key, '\\x%02x'.", c);
6941 }
6942 return NULL;
6943 }
6944 }
6945
6946 break; /* and we are done! */
6947 }
6948
6949 if (PyErr_Occurred()) {
6950 return NULL;
6951 }
6952
6953 if (_Unpickler_SkipConsumed(self) < 0)
6954 return NULL;
6955
6956 PDATA_POP(self->stack, value);
6957 return value;
6958 }
6959
6960 /*[clinic input]
6961
6962 _pickle.Unpickler.load
6963
6964 Load a pickle.
6965
6966 Read a pickled object representation from the open file object given
6967 in the constructor, and return the reconstituted object hierarchy
6968 specified therein.
6969 [clinic start generated code]*/
6970
6971 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)6972 _pickle_Unpickler_load_impl(UnpicklerObject *self)
6973 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
6974 {
6975 UnpicklerObject *unpickler = (UnpicklerObject*)self;
6976
6977 /* Check whether the Unpickler was initialized correctly. This prevents
6978 segfaulting if a subclass overridden __init__ with a function that does
6979 not call Unpickler.__init__(). Here, we simply ensure that self->read
6980 is not NULL. */
6981 if (unpickler->read == NULL) {
6982 PickleState *st = _Pickle_GetGlobalState();
6983 PyErr_Format(st->UnpicklingError,
6984 "Unpickler.__init__() was not called by %s.__init__()",
6985 Py_TYPE(unpickler)->tp_name);
6986 return NULL;
6987 }
6988
6989 return load(unpickler);
6990 }
6991
6992 /* The name of find_class() is misleading. In newer pickle protocols, this
6993 function is used for loading any global (i.e., functions), not just
6994 classes. The name is kept only for backward compatibility. */
6995
6996 /*[clinic input]
6997
6998 _pickle.Unpickler.find_class
6999
7000 module_name: object
7001 global_name: object
7002 /
7003
7004 Return an object from a specified module.
7005
7006 If necessary, the module will be imported. Subclasses may override
7007 this method (e.g. to restrict unpickling of arbitrary classes and
7008 functions).
7009
7010 This method is called whenever a class or a function object is
7011 needed. Both arguments passed are str objects.
7012 [clinic start generated code]*/
7013
7014 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)7015 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7016 PyObject *module_name,
7017 PyObject *global_name)
7018 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
7019 {
7020 PyObject *global;
7021 PyObject *module;
7022
7023 if (PySys_Audit("pickle.find_class", "OO",
7024 module_name, global_name) < 0) {
7025 return NULL;
7026 }
7027
7028 /* Try to map the old names used in Python 2.x to the new ones used in
7029 Python 3.x. We do this only with old pickle protocols and when the
7030 user has not disabled the feature. */
7031 if (self->proto < 3 && self->fix_imports) {
7032 PyObject *key;
7033 PyObject *item;
7034 PickleState *st = _Pickle_GetGlobalState();
7035
7036 /* Check if the global (i.e., a function or a class) was renamed
7037 or moved to another module. */
7038 key = PyTuple_Pack(2, module_name, global_name);
7039 if (key == NULL)
7040 return NULL;
7041 item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
7042 Py_DECREF(key);
7043 if (item) {
7044 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7045 PyErr_Format(PyExc_RuntimeError,
7046 "_compat_pickle.NAME_MAPPING values should be "
7047 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7048 return NULL;
7049 }
7050 module_name = PyTuple_GET_ITEM(item, 0);
7051 global_name = PyTuple_GET_ITEM(item, 1);
7052 if (!PyUnicode_Check(module_name) ||
7053 !PyUnicode_Check(global_name)) {
7054 PyErr_Format(PyExc_RuntimeError,
7055 "_compat_pickle.NAME_MAPPING values should be "
7056 "pairs of str, not (%.200s, %.200s)",
7057 Py_TYPE(module_name)->tp_name,
7058 Py_TYPE(global_name)->tp_name);
7059 return NULL;
7060 }
7061 }
7062 else if (PyErr_Occurred()) {
7063 return NULL;
7064 }
7065 else {
7066 /* Check if the module was renamed. */
7067 item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7068 if (item) {
7069 if (!PyUnicode_Check(item)) {
7070 PyErr_Format(PyExc_RuntimeError,
7071 "_compat_pickle.IMPORT_MAPPING values should be "
7072 "strings, not %.200s", Py_TYPE(item)->tp_name);
7073 return NULL;
7074 }
7075 module_name = item;
7076 }
7077 else if (PyErr_Occurred()) {
7078 return NULL;
7079 }
7080 }
7081 }
7082
7083 /*
7084 * we don't use PyImport_GetModule here, because it can return partially-
7085 * initialised modules, which then cause the getattribute to fail.
7086 */
7087 module = PyImport_Import(module_name);
7088 if (module == NULL) {
7089 return NULL;
7090 }
7091 global = getattribute(module, global_name, self->proto >= 4);
7092 Py_DECREF(module);
7093 return global;
7094 }
7095
7096 /*[clinic input]
7097
7098 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
7099
7100 Returns size in memory, in bytes.
7101 [clinic start generated code]*/
7102
7103 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)7104 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7105 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7106 {
7107 Py_ssize_t res;
7108
7109 res = _PyObject_SIZE(Py_TYPE(self));
7110 if (self->memo != NULL)
7111 res += self->memo_size * sizeof(PyObject *);
7112 if (self->marks != NULL)
7113 res += self->marks_size * sizeof(Py_ssize_t);
7114 if (self->input_line != NULL)
7115 res += strlen(self->input_line) + 1;
7116 if (self->encoding != NULL)
7117 res += strlen(self->encoding) + 1;
7118 if (self->errors != NULL)
7119 res += strlen(self->errors) + 1;
7120 return res;
7121 }
7122
7123 static struct PyMethodDef Unpickler_methods[] = {
7124 _PICKLE_UNPICKLER_LOAD_METHODDEF
7125 _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
7126 _PICKLE_UNPICKLER___SIZEOF___METHODDEF
7127 {NULL, NULL} /* sentinel */
7128 };
7129
7130 static void
Unpickler_dealloc(UnpicklerObject * self)7131 Unpickler_dealloc(UnpicklerObject *self)
7132 {
7133 PyObject_GC_UnTrack((PyObject *)self);
7134 Py_XDECREF(self->readline);
7135 Py_XDECREF(self->readinto);
7136 Py_XDECREF(self->read);
7137 Py_XDECREF(self->peek);
7138 Py_XDECREF(self->stack);
7139 Py_XDECREF(self->pers_func);
7140 Py_XDECREF(self->buffers);
7141 if (self->buffer.buf != NULL) {
7142 PyBuffer_Release(&self->buffer);
7143 self->buffer.buf = NULL;
7144 }
7145
7146 _Unpickler_MemoCleanup(self);
7147 PyMem_Free(self->marks);
7148 PyMem_Free(self->input_line);
7149 PyMem_Free(self->encoding);
7150 PyMem_Free(self->errors);
7151
7152 Py_TYPE(self)->tp_free((PyObject *)self);
7153 }
7154
7155 static int
Unpickler_traverse(UnpicklerObject * self,visitproc visit,void * arg)7156 Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
7157 {
7158 Py_VISIT(self->readline);
7159 Py_VISIT(self->readinto);
7160 Py_VISIT(self->read);
7161 Py_VISIT(self->peek);
7162 Py_VISIT(self->stack);
7163 Py_VISIT(self->pers_func);
7164 Py_VISIT(self->buffers);
7165 return 0;
7166 }
7167
7168 static int
Unpickler_clear(UnpicklerObject * self)7169 Unpickler_clear(UnpicklerObject *self)
7170 {
7171 Py_CLEAR(self->readline);
7172 Py_CLEAR(self->readinto);
7173 Py_CLEAR(self->read);
7174 Py_CLEAR(self->peek);
7175 Py_CLEAR(self->stack);
7176 Py_CLEAR(self->pers_func);
7177 Py_CLEAR(self->buffers);
7178 if (self->buffer.buf != NULL) {
7179 PyBuffer_Release(&self->buffer);
7180 self->buffer.buf = NULL;
7181 }
7182
7183 _Unpickler_MemoCleanup(self);
7184 PyMem_Free(self->marks);
7185 self->marks = NULL;
7186 PyMem_Free(self->input_line);
7187 self->input_line = NULL;
7188 PyMem_Free(self->encoding);
7189 self->encoding = NULL;
7190 PyMem_Free(self->errors);
7191 self->errors = NULL;
7192
7193 return 0;
7194 }
7195
7196 /*[clinic input]
7197
7198 _pickle.Unpickler.__init__
7199
7200 file: object
7201 *
7202 fix_imports: bool = True
7203 encoding: str = 'ASCII'
7204 errors: str = 'strict'
7205 buffers: object(c_default="NULL") = ()
7206
7207 This takes a binary file for reading a pickle data stream.
7208
7209 The protocol version of the pickle is detected automatically, so no
7210 protocol argument is needed. Bytes past the pickled object's
7211 representation are ignored.
7212
7213 The argument *file* must have two methods, a read() method that takes
7214 an integer argument, and a readline() method that requires no
7215 arguments. Both methods should return bytes. Thus *file* can be a
7216 binary file object opened for reading, an io.BytesIO object, or any
7217 other custom object that meets this interface.
7218
7219 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7220 which are used to control compatibility support for pickle stream
7221 generated by Python 2. If *fix_imports* is True, pickle will try to
7222 map the old Python 2 names to the new names used in Python 3. The
7223 *encoding* and *errors* tell pickle how to decode 8-bit string
7224 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7225 respectively. The *encoding* can be 'bytes' to read these 8-bit
7226 string instances as bytes objects.
7227 [clinic start generated code]*/
7228
7229 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7230 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7231 int fix_imports, const char *encoding,
7232 const char *errors, PyObject *buffers)
7233 /*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
7234 {
7235 _Py_IDENTIFIER(persistent_load);
7236
7237 /* In case of multiple __init__() calls, clear previous content. */
7238 if (self->read != NULL)
7239 (void)Unpickler_clear(self);
7240
7241 if (_Unpickler_SetInputStream(self, file) < 0)
7242 return -1;
7243
7244 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
7245 return -1;
7246
7247 if (_Unpickler_SetBuffers(self, buffers) < 0)
7248 return -1;
7249
7250 self->fix_imports = fix_imports;
7251
7252 if (init_method_ref((PyObject *)self, &PyId_persistent_load,
7253 &self->pers_func, &self->pers_func_self) < 0)
7254 {
7255 return -1;
7256 }
7257
7258 self->stack = (Pdata *)Pdata_New();
7259 if (self->stack == NULL)
7260 return -1;
7261
7262 self->memo_size = 32;
7263 self->memo = _Unpickler_NewMemo(self->memo_size);
7264 if (self->memo == NULL)
7265 return -1;
7266
7267 self->proto = 0;
7268
7269 return 0;
7270 }
7271
7272
7273 /* Define a proxy object for the Unpickler's internal memo object. This is to
7274 * avoid breaking code like:
7275 * unpickler.memo.clear()
7276 * and
7277 * unpickler.memo = saved_memo
7278 * Is this a good idea? Not really, but we don't want to break code that uses
7279 * it. Note that we don't implement the entire mapping API here. This is
7280 * intentional, as these should be treated as black-box implementation details.
7281 *
7282 * We do, however, have to implement pickling/unpickling support because of
7283 * real-world code like cvs2svn.
7284 */
7285
7286 /*[clinic input]
7287 _pickle.UnpicklerMemoProxy.clear
7288
7289 Remove all items from memo.
7290 [clinic start generated code]*/
7291
7292 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)7293 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
7294 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
7295 {
7296 _Unpickler_MemoCleanup(self->unpickler);
7297 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7298 if (self->unpickler->memo == NULL)
7299 return NULL;
7300 Py_RETURN_NONE;
7301 }
7302
7303 /*[clinic input]
7304 _pickle.UnpicklerMemoProxy.copy
7305
7306 Copy the memo to a new object.
7307 [clinic start generated code]*/
7308
7309 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)7310 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
7311 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
7312 {
7313 size_t i;
7314 PyObject *new_memo = PyDict_New();
7315 if (new_memo == NULL)
7316 return NULL;
7317
7318 for (i = 0; i < self->unpickler->memo_size; i++) {
7319 int status;
7320 PyObject *key, *value;
7321
7322 value = self->unpickler->memo[i];
7323 if (value == NULL)
7324 continue;
7325
7326 key = PyLong_FromSsize_t(i);
7327 if (key == NULL)
7328 goto error;
7329 status = PyDict_SetItem(new_memo, key, value);
7330 Py_DECREF(key);
7331 if (status < 0)
7332 goto error;
7333 }
7334 return new_memo;
7335
7336 error:
7337 Py_DECREF(new_memo);
7338 return NULL;
7339 }
7340
7341 /*[clinic input]
7342 _pickle.UnpicklerMemoProxy.__reduce__
7343
7344 Implement pickling support.
7345 [clinic start generated code]*/
7346
7347 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)7348 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
7349 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
7350 {
7351 PyObject *reduce_value;
7352 PyObject *constructor_args;
7353 PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
7354 if (contents == NULL)
7355 return NULL;
7356
7357 reduce_value = PyTuple_New(2);
7358 if (reduce_value == NULL) {
7359 Py_DECREF(contents);
7360 return NULL;
7361 }
7362 constructor_args = PyTuple_New(1);
7363 if (constructor_args == NULL) {
7364 Py_DECREF(contents);
7365 Py_DECREF(reduce_value);
7366 return NULL;
7367 }
7368 PyTuple_SET_ITEM(constructor_args, 0, contents);
7369 Py_INCREF((PyObject *)&PyDict_Type);
7370 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7371 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7372 return reduce_value;
7373 }
7374
7375 static PyMethodDef unpicklerproxy_methods[] = {
7376 _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
7377 _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
7378 _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
7379 {NULL, NULL} /* sentinel */
7380 };
7381
7382 static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject * self)7383 UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
7384 {
7385 PyObject_GC_UnTrack(self);
7386 Py_XDECREF(self->unpickler);
7387 PyObject_GC_Del((PyObject *)self);
7388 }
7389
7390 static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject * self,visitproc visit,void * arg)7391 UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
7392 visitproc visit, void *arg)
7393 {
7394 Py_VISIT(self->unpickler);
7395 return 0;
7396 }
7397
7398 static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject * self)7399 UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
7400 {
7401 Py_CLEAR(self->unpickler);
7402 return 0;
7403 }
7404
7405 static PyTypeObject UnpicklerMemoProxyType = {
7406 PyVarObject_HEAD_INIT(NULL, 0)
7407 "_pickle.UnpicklerMemoProxy", /*tp_name*/
7408 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
7409 0,
7410 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
7411 0, /* tp_vectorcall_offset */
7412 0, /* tp_getattr */
7413 0, /* tp_setattr */
7414 0, /* tp_as_async */
7415 0, /* tp_repr */
7416 0, /* tp_as_number */
7417 0, /* tp_as_sequence */
7418 0, /* tp_as_mapping */
7419 PyObject_HashNotImplemented, /* tp_hash */
7420 0, /* tp_call */
7421 0, /* tp_str */
7422 PyObject_GenericGetAttr, /* tp_getattro */
7423 PyObject_GenericSetAttr, /* tp_setattro */
7424 0, /* tp_as_buffer */
7425 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7426 0, /* tp_doc */
7427 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
7428 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
7429 0, /* tp_richcompare */
7430 0, /* tp_weaklistoffset */
7431 0, /* tp_iter */
7432 0, /* tp_iternext */
7433 unpicklerproxy_methods, /* tp_methods */
7434 };
7435
7436 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)7437 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7438 {
7439 UnpicklerMemoProxyObject *self;
7440
7441 self = PyObject_GC_New(UnpicklerMemoProxyObject,
7442 &UnpicklerMemoProxyType);
7443 if (self == NULL)
7444 return NULL;
7445 Py_INCREF(unpickler);
7446 self->unpickler = unpickler;
7447 PyObject_GC_Track(self);
7448 return (PyObject *)self;
7449 }
7450
7451 /*****************************************************************************/
7452
7453
7454 static PyObject *
Unpickler_get_memo(UnpicklerObject * self,void * Py_UNUSED (ignored))7455 Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
7456 {
7457 return UnpicklerMemoProxy_New(self);
7458 }
7459
7460 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))7461 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7462 {
7463 PyObject **new_memo;
7464 size_t new_memo_size = 0;
7465
7466 if (obj == NULL) {
7467 PyErr_SetString(PyExc_TypeError,
7468 "attribute deletion is not supported");
7469 return -1;
7470 }
7471
7472 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
7473 UnpicklerObject *unpickler =
7474 ((UnpicklerMemoProxyObject *)obj)->unpickler;
7475
7476 new_memo_size = unpickler->memo_size;
7477 new_memo = _Unpickler_NewMemo(new_memo_size);
7478 if (new_memo == NULL)
7479 return -1;
7480
7481 for (size_t i = 0; i < new_memo_size; i++) {
7482 Py_XINCREF(unpickler->memo[i]);
7483 new_memo[i] = unpickler->memo[i];
7484 }
7485 }
7486 else if (PyDict_Check(obj)) {
7487 Py_ssize_t i = 0;
7488 PyObject *key, *value;
7489
7490 new_memo_size = PyDict_GET_SIZE(obj);
7491 new_memo = _Unpickler_NewMemo(new_memo_size);
7492 if (new_memo == NULL)
7493 return -1;
7494
7495 while (PyDict_Next(obj, &i, &key, &value)) {
7496 Py_ssize_t idx;
7497 if (!PyLong_Check(key)) {
7498 PyErr_SetString(PyExc_TypeError,
7499 "memo key must be integers");
7500 goto error;
7501 }
7502 idx = PyLong_AsSsize_t(key);
7503 if (idx == -1 && PyErr_Occurred())
7504 goto error;
7505 if (idx < 0) {
7506 PyErr_SetString(PyExc_ValueError,
7507 "memo key must be positive integers.");
7508 goto error;
7509 }
7510 if (_Unpickler_MemoPut(self, idx, value) < 0)
7511 goto error;
7512 }
7513 }
7514 else {
7515 PyErr_Format(PyExc_TypeError,
7516 "'memo' attribute must be an UnpicklerMemoProxy object "
7517 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7518 return -1;
7519 }
7520
7521 _Unpickler_MemoCleanup(self);
7522 self->memo_size = new_memo_size;
7523 self->memo = new_memo;
7524
7525 return 0;
7526
7527 error:
7528 if (new_memo_size) {
7529 for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7530 Py_XDECREF(new_memo[i]);
7531 }
7532 PyMem_FREE(new_memo);
7533 }
7534 return -1;
7535 }
7536
7537 static PyObject *
Unpickler_get_persload(UnpicklerObject * self,void * Py_UNUSED (ignored))7538 Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7539 {
7540 if (self->pers_func == NULL) {
7541 PyErr_SetString(PyExc_AttributeError, "persistent_load");
7542 return NULL;
7543 }
7544 return reconstruct_method(self->pers_func, self->pers_func_self);
7545 }
7546
7547 static int
Unpickler_set_persload(UnpicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))7548 Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
7549 {
7550 if (value == NULL) {
7551 PyErr_SetString(PyExc_TypeError,
7552 "attribute deletion is not supported");
7553 return -1;
7554 }
7555 if (!PyCallable_Check(value)) {
7556 PyErr_SetString(PyExc_TypeError,
7557 "persistent_load must be a callable taking "
7558 "one argument");
7559 return -1;
7560 }
7561
7562 self->pers_func_self = NULL;
7563 Py_INCREF(value);
7564 Py_XSETREF(self->pers_func, value);
7565
7566 return 0;
7567 }
7568
7569 static PyGetSetDef Unpickler_getsets[] = {
7570 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
7571 {"persistent_load", (getter)Unpickler_get_persload,
7572 (setter)Unpickler_set_persload},
7573 {NULL}
7574 };
7575
7576 static PyTypeObject Unpickler_Type = {
7577 PyVarObject_HEAD_INIT(NULL, 0)
7578 "_pickle.Unpickler", /*tp_name*/
7579 sizeof(UnpicklerObject), /*tp_basicsize*/
7580 0, /*tp_itemsize*/
7581 (destructor)Unpickler_dealloc, /*tp_dealloc*/
7582 0, /*tp_vectorcall_offset*/
7583 0, /*tp_getattr*/
7584 0, /*tp_setattr*/
7585 0, /*tp_as_async*/
7586 0, /*tp_repr*/
7587 0, /*tp_as_number*/
7588 0, /*tp_as_sequence*/
7589 0, /*tp_as_mapping*/
7590 0, /*tp_hash*/
7591 0, /*tp_call*/
7592 0, /*tp_str*/
7593 0, /*tp_getattro*/
7594 0, /*tp_setattro*/
7595 0, /*tp_as_buffer*/
7596 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7597 _pickle_Unpickler___init____doc__, /*tp_doc*/
7598 (traverseproc)Unpickler_traverse, /*tp_traverse*/
7599 (inquiry)Unpickler_clear, /*tp_clear*/
7600 0, /*tp_richcompare*/
7601 0, /*tp_weaklistoffset*/
7602 0, /*tp_iter*/
7603 0, /*tp_iternext*/
7604 Unpickler_methods, /*tp_methods*/
7605 0, /*tp_members*/
7606 Unpickler_getsets, /*tp_getset*/
7607 0, /*tp_base*/
7608 0, /*tp_dict*/
7609 0, /*tp_descr_get*/
7610 0, /*tp_descr_set*/
7611 0, /*tp_dictoffset*/
7612 _pickle_Unpickler___init__, /*tp_init*/
7613 PyType_GenericAlloc, /*tp_alloc*/
7614 PyType_GenericNew, /*tp_new*/
7615 PyObject_GC_Del, /*tp_free*/
7616 0, /*tp_is_gc*/
7617 };
7618
7619 /*[clinic input]
7620
7621 _pickle.dump
7622
7623 obj: object
7624 file: object
7625 protocol: object = None
7626 *
7627 fix_imports: bool = True
7628 buffer_callback: object = None
7629
7630 Write a pickled representation of obj to the open file object file.
7631
7632 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7633 be more efficient.
7634
7635 The optional *protocol* argument tells the pickler to use the given
7636 protocol; supported protocols are 0, 1, 2, 3 and 4. The default
7637 protocol is 4. It was introduced in Python 3.4, it is incompatible
7638 with previous versions.
7639
7640 Specifying a negative protocol version selects the highest protocol
7641 version supported. The higher the protocol used, the more recent the
7642 version of Python needed to read the pickle produced.
7643
7644 The *file* argument must have a write() method that accepts a single
7645 bytes argument. It can thus be a file object opened for binary
7646 writing, an io.BytesIO instance, or any other custom object that meets
7647 this interface.
7648
7649 If *fix_imports* is True and protocol is less than 3, pickle will try
7650 to map the new Python 3 names to the old module names used in Python
7651 2, so that the pickle data stream is readable with Python 2.
7652
7653 If *buffer_callback* is None (the default), buffer views are serialized
7654 into *file* as part of the pickle stream. It is an error if
7655 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7656
7657 [clinic start generated code]*/
7658
7659 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7660 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7661 PyObject *protocol, int fix_imports,
7662 PyObject *buffer_callback)
7663 /*[clinic end generated code: output=706186dba996490c input=cfdcaf573ed6e46c]*/
7664 {
7665 PicklerObject *pickler = _Pickler_New();
7666
7667 if (pickler == NULL)
7668 return NULL;
7669
7670 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7671 goto error;
7672
7673 if (_Pickler_SetOutputStream(pickler, file) < 0)
7674 goto error;
7675
7676 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7677 goto error;
7678
7679 if (dump(pickler, obj) < 0)
7680 goto error;
7681
7682 if (_Pickler_FlushToFile(pickler) < 0)
7683 goto error;
7684
7685 Py_DECREF(pickler);
7686 Py_RETURN_NONE;
7687
7688 error:
7689 Py_XDECREF(pickler);
7690 return NULL;
7691 }
7692
7693 /*[clinic input]
7694
7695 _pickle.dumps
7696
7697 obj: object
7698 protocol: object = None
7699 *
7700 fix_imports: bool = True
7701 buffer_callback: object = None
7702
7703 Return the pickled representation of the object as a bytes object.
7704
7705 The optional *protocol* argument tells the pickler to use the given
7706 protocol; supported protocols are 0, 1, 2, 3 and 4. The default
7707 protocol is 4. It was introduced in Python 3.4, it is incompatible
7708 with previous versions.
7709
7710 Specifying a negative protocol version selects the highest protocol
7711 version supported. The higher the protocol used, the more recent the
7712 version of Python needed to read the pickle produced.
7713
7714 If *fix_imports* is True and *protocol* is less than 3, pickle will
7715 try to map the new Python 3 names to the old module names used in
7716 Python 2, so that the pickle data stream is readable with Python 2.
7717
7718 If *buffer_callback* is None (the default), buffer views are serialized
7719 into *file* as part of the pickle stream. It is an error if
7720 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7721
7722 [clinic start generated code]*/
7723
7724 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7725 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7726 int fix_imports, PyObject *buffer_callback)
7727 /*[clinic end generated code: output=fbab0093a5580fdf input=9f334d535ff7194f]*/
7728 {
7729 PyObject *result;
7730 PicklerObject *pickler = _Pickler_New();
7731
7732 if (pickler == NULL)
7733 return NULL;
7734
7735 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7736 goto error;
7737
7738 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7739 goto error;
7740
7741 if (dump(pickler, obj) < 0)
7742 goto error;
7743
7744 result = _Pickler_GetString(pickler);
7745 Py_DECREF(pickler);
7746 return result;
7747
7748 error:
7749 Py_XDECREF(pickler);
7750 return NULL;
7751 }
7752
7753 /*[clinic input]
7754
7755 _pickle.load
7756
7757 file: object
7758 *
7759 fix_imports: bool = True
7760 encoding: str = 'ASCII'
7761 errors: str = 'strict'
7762 buffers: object(c_default="NULL") = ()
7763
7764 Read and return an object from the pickle data stored in a file.
7765
7766 This is equivalent to ``Unpickler(file).load()``, but may be more
7767 efficient.
7768
7769 The protocol version of the pickle is detected automatically, so no
7770 protocol argument is needed. Bytes past the pickled object's
7771 representation are ignored.
7772
7773 The argument *file* must have two methods, a read() method that takes
7774 an integer argument, and a readline() method that requires no
7775 arguments. Both methods should return bytes. Thus *file* can be a
7776 binary file object opened for reading, an io.BytesIO object, or any
7777 other custom object that meets this interface.
7778
7779 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7780 which are used to control compatibility support for pickle stream
7781 generated by Python 2. If *fix_imports* is True, pickle will try to
7782 map the old Python 2 names to the new names used in Python 3. The
7783 *encoding* and *errors* tell pickle how to decode 8-bit string
7784 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7785 respectively. The *encoding* can be 'bytes' to read these 8-bit
7786 string instances as bytes objects.
7787 [clinic start generated code]*/
7788
7789 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7790 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7791 const char *encoding, const char *errors,
7792 PyObject *buffers)
7793 /*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
7794 {
7795 PyObject *result;
7796 UnpicklerObject *unpickler = _Unpickler_New();
7797
7798 if (unpickler == NULL)
7799 return NULL;
7800
7801 if (_Unpickler_SetInputStream(unpickler, file) < 0)
7802 goto error;
7803
7804 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7805 goto error;
7806
7807 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7808 goto error;
7809
7810 unpickler->fix_imports = fix_imports;
7811
7812 result = load(unpickler);
7813 Py_DECREF(unpickler);
7814 return result;
7815
7816 error:
7817 Py_XDECREF(unpickler);
7818 return NULL;
7819 }
7820
7821 /*[clinic input]
7822
7823 _pickle.loads
7824
7825 data: object
7826 *
7827 fix_imports: bool = True
7828 encoding: str = 'ASCII'
7829 errors: str = 'strict'
7830 buffers: object(c_default="NULL") = ()
7831
7832 Read and return an object from the given pickle data.
7833
7834 The protocol version of the pickle is detected automatically, so no
7835 protocol argument is needed. Bytes past the pickled object's
7836 representation are ignored.
7837
7838 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7839 which are used to control compatibility support for pickle stream
7840 generated by Python 2. If *fix_imports* is True, pickle will try to
7841 map the old Python 2 names to the new names used in Python 3. The
7842 *encoding* and *errors* tell pickle how to decode 8-bit string
7843 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7844 respectively. The *encoding* can be 'bytes' to read these 8-bit
7845 string instances as bytes objects.
7846 [clinic start generated code]*/
7847
7848 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7849 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7850 const char *encoding, const char *errors,
7851 PyObject *buffers)
7852 /*[clinic end generated code: output=82ac1e6b588e6d02 input=9c2ab6a0960185ea]*/
7853 {
7854 PyObject *result;
7855 UnpicklerObject *unpickler = _Unpickler_New();
7856
7857 if (unpickler == NULL)
7858 return NULL;
7859
7860 if (_Unpickler_SetStringInput(unpickler, data) < 0)
7861 goto error;
7862
7863 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7864 goto error;
7865
7866 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7867 goto error;
7868
7869 unpickler->fix_imports = fix_imports;
7870
7871 result = load(unpickler);
7872 Py_DECREF(unpickler);
7873 return result;
7874
7875 error:
7876 Py_XDECREF(unpickler);
7877 return NULL;
7878 }
7879
7880 static struct PyMethodDef pickle_methods[] = {
7881 _PICKLE_DUMP_METHODDEF
7882 _PICKLE_DUMPS_METHODDEF
7883 _PICKLE_LOAD_METHODDEF
7884 _PICKLE_LOADS_METHODDEF
7885 {NULL, NULL} /* sentinel */
7886 };
7887
7888 static int
pickle_clear(PyObject * m)7889 pickle_clear(PyObject *m)
7890 {
7891 _Pickle_ClearState(_Pickle_GetState(m));
7892 return 0;
7893 }
7894
7895 static void
pickle_free(PyObject * m)7896 pickle_free(PyObject *m)
7897 {
7898 _Pickle_ClearState(_Pickle_GetState(m));
7899 }
7900
7901 static int
pickle_traverse(PyObject * m,visitproc visit,void * arg)7902 pickle_traverse(PyObject *m, visitproc visit, void *arg)
7903 {
7904 PickleState *st = _Pickle_GetState(m);
7905 Py_VISIT(st->PickleError);
7906 Py_VISIT(st->PicklingError);
7907 Py_VISIT(st->UnpicklingError);
7908 Py_VISIT(st->dispatch_table);
7909 Py_VISIT(st->extension_registry);
7910 Py_VISIT(st->extension_cache);
7911 Py_VISIT(st->inverted_registry);
7912 Py_VISIT(st->name_mapping_2to3);
7913 Py_VISIT(st->import_mapping_2to3);
7914 Py_VISIT(st->name_mapping_3to2);
7915 Py_VISIT(st->import_mapping_3to2);
7916 Py_VISIT(st->codecs_encode);
7917 Py_VISIT(st->getattr);
7918 return 0;
7919 }
7920
7921 static struct PyModuleDef _picklemodule = {
7922 PyModuleDef_HEAD_INIT,
7923 "_pickle", /* m_name */
7924 pickle_module_doc, /* m_doc */
7925 sizeof(PickleState), /* m_size */
7926 pickle_methods, /* m_methods */
7927 NULL, /* m_reload */
7928 pickle_traverse, /* m_traverse */
7929 pickle_clear, /* m_clear */
7930 (freefunc)pickle_free /* m_free */
7931 };
7932
7933 PyMODINIT_FUNC
PyInit__pickle(void)7934 PyInit__pickle(void)
7935 {
7936 PyObject *m;
7937 PickleState *st;
7938
7939 m = PyState_FindModule(&_picklemodule);
7940 if (m) {
7941 Py_INCREF(m);
7942 return m;
7943 }
7944
7945 if (PyType_Ready(&Unpickler_Type) < 0)
7946 return NULL;
7947 if (PyType_Ready(&Pickler_Type) < 0)
7948 return NULL;
7949 if (PyType_Ready(&Pdata_Type) < 0)
7950 return NULL;
7951 if (PyType_Ready(&PicklerMemoProxyType) < 0)
7952 return NULL;
7953 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7954 return NULL;
7955
7956 /* Create the module and add the functions. */
7957 m = PyModule_Create(&_picklemodule);
7958 if (m == NULL)
7959 return NULL;
7960
7961 /* Add types */
7962 Py_INCREF(&Pickler_Type);
7963 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
7964 return NULL;
7965 Py_INCREF(&Unpickler_Type);
7966 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
7967 return NULL;
7968 Py_INCREF(&PyPickleBuffer_Type);
7969 if (PyModule_AddObject(m, "PickleBuffer",
7970 (PyObject *)&PyPickleBuffer_Type) < 0)
7971 return NULL;
7972
7973 st = _Pickle_GetState(m);
7974
7975 /* Initialize the exceptions. */
7976 st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7977 if (st->PickleError == NULL)
7978 return NULL;
7979 st->PicklingError = \
7980 PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7981 if (st->PicklingError == NULL)
7982 return NULL;
7983 st->UnpicklingError = \
7984 PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7985 if (st->UnpicklingError == NULL)
7986 return NULL;
7987
7988 Py_INCREF(st->PickleError);
7989 if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
7990 return NULL;
7991 Py_INCREF(st->PicklingError);
7992 if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
7993 return NULL;
7994 Py_INCREF(st->UnpicklingError);
7995 if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
7996 return NULL;
7997
7998 if (_Pickle_InitState(st) < 0)
7999 return NULL;
8000
8001 return m;
8002 }
8003