1
2 /* Write Python objects to files and read them back.
3 This is primarily intended for writing and reading compiled Python code,
4 even though dicts, lists, sets and frozensets, not commonly seen in
5 code objects, are supported.
6 Version 3 of this protocol properly supports circular links
7 and sharing. */
8
9 #include "Python.h"
10 #include "pycore_call.h" // _PyObject_CallNoArgs()
11 #include "pycore_code.h" // _PyCode_New()
12 #include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION()
13 #include "pycore_hashtable.h" // _Py_hashtable_t
14 #include "pycore_long.h" // _PyLong_DigitCount
15 #include "pycore_setobject.h" // _PySet_NextEntry()
16 #include "marshal.h" // Py_MARSHAL_VERSION
17 #include "pycore_pystate.h" // _PyInterpreterState_GET()
18
19 #ifdef __APPLE__
20 # include "TargetConditionals.h"
21 #endif /* __APPLE__ */
22
23 /*[clinic input]
24 module marshal
25 [clinic start generated code]*/
26 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
27
28 #include "clinic/marshal.c.h"
29
/* High water mark to determine when the marshalled object is dangerously deep
 * and risks coring the interpreter.  When the object stack gets this deep,
 * raise an exception instead of continuing.
 *
 * BUG: https://bugs.python.org/issue33720
 * On Windows PGO builds, the r_object function overallocates its stack and
 * can cause a stack overflow.  We reduce the maximum depth for all Windows
 * releases to protect against this.
 *
 * The condition shown next is kept only as a historical note; it appears to
 * be the earlier form that reduced the value on Windows debug builds only:
 *     #if defined(MS_WINDOWS) && defined(_DEBUG)
 */
#if defined(MS_WINDOWS)
#  define MAX_MARSHAL_STACK_DEPTH 1000
#elif defined(__wasi__)
#  define MAX_MARSHAL_STACK_DEPTH 1500
// TARGET_OS_IPHONE covers any non-macOS Apple platform.
// It won't be defined on older macOS SDKs
#elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
#  define MAX_MARSHAL_STACK_DEPTH 1500
#else
#  define MAX_MARSHAL_STACK_DEPTH 2000
#endif
52
/* One-byte type codes.  Every serialized object starts with one of these;
   the writer may OR FLAG_REF into the byte (see W_TYPE) and the reader masks
   it back out. */
#define TYPE_NULL               '0'   /* NULL pointer; also terminates a dict */
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'   /* the StopIteration exception type */
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'   /* int written with w_long (4 bytes) */
/* TYPE_INT64 is not generated anymore.
   Supported for backward compatibility only. */
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'   /* text form, written when version <= 1 */
#define TYPE_BINARY_FLOAT       'g'   /* 8 packed bytes, written when version > 1 */
#define TYPE_COMPLEX            'x'   /* two text floats, version <= 1 */
#define TYPE_BINARY_COMPLEX     'y'   /* two packed floats, version > 1 */
#define TYPE_LONG               'l'   /* int as a sequence of 15-bit digits */
#define TYPE_STRING             's'   /* bytes (and other buffer objects) */
#define TYPE_INTERNED           't'   /* interned str as UTF-8, version >= 3 */
#define TYPE_REF                'r'   /* back-reference: index of an earlier object */
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'   /* str as UTF-8 ("surrogatepass") */
#define TYPE_UNKNOWN            '?'   /* written for objects that cannot be marshalled */
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
#define FLAG_REF                '\x80' /* with a type, add obj to index */

/* The codes below are only written when version >= 4. */
#define TYPE_ASCII              'a'   /* ASCII str, 4-byte length */
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SMALL_TUPLE        ')'   /* tuple with fewer than 256 items, 1-byte length */
#define TYPE_SHORT_ASCII        'z'   /* ASCII str shorter than 256, 1-byte length */
#define TYPE_SHORT_ASCII_INTERNED 'Z'
86
/* Values stored in WFILE.error by the writer functions. */
#define WFERR_OK 0                 /* no error recorded */
#define WFERR_UNMARSHALLABLE 1     /* object (or size) cannot be represented */
#define WFERR_NESTEDTOODEEP 2      /* depth exceeded MAX_MARSHAL_STACK_DEPTH */
#define WFERR_NOMEMORY 3           /* an allocation failed or would overflow */
#define WFERR_CODE_NOT_ALLOWED 4   /* code object met while allow_code is 0 */
92
/* State of one marshal write operation.  Output goes either to a FILE
   (fp != NULL, with buf being a caller-provided scratch buffer) or into a
   growing bytes object (fp == NULL, str != NULL). */
typedef struct {
    FILE *fp;                   /* destination file, or NULL when writing to str */
    int error;                  /* see WFERR_* values */
    int depth;                  /* current nesting level, maintained by w_object() */
    PyObject *str;              /* bytes object that receives the output when fp is NULL */
    char *ptr;                  /* next byte to write; NULL after a failed resize */
    const char *end;            /* one past the last usable byte of buf */
    char *buf;                  /* start of the current output buffer */
    _Py_hashtable_t *hashtable; /* object -> reference index; created for version >= 3 */
    int version;                /* marshal format version being written */
    int allow_code;             /* when 0, code objects are rejected */
} WFILE;
105
/* Append the single byte `c` to the output of WFILE `p`.
   When the buffer is full (ptr == end) w_reserve() is asked for one more
   byte; if that fails the byte is dropped without further notice here.
   Note that `p` is evaluated more than once. */
#define w_byte(c, p) do {                               \
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
            *(p)->ptr++ = (c);                          \
    } while(0)
110
111 static void
w_flush(WFILE * p)112 w_flush(WFILE *p)
113 {
114 assert(p->fp != NULL);
115 fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
116 p->ptr = p->buf;
117 }
118
119 static int
w_reserve(WFILE * p,Py_ssize_t needed)120 w_reserve(WFILE *p, Py_ssize_t needed)
121 {
122 Py_ssize_t pos, size, delta;
123 if (p->ptr == NULL)
124 return 0; /* An error already occurred */
125 if (p->fp != NULL) {
126 w_flush(p);
127 return needed <= p->end - p->ptr;
128 }
129 assert(p->str != NULL);
130 pos = p->ptr - p->buf;
131 size = PyBytes_GET_SIZE(p->str);
132 if (size > 16*1024*1024)
133 delta = (size >> 3); /* 12.5% overallocation */
134 else
135 delta = size + 1024;
136 delta = Py_MAX(delta, needed);
137 if (delta > PY_SSIZE_T_MAX - size) {
138 p->error = WFERR_NOMEMORY;
139 return 0;
140 }
141 size += delta;
142 if (_PyBytes_Resize(&p->str, size) != 0) {
143 p->end = p->ptr = p->buf = NULL;
144 return 0;
145 }
146 else {
147 p->buf = PyBytes_AS_STRING(p->str);
148 p->ptr = p->buf + pos;
149 p->end = p->buf + size;
150 return 1;
151 }
152 }
153
154 static void
w_string(const void * s,Py_ssize_t n,WFILE * p)155 w_string(const void *s, Py_ssize_t n, WFILE *p)
156 {
157 Py_ssize_t m;
158 if (!n || p->ptr == NULL)
159 return;
160 m = p->end - p->ptr;
161 if (p->fp != NULL) {
162 if (n <= m) {
163 memcpy(p->ptr, s, n);
164 p->ptr += n;
165 }
166 else {
167 w_flush(p);
168 fwrite(s, 1, n, p->fp);
169 }
170 }
171 else {
172 if (n <= m || w_reserve(p, n - m)) {
173 memcpy(p->ptr, s, n);
174 p->ptr += n;
175 }
176 }
177 }
178
179 static void
w_short(int x,WFILE * p)180 w_short(int x, WFILE *p)
181 {
182 w_byte((char)( x & 0xff), p);
183 w_byte((char)((x>> 8) & 0xff), p);
184 }
185
186 static void
w_long(long x,WFILE * p)187 w_long(long x, WFILE *p)
188 {
189 w_byte((char)( x & 0xff), p);
190 w_byte((char)((x>> 8) & 0xff), p);
191 w_byte((char)((x>>16) & 0xff), p);
192 w_byte((char)((x>>24) & 0xff), p);
193 }
194
/* Largest size/count that fits the signed 32-bit fields of the format. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size `n` with w_long().
 *
 * Where size_t is wider than 32 bits, a size above SIZE32_MAX cannot be
 * represented: WFERR_UNMARSHALLABLE is recorded and the macro *returns from
 * the enclosing function*, which therefore must return void.
 *
 * The nesting depth is deliberately left untouched on that path:
 * w_object() pairs its own depth++ with a depth-- after the write helpers
 * return, so decrementing here as well would leave the counter one too low
 * and weaken the MAX_MARSHAL_STACK_DEPTH check for whatever is written next.
 */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif
209
/* Write a length-prefixed byte string: the size `n` via W_SIZE(), then the
   `n` bytes at `s`.
   NOTE: W_SIZE() may contain a hidden `return` (when SIZEOF_SIZE_T > 4 and
   `n` exceeds SIZE32_MAX); in that case an error is recorded in `p` and
   neither the size nor the data is written. */
static void
w_pstring(const void *s, Py_ssize_t n, WFILE *p)
{
    W_SIZE(n, p);
    w_string(s, n, p);
}
216
217 static void
w_short_pstring(const void * s,Py_ssize_t n,WFILE * p)218 w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
219 {
220 w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
221 w_string(s, n, p);
222 }
223
/* We assume that Python ints are stored internally in a base that is a
   power of 2**15; for the sake of portability we always read and write them
   in base exactly 2**15. */

#define PyLong_MARSHAL_SHIFT 15
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
/* Number of 15-bit marshal digits held by one internal digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)

/* Write the type code `t` for the object being serialized.
   The macro ORs in a variable named `flag` that must exist in the scope
   where it is expanded (it carries FLAG_REF when the object is indexed). */
#define W_TYPE(t, p) do { \
    w_byte((t) | flag, (p)); \
} while(0)

/* Forward declaration; w_complex_object() calls this to serialize individual
   set elements. */
static PyObject *
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code);
242
243 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)244 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
245 {
246 Py_ssize_t i, j, n, l;
247 digit d;
248
249 W_TYPE(TYPE_LONG, p);
250 if (_PyLong_IsZero(ob)) {
251 w_long((long)0, p);
252 return;
253 }
254
255 /* set l to number of base PyLong_MARSHAL_BASE digits */
256 n = _PyLong_DigitCount(ob);
257 l = (n-1) * PyLong_MARSHAL_RATIO;
258 d = ob->long_value.ob_digit[n-1];
259 assert(d != 0); /* a PyLong is always normalized */
260 do {
261 d >>= PyLong_MARSHAL_SHIFT;
262 l++;
263 } while (d != 0);
264 if (l > SIZE32_MAX) {
265 p->depth--;
266 p->error = WFERR_UNMARSHALLABLE;
267 return;
268 }
269 w_long((long)(_PyLong_IsNegative(ob) ? -l : l), p);
270
271 for (i=0; i < n-1; i++) {
272 d = ob->long_value.ob_digit[i];
273 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
274 w_short(d & PyLong_MARSHAL_MASK, p);
275 d >>= PyLong_MARSHAL_SHIFT;
276 }
277 assert (d == 0);
278 }
279 d = ob->long_value.ob_digit[n-1];
280 do {
281 w_short(d & PyLong_MARSHAL_MASK, p);
282 d >>= PyLong_MARSHAL_SHIFT;
283 } while (d != 0);
284 }
285
286 static void
w_float_bin(double v,WFILE * p)287 w_float_bin(double v, WFILE *p)
288 {
289 char buf[8];
290 if (PyFloat_Pack8(v, buf, 1) < 0) {
291 p->error = WFERR_UNMARSHALLABLE;
292 return;
293 }
294 w_string(buf, 8, p);
295 }
296
297 static void
w_float_str(double v,WFILE * p)298 w_float_str(double v, WFILE *p)
299 {
300 char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
301 if (!buf) {
302 p->error = WFERR_NOMEMORY;
303 return;
304 }
305 w_short_pstring(buf, strlen(buf), p);
306 PyMem_Free(buf);
307 }
308
309 static int
w_ref(PyObject * v,char * flag,WFILE * p)310 w_ref(PyObject *v, char *flag, WFILE *p)
311 {
312 _Py_hashtable_entry_t *entry;
313 int w;
314
315 if (p->version < 3 || p->hashtable == NULL)
316 return 0; /* not writing object references */
317
318 /* If it has only one reference, it definitely isn't shared.
319 * But we use TYPE_REF always for interned string, to PYC file stable
320 * as possible.
321 */
322 if (Py_REFCNT(v) == 1 &&
323 !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
324 return 0;
325 }
326
327 entry = _Py_hashtable_get_entry(p->hashtable, v);
328 if (entry != NULL) {
329 /* write the reference index to the stream */
330 w = (int)(uintptr_t)entry->value;
331 /* we don't store "long" indices in the dict */
332 assert(0 <= w && w <= 0x7fffffff);
333 w_byte(TYPE_REF, p);
334 w_long(w, p);
335 return 1;
336 } else {
337 size_t s = p->hashtable->nentries;
338 /* we don't support long indices */
339 if (s >= 0x7fffffff) {
340 PyErr_SetString(PyExc_ValueError, "too many objects");
341 goto err;
342 }
343 w = (int)s;
344 if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
345 (void *)(uintptr_t)w) < 0) {
346 Py_DECREF(v);
347 goto err;
348 }
349 *flag |= FLAG_REF;
350 return 0;
351 }
352 err:
353 p->error = WFERR_UNMARSHALLABLE;
354 return 1;
355 }
356
357 static void
358 w_complex_object(PyObject *v, char flag, WFILE *p);
359
360 static void
w_object(PyObject * v,WFILE * p)361 w_object(PyObject *v, WFILE *p)
362 {
363 char flag = '\0';
364
365 p->depth++;
366
367 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
368 p->error = WFERR_NESTEDTOODEEP;
369 }
370 else if (v == NULL) {
371 w_byte(TYPE_NULL, p);
372 }
373 else if (v == Py_None) {
374 w_byte(TYPE_NONE, p);
375 }
376 else if (v == PyExc_StopIteration) {
377 w_byte(TYPE_STOPITER, p);
378 }
379 else if (v == Py_Ellipsis) {
380 w_byte(TYPE_ELLIPSIS, p);
381 }
382 else if (v == Py_False) {
383 w_byte(TYPE_FALSE, p);
384 }
385 else if (v == Py_True) {
386 w_byte(TYPE_TRUE, p);
387 }
388 else if (!w_ref(v, &flag, p))
389 w_complex_object(v, flag, p);
390
391 p->depth--;
392 }
393
394 static void
w_complex_object(PyObject * v,char flag,WFILE * p)395 w_complex_object(PyObject *v, char flag, WFILE *p)
396 {
397 Py_ssize_t i, n;
398
399 if (PyLong_CheckExact(v)) {
400 int overflow;
401 long x = PyLong_AsLongAndOverflow(v, &overflow);
402 if (overflow) {
403 w_PyLong((PyLongObject *)v, flag, p);
404 }
405 else {
406 #if SIZEOF_LONG > 4
407 long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
408 if (y && y != -1) {
409 /* Too large for TYPE_INT */
410 w_PyLong((PyLongObject*)v, flag, p);
411 }
412 else
413 #endif
414 {
415 W_TYPE(TYPE_INT, p);
416 w_long(x, p);
417 }
418 }
419 }
420 else if (PyFloat_CheckExact(v)) {
421 if (p->version > 1) {
422 W_TYPE(TYPE_BINARY_FLOAT, p);
423 w_float_bin(PyFloat_AS_DOUBLE(v), p);
424 }
425 else {
426 W_TYPE(TYPE_FLOAT, p);
427 w_float_str(PyFloat_AS_DOUBLE(v), p);
428 }
429 }
430 else if (PyComplex_CheckExact(v)) {
431 if (p->version > 1) {
432 W_TYPE(TYPE_BINARY_COMPLEX, p);
433 w_float_bin(PyComplex_RealAsDouble(v), p);
434 w_float_bin(PyComplex_ImagAsDouble(v), p);
435 }
436 else {
437 W_TYPE(TYPE_COMPLEX, p);
438 w_float_str(PyComplex_RealAsDouble(v), p);
439 w_float_str(PyComplex_ImagAsDouble(v), p);
440 }
441 }
442 else if (PyBytes_CheckExact(v)) {
443 W_TYPE(TYPE_STRING, p);
444 w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
445 }
446 else if (PyUnicode_CheckExact(v)) {
447 if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
448 int is_short = PyUnicode_GET_LENGTH(v) < 256;
449 if (is_short) {
450 if (PyUnicode_CHECK_INTERNED(v))
451 W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
452 else
453 W_TYPE(TYPE_SHORT_ASCII, p);
454 w_short_pstring(PyUnicode_1BYTE_DATA(v),
455 PyUnicode_GET_LENGTH(v), p);
456 }
457 else {
458 if (PyUnicode_CHECK_INTERNED(v))
459 W_TYPE(TYPE_ASCII_INTERNED, p);
460 else
461 W_TYPE(TYPE_ASCII, p);
462 w_pstring(PyUnicode_1BYTE_DATA(v),
463 PyUnicode_GET_LENGTH(v), p);
464 }
465 }
466 else {
467 PyObject *utf8;
468 utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
469 if (utf8 == NULL) {
470 p->depth--;
471 p->error = WFERR_UNMARSHALLABLE;
472 return;
473 }
474 if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v))
475 W_TYPE(TYPE_INTERNED, p);
476 else
477 W_TYPE(TYPE_UNICODE, p);
478 w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
479 Py_DECREF(utf8);
480 }
481 }
482 else if (PyTuple_CheckExact(v)) {
483 n = PyTuple_GET_SIZE(v);
484 if (p->version >= 4 && n < 256) {
485 W_TYPE(TYPE_SMALL_TUPLE, p);
486 w_byte((unsigned char)n, p);
487 }
488 else {
489 W_TYPE(TYPE_TUPLE, p);
490 W_SIZE(n, p);
491 }
492 for (i = 0; i < n; i++) {
493 w_object(PyTuple_GET_ITEM(v, i), p);
494 }
495 }
496 else if (PyList_CheckExact(v)) {
497 W_TYPE(TYPE_LIST, p);
498 n = PyList_GET_SIZE(v);
499 W_SIZE(n, p);
500 for (i = 0; i < n; i++) {
501 w_object(PyList_GET_ITEM(v, i), p);
502 }
503 }
504 else if (PyDict_CheckExact(v)) {
505 Py_ssize_t pos;
506 PyObject *key, *value;
507 W_TYPE(TYPE_DICT, p);
508 /* This one is NULL object terminated! */
509 pos = 0;
510 while (PyDict_Next(v, &pos, &key, &value)) {
511 w_object(key, p);
512 w_object(value, p);
513 }
514 w_object((PyObject *)NULL, p);
515 }
516 else if (PyAnySet_CheckExact(v)) {
517 PyObject *value;
518 Py_ssize_t pos = 0;
519 Py_hash_t hash;
520
521 if (PyFrozenSet_CheckExact(v))
522 W_TYPE(TYPE_FROZENSET, p);
523 else
524 W_TYPE(TYPE_SET, p);
525 n = PySet_GET_SIZE(v);
526 W_SIZE(n, p);
527 // bpo-37596: To support reproducible builds, sets and frozensets need
528 // to have their elements serialized in a consistent order (even when
529 // they have been scrambled by hash randomization). To ensure this, we
530 // use an order equivalent to sorted(v, key=marshal.dumps):
531 PyObject *pairs = PyList_New(n);
532 if (pairs == NULL) {
533 p->error = WFERR_NOMEMORY;
534 return;
535 }
536 Py_ssize_t i = 0;
537 Py_BEGIN_CRITICAL_SECTION(v);
538 while (_PySet_NextEntryRef(v, &pos, &value, &hash)) {
539 PyObject *dump = _PyMarshal_WriteObjectToString(value,
540 p->version, p->allow_code);
541 if (dump == NULL) {
542 p->error = WFERR_UNMARSHALLABLE;
543 Py_DECREF(value);
544 break;
545 }
546 PyObject *pair = PyTuple_Pack(2, dump, value);
547 Py_DECREF(dump);
548 Py_DECREF(value);
549 if (pair == NULL) {
550 p->error = WFERR_NOMEMORY;
551 break;
552 }
553 PyList_SET_ITEM(pairs, i++, pair);
554 }
555 Py_END_CRITICAL_SECTION();
556 if (p->error == WFERR_UNMARSHALLABLE || p->error == WFERR_NOMEMORY) {
557 Py_DECREF(pairs);
558 return;
559 }
560 assert(i == n);
561 if (PyList_Sort(pairs)) {
562 p->error = WFERR_NOMEMORY;
563 Py_DECREF(pairs);
564 return;
565 }
566 for (Py_ssize_t i = 0; i < n; i++) {
567 PyObject *pair = PyList_GET_ITEM(pairs, i);
568 value = PyTuple_GET_ITEM(pair, 1);
569 w_object(value, p);
570 }
571 Py_DECREF(pairs);
572 }
573 else if (PyCode_Check(v)) {
574 if (!p->allow_code) {
575 p->error = WFERR_CODE_NOT_ALLOWED;
576 return;
577 }
578 PyCodeObject *co = (PyCodeObject *)v;
579 PyObject *co_code = _PyCode_GetCode(co);
580 if (co_code == NULL) {
581 p->error = WFERR_NOMEMORY;
582 return;
583 }
584 W_TYPE(TYPE_CODE, p);
585 w_long(co->co_argcount, p);
586 w_long(co->co_posonlyargcount, p);
587 w_long(co->co_kwonlyargcount, p);
588 w_long(co->co_stacksize, p);
589 w_long(co->co_flags, p);
590 w_object(co_code, p);
591 w_object(co->co_consts, p);
592 w_object(co->co_names, p);
593 w_object(co->co_localsplusnames, p);
594 w_object(co->co_localspluskinds, p);
595 w_object(co->co_filename, p);
596 w_object(co->co_name, p);
597 w_object(co->co_qualname, p);
598 w_long(co->co_firstlineno, p);
599 w_object(co->co_linetable, p);
600 w_object(co->co_exceptiontable, p);
601 Py_DECREF(co_code);
602 }
603 else if (PyObject_CheckBuffer(v)) {
604 /* Write unknown bytes-like objects as a bytes object */
605 Py_buffer view;
606 if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
607 w_byte(TYPE_UNKNOWN, p);
608 p->depth--;
609 p->error = WFERR_UNMARSHALLABLE;
610 return;
611 }
612 W_TYPE(TYPE_STRING, p);
613 w_pstring(view.buf, view.len, p);
614 PyBuffer_Release(&view);
615 }
616 else {
617 W_TYPE(TYPE_UNKNOWN, p);
618 p->error = WFERR_UNMARSHALLABLE;
619 }
620 }
621
622 static void
w_decref_entry(void * key)623 w_decref_entry(void *key)
624 {
625 PyObject *entry_key = (PyObject *)key;
626 Py_XDECREF(entry_key);
627 }
628
629 static int
w_init_refs(WFILE * wf,int version)630 w_init_refs(WFILE *wf, int version)
631 {
632 if (version >= 3) {
633 wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
634 _Py_hashtable_compare_direct,
635 w_decref_entry, NULL, NULL);
636 if (wf->hashtable == NULL) {
637 PyErr_NoMemory();
638 return -1;
639 }
640 }
641 return 0;
642 }
643
644 static void
w_clear_refs(WFILE * wf)645 w_clear_refs(WFILE *wf)
646 {
647 if (wf->hashtable != NULL) {
648 _Py_hashtable_destroy(wf->hashtable);
649 }
650 }
651
652 /* version currently has no effect for writing ints. */
653 /* Note that while the documentation states that this function
654 * can error, currently it never does. Setting an exception in
655 * this function should be regarded as an API-breaking change.
656 */
657 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)658 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
659 {
660 char buf[4];
661 WFILE wf;
662 memset(&wf, 0, sizeof(wf));
663 wf.fp = fp;
664 wf.ptr = wf.buf = buf;
665 wf.end = wf.ptr + sizeof(buf);
666 wf.error = WFERR_OK;
667 wf.version = version;
668 w_long(x, &wf);
669 w_flush(&wf);
670 }
671
672 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)673 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
674 {
675 char buf[BUFSIZ];
676 WFILE wf;
677 if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
678 return; /* caller must check PyErr_Occurred() */
679 }
680 memset(&wf, 0, sizeof(wf));
681 wf.fp = fp;
682 wf.ptr = wf.buf = buf;
683 wf.end = wf.ptr + sizeof(buf);
684 wf.error = WFERR_OK;
685 wf.version = version;
686 wf.allow_code = 1;
687 if (w_init_refs(&wf, version)) {
688 return; /* caller must check PyErr_Occurred() */
689 }
690 w_object(x, &wf);
691 w_clear_refs(&wf);
692 w_flush(&wf);
693 }
694
/* State of one marshal read operation.  Data comes from memory (ptr/end
   set), from a stream-like object (readable set) or from a FILE (fp). */
typedef struct {
    FILE *fp;            /* source file; used when ptr and readable are both NULL */
    int depth;           /* current nesting level of r_object() */
    PyObject *readable;  /* Stream-like object being read from */
    const char *ptr;     /* next unread byte of in-memory data, or NULL */
    const char *end;     /* one past the last byte of in-memory data */
    char *buf;           /* scratch buffer allocated by r_string() for fp/readable input */
    Py_ssize_t buf_size; /* current size of buf */
    PyObject *refs;      /* a list */
    int allow_code;      /* presumably mirrors WFILE.allow_code; not used in the code visible here */
} RFILE;
706
707 static const char *
r_string(Py_ssize_t n,RFILE * p)708 r_string(Py_ssize_t n, RFILE *p)
709 {
710 Py_ssize_t read = -1;
711
712 if (p->ptr != NULL) {
713 /* Fast path for loads() */
714 const char *res = p->ptr;
715 Py_ssize_t left = p->end - p->ptr;
716 if (left < n) {
717 PyErr_SetString(PyExc_EOFError,
718 "marshal data too short");
719 return NULL;
720 }
721 p->ptr += n;
722 return res;
723 }
724 if (p->buf == NULL) {
725 p->buf = PyMem_Malloc(n);
726 if (p->buf == NULL) {
727 PyErr_NoMemory();
728 return NULL;
729 }
730 p->buf_size = n;
731 }
732 else if (p->buf_size < n) {
733 char *tmp = PyMem_Realloc(p->buf, n);
734 if (tmp == NULL) {
735 PyErr_NoMemory();
736 return NULL;
737 }
738 p->buf = tmp;
739 p->buf_size = n;
740 }
741
742 if (!p->readable) {
743 assert(p->fp != NULL);
744 read = fread(p->buf, 1, n, p->fp);
745 }
746 else {
747 PyObject *res, *mview;
748 Py_buffer buf;
749
750 if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
751 return NULL;
752 mview = PyMemoryView_FromBuffer(&buf);
753 if (mview == NULL)
754 return NULL;
755
756 res = _PyObject_CallMethod(p->readable, &_Py_ID(readinto), "N", mview);
757 if (res != NULL) {
758 read = PyNumber_AsSsize_t(res, PyExc_ValueError);
759 Py_DECREF(res);
760 }
761 }
762 if (read != n) {
763 if (!PyErr_Occurred()) {
764 if (read > n)
765 PyErr_Format(PyExc_ValueError,
766 "read() returned too much data: "
767 "%zd bytes requested, %zd returned",
768 n, read);
769 else
770 PyErr_SetString(PyExc_EOFError,
771 "EOF read where not expected");
772 }
773 return NULL;
774 }
775 return p->buf;
776 }
777
778 static int
r_byte(RFILE * p)779 r_byte(RFILE *p)
780 {
781 if (p->ptr != NULL) {
782 if (p->ptr < p->end) {
783 return (unsigned char) *p->ptr++;
784 }
785 }
786 else if (!p->readable) {
787 assert(p->fp);
788 int c = getc(p->fp);
789 if (c != EOF) {
790 return c;
791 }
792 }
793 else {
794 const char *ptr = r_string(1, p);
795 if (ptr != NULL) {
796 return *(const unsigned char *) ptr;
797 }
798 return EOF;
799 }
800 PyErr_SetString(PyExc_EOFError,
801 "EOF read where not expected");
802 return EOF;
803 }
804
805 static int
r_short(RFILE * p)806 r_short(RFILE *p)
807 {
808 short x = -1;
809 const unsigned char *buffer;
810
811 buffer = (const unsigned char *) r_string(2, p);
812 if (buffer != NULL) {
813 x = buffer[0];
814 x |= buffer[1] << 8;
815 /* Sign-extension, in case short greater than 16 bits */
816 x |= -(x & 0x8000);
817 }
818 return x;
819 }
820
821 static long
r_long(RFILE * p)822 r_long(RFILE *p)
823 {
824 long x = -1;
825 const unsigned char *buffer;
826
827 buffer = (const unsigned char *) r_string(4, p);
828 if (buffer != NULL) {
829 x = buffer[0];
830 x |= (long)buffer[1] << 8;
831 x |= (long)buffer[2] << 16;
832 x |= (long)buffer[3] << 24;
833 #if SIZEOF_LONG > 4
834 /* Sign extension for 64-bit machines */
835 x |= -(x & 0x80000000L);
836 #endif
837 }
838 return x;
839 }
840
841 /* r_long64 deals with the TYPE_INT64 code. */
842 static PyObject *
r_long64(RFILE * p)843 r_long64(RFILE *p)
844 {
845 const unsigned char *buffer = (const unsigned char *) r_string(8, p);
846 if (buffer == NULL) {
847 return NULL;
848 }
849 return _PyLong_FromByteArray(buffer, 8,
850 1 /* little endian */,
851 1 /* signed */);
852 }
853
854 static PyObject *
r_PyLong(RFILE * p)855 r_PyLong(RFILE *p)
856 {
857 PyLongObject *ob;
858 long n, size, i;
859 int j, md, shorts_in_top_digit;
860 digit d;
861
862 n = r_long(p);
863 if (n == 0)
864 return (PyObject *)_PyLong_New(0);
865 if (n == -1 && PyErr_Occurred()) {
866 return NULL;
867 }
868 if (n < -SIZE32_MAX || n > SIZE32_MAX) {
869 PyErr_SetString(PyExc_ValueError,
870 "bad marshal data (long size out of range)");
871 return NULL;
872 }
873
874 size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
875 shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
876 ob = _PyLong_New(size);
877 if (ob == NULL)
878 return NULL;
879
880 _PyLong_SetSignAndDigitCount(ob, n < 0 ? -1 : 1, size);
881
882 for (i = 0; i < size-1; i++) {
883 d = 0;
884 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
885 md = r_short(p);
886 if (md < 0 || md > PyLong_MARSHAL_BASE)
887 goto bad_digit;
888 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
889 }
890 ob->long_value.ob_digit[i] = d;
891 }
892
893 d = 0;
894 for (j=0; j < shorts_in_top_digit; j++) {
895 md = r_short(p);
896 if (md < 0 || md > PyLong_MARSHAL_BASE)
897 goto bad_digit;
898 /* topmost marshal digit should be nonzero */
899 if (md == 0 && j == shorts_in_top_digit - 1) {
900 Py_DECREF(ob);
901 PyErr_SetString(PyExc_ValueError,
902 "bad marshal data (unnormalized long data)");
903 return NULL;
904 }
905 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
906 }
907 assert(!PyErr_Occurred());
908 /* top digit should be nonzero, else the resulting PyLong won't be
909 normalized */
910 ob->long_value.ob_digit[size-1] = d;
911 return (PyObject *)ob;
912 bad_digit:
913 Py_DECREF(ob);
914 if (!PyErr_Occurred()) {
915 PyErr_SetString(PyExc_ValueError,
916 "bad marshal data (digit out of range in long)");
917 }
918 return NULL;
919 }
920
921 static double
r_float_bin(RFILE * p)922 r_float_bin(RFILE *p)
923 {
924 const char *buf = r_string(8, p);
925 if (buf == NULL)
926 return -1;
927 return PyFloat_Unpack8(buf, 1);
928 }
929
930 /* Issue #33720: Disable inlining for reducing the C stack consumption
931 on PGO builds. */
932 Py_NO_INLINE static double
r_float_str(RFILE * p)933 r_float_str(RFILE *p)
934 {
935 int n;
936 char buf[256];
937 const char *ptr;
938 n = r_byte(p);
939 if (n == EOF) {
940 return -1;
941 }
942 ptr = r_string(n, p);
943 if (ptr == NULL) {
944 return -1;
945 }
946 memcpy(buf, ptr, n);
947 buf[n] = '\0';
948 return PyOS_string_to_double(buf, NULL, NULL);
949 }
950
951 /* allocate the reflist index for a new object. Return -1 on failure */
952 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)953 r_ref_reserve(int flag, RFILE *p)
954 {
955 if (flag) { /* currently only FLAG_REF is defined */
956 Py_ssize_t idx = PyList_GET_SIZE(p->refs);
957 if (idx >= 0x7ffffffe) {
958 PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
959 return -1;
960 }
961 if (PyList_Append(p->refs, Py_None) < 0)
962 return -1;
963 return idx;
964 } else
965 return 0;
966 }
967
968 /* insert the new object 'o' to the reflist at previously
969 * allocated index 'idx'.
970 * 'o' can be NULL, in which case nothing is done.
971 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
972 * if 'o' was non-NULL, and the function fails, 'o' is released and
973 * NULL returned. This simplifies error checking at the call site since
974 * a single test for NULL for the function result is enough.
975 */
976 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)977 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
978 {
979 if (o != NULL && flag) { /* currently only FLAG_REF is defined */
980 PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
981 PyList_SET_ITEM(p->refs, idx, Py_NewRef(o));
982 Py_DECREF(tmp);
983 }
984 return o;
985 }
986
987 /* combination of both above, used when an object can be
988 * created whenever it is seen in the file, as opposed to
989 * after having loaded its sub-objects.
990 */
991 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)992 r_ref(PyObject *o, int flag, RFILE *p)
993 {
994 assert(flag & FLAG_REF);
995 if (o == NULL)
996 return NULL;
997 if (PyList_Append(p->refs, o) < 0) {
998 Py_DECREF(o); /* release the new object */
999 return NULL;
1000 }
1001 return o;
1002 }
1003
1004 static PyObject *
r_object(RFILE * p)1005 r_object(RFILE *p)
1006 {
1007 /* NULL is a valid return value, it does not necessarily means that
1008 an exception is set. */
1009 PyObject *v, *v2;
1010 Py_ssize_t idx = 0;
1011 long i, n;
1012 int type, code = r_byte(p);
1013 int flag, is_interned = 0;
1014 PyObject *retval = NULL;
1015
1016 if (code == EOF) {
1017 if (PyErr_ExceptionMatches(PyExc_EOFError)) {
1018 PyErr_SetString(PyExc_EOFError,
1019 "EOF read where object expected");
1020 }
1021 return NULL;
1022 }
1023
1024 p->depth++;
1025
1026 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
1027 p->depth--;
1028 PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
1029 return NULL;
1030 }
1031
1032 flag = code & FLAG_REF;
1033 type = code & ~FLAG_REF;
1034
1035 #define R_REF(O) do{\
1036 if (flag) \
1037 O = r_ref(O, flag, p);\
1038 } while (0)
1039
1040 switch (type) {
1041
1042 case TYPE_NULL:
1043 break;
1044
1045 case TYPE_NONE:
1046 retval = Py_None;
1047 break;
1048
1049 case TYPE_STOPITER:
1050 retval = Py_NewRef(PyExc_StopIteration);
1051 break;
1052
1053 case TYPE_ELLIPSIS:
1054 retval = Py_Ellipsis;
1055 break;
1056
1057 case TYPE_FALSE:
1058 retval = Py_False;
1059 break;
1060
1061 case TYPE_TRUE:
1062 retval = Py_True;
1063 break;
1064
1065 case TYPE_INT:
1066 n = r_long(p);
1067 if (n == -1 && PyErr_Occurred()) {
1068 break;
1069 }
1070 retval = PyLong_FromLong(n);
1071 R_REF(retval);
1072 break;
1073
1074 case TYPE_INT64:
1075 retval = r_long64(p);
1076 R_REF(retval);
1077 break;
1078
1079 case TYPE_LONG:
1080 retval = r_PyLong(p);
1081 R_REF(retval);
1082 break;
1083
1084 case TYPE_FLOAT:
1085 {
1086 double x = r_float_str(p);
1087 if (x == -1.0 && PyErr_Occurred())
1088 break;
1089 retval = PyFloat_FromDouble(x);
1090 R_REF(retval);
1091 break;
1092 }
1093
1094 case TYPE_BINARY_FLOAT:
1095 {
1096 double x = r_float_bin(p);
1097 if (x == -1.0 && PyErr_Occurred())
1098 break;
1099 retval = PyFloat_FromDouble(x);
1100 R_REF(retval);
1101 break;
1102 }
1103
1104 case TYPE_COMPLEX:
1105 {
1106 Py_complex c;
1107 c.real = r_float_str(p);
1108 if (c.real == -1.0 && PyErr_Occurred())
1109 break;
1110 c.imag = r_float_str(p);
1111 if (c.imag == -1.0 && PyErr_Occurred())
1112 break;
1113 retval = PyComplex_FromCComplex(c);
1114 R_REF(retval);
1115 break;
1116 }
1117
1118 case TYPE_BINARY_COMPLEX:
1119 {
1120 Py_complex c;
1121 c.real = r_float_bin(p);
1122 if (c.real == -1.0 && PyErr_Occurred())
1123 break;
1124 c.imag = r_float_bin(p);
1125 if (c.imag == -1.0 && PyErr_Occurred())
1126 break;
1127 retval = PyComplex_FromCComplex(c);
1128 R_REF(retval);
1129 break;
1130 }
1131
1132 case TYPE_STRING:
1133 {
1134 const char *ptr;
1135 n = r_long(p);
1136 if (n < 0 || n > SIZE32_MAX) {
1137 if (!PyErr_Occurred()) {
1138 PyErr_SetString(PyExc_ValueError,
1139 "bad marshal data (bytes object size out of range)");
1140 }
1141 break;
1142 }
1143 v = PyBytes_FromStringAndSize((char *)NULL, n);
1144 if (v == NULL)
1145 break;
1146 ptr = r_string(n, p);
1147 if (ptr == NULL) {
1148 Py_DECREF(v);
1149 break;
1150 }
1151 memcpy(PyBytes_AS_STRING(v), ptr, n);
1152 retval = v;
1153 R_REF(retval);
1154 break;
1155 }
1156
1157 case TYPE_ASCII_INTERNED:
1158 is_interned = 1;
1159 /* fall through */
1160 case TYPE_ASCII:
1161 n = r_long(p);
1162 if (n < 0 || n > SIZE32_MAX) {
1163 if (!PyErr_Occurred()) {
1164 PyErr_SetString(PyExc_ValueError,
1165 "bad marshal data (string size out of range)");
1166 }
1167 break;
1168 }
1169 goto _read_ascii;
1170
1171 case TYPE_SHORT_ASCII_INTERNED:
1172 is_interned = 1;
1173 /* fall through */
1174 case TYPE_SHORT_ASCII:
1175 n = r_byte(p);
1176 if (n == EOF) {
1177 break;
1178 }
1179 _read_ascii:
1180 {
1181 const char *ptr;
1182 ptr = r_string(n, p);
1183 if (ptr == NULL)
1184 break;
1185 v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1186 if (v == NULL)
1187 break;
1188 if (is_interned) {
1189 // marshal is meant to serialize .pyc files with code
1190 // objects, and code-related strings are currently immortal.
1191 PyInterpreterState *interp = _PyInterpreterState_GET();
1192 _PyUnicode_InternImmortal(interp, &v);
1193 }
1194 retval = v;
1195 R_REF(retval);
1196 break;
1197 }
1198
1199 case TYPE_INTERNED:
1200 is_interned = 1;
1201 /* fall through */
1202 case TYPE_UNICODE:
1203 {
1204 const char *buffer;
1205
1206 n = r_long(p);
1207 if (n < 0 || n > SIZE32_MAX) {
1208 if (!PyErr_Occurred()) {
1209 PyErr_SetString(PyExc_ValueError,
1210 "bad marshal data (string size out of range)");
1211 }
1212 break;
1213 }
1214 if (n != 0) {
1215 buffer = r_string(n, p);
1216 if (buffer == NULL)
1217 break;
1218 v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1219 }
1220 else {
1221 v = PyUnicode_New(0, 0);
1222 }
1223 if (v == NULL)
1224 break;
1225 if (is_interned) {
1226 // marshal is meant to serialize .pyc files with code
1227 // objects, and code-related strings are currently immortal.
1228 PyInterpreterState *interp = _PyInterpreterState_GET();
1229 _PyUnicode_InternImmortal(interp, &v);
1230 }
1231 retval = v;
1232 R_REF(retval);
1233 break;
1234 }
1235
1236 case TYPE_SMALL_TUPLE:
1237 n = r_byte(p);
1238 if (n == EOF) {
1239 break;
1240 }
1241 goto _read_tuple;
1242 case TYPE_TUPLE:
1243 n = r_long(p);
1244 if (n < 0 || n > SIZE32_MAX) {
1245 if (!PyErr_Occurred()) {
1246 PyErr_SetString(PyExc_ValueError,
1247 "bad marshal data (tuple size out of range)");
1248 }
1249 break;
1250 }
1251 _read_tuple:
1252 v = PyTuple_New(n);
1253 R_REF(v);
1254 if (v == NULL)
1255 break;
1256
1257 for (i = 0; i < n; i++) {
1258 v2 = r_object(p);
1259 if ( v2 == NULL ) {
1260 if (!PyErr_Occurred())
1261 PyErr_SetString(PyExc_TypeError,
1262 "NULL object in marshal data for tuple");
1263 Py_SETREF(v, NULL);
1264 break;
1265 }
1266 PyTuple_SET_ITEM(v, i, v2);
1267 }
1268 retval = v;
1269 break;
1270
1271 case TYPE_LIST:
1272 n = r_long(p);
1273 if (n < 0 || n > SIZE32_MAX) {
1274 if (!PyErr_Occurred()) {
1275 PyErr_SetString(PyExc_ValueError,
1276 "bad marshal data (list size out of range)");
1277 }
1278 break;
1279 }
1280 v = PyList_New(n);
1281 R_REF(v);
1282 if (v == NULL)
1283 break;
1284 for (i = 0; i < n; i++) {
1285 v2 = r_object(p);
1286 if ( v2 == NULL ) {
1287 if (!PyErr_Occurred())
1288 PyErr_SetString(PyExc_TypeError,
1289 "NULL object in marshal data for list");
1290 Py_SETREF(v, NULL);
1291 break;
1292 }
1293 PyList_SET_ITEM(v, i, v2);
1294 }
1295 retval = v;
1296 break;
1297
1298 case TYPE_DICT:
1299 v = PyDict_New();
1300 R_REF(v);
1301 if (v == NULL)
1302 break;
1303 for (;;) {
1304 PyObject *key, *val;
1305 key = r_object(p);
1306 if (key == NULL)
1307 break;
1308 val = r_object(p);
1309 if (val == NULL) {
1310 Py_DECREF(key);
1311 break;
1312 }
1313 if (PyDict_SetItem(v, key, val) < 0) {
1314 Py_DECREF(key);
1315 Py_DECREF(val);
1316 break;
1317 }
1318 Py_DECREF(key);
1319 Py_DECREF(val);
1320 }
1321 if (PyErr_Occurred()) {
1322 Py_SETREF(v, NULL);
1323 }
1324 retval = v;
1325 break;
1326
1327 case TYPE_SET:
1328 case TYPE_FROZENSET:
1329 n = r_long(p);
1330 if (n < 0 || n > SIZE32_MAX) {
1331 if (!PyErr_Occurred()) {
1332 PyErr_SetString(PyExc_ValueError,
1333 "bad marshal data (set size out of range)");
1334 }
1335 break;
1336 }
1337
1338 if (n == 0 && type == TYPE_FROZENSET) {
1339 /* call frozenset() to get the empty frozenset singleton */
1340 v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1341 if (v == NULL)
1342 break;
1343 R_REF(v);
1344 retval = v;
1345 }
1346 else {
1347 v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1348 if (type == TYPE_SET) {
1349 R_REF(v);
1350 } else {
1351 /* must use delayed registration of frozensets because they must
1352 * be init with a refcount of 1
1353 */
1354 idx = r_ref_reserve(flag, p);
1355 if (idx < 0)
1356 Py_CLEAR(v); /* signal error */
1357 }
1358 if (v == NULL)
1359 break;
1360
1361 for (i = 0; i < n; i++) {
1362 v2 = r_object(p);
1363 if ( v2 == NULL ) {
1364 if (!PyErr_Occurred())
1365 PyErr_SetString(PyExc_TypeError,
1366 "NULL object in marshal data for set");
1367 Py_SETREF(v, NULL);
1368 break;
1369 }
1370 if (PySet_Add(v, v2) == -1) {
1371 Py_DECREF(v);
1372 Py_DECREF(v2);
1373 v = NULL;
1374 break;
1375 }
1376 Py_DECREF(v2);
1377 }
1378 if (type != TYPE_SET)
1379 v = r_ref_insert(v, idx, flag, p);
1380 retval = v;
1381 }
1382 break;
1383
1384 case TYPE_CODE:
1385 {
1386 int argcount;
1387 int posonlyargcount;
1388 int kwonlyargcount;
1389 int stacksize;
1390 int flags;
1391 PyObject *code = NULL;
1392 PyObject *consts = NULL;
1393 PyObject *names = NULL;
1394 PyObject *localsplusnames = NULL;
1395 PyObject *localspluskinds = NULL;
1396 PyObject *filename = NULL;
1397 PyObject *name = NULL;
1398 PyObject *qualname = NULL;
1399 int firstlineno;
1400 PyObject* linetable = NULL;
1401 PyObject *exceptiontable = NULL;
1402
1403 if (!p->allow_code) {
1404 PyErr_SetString(PyExc_ValueError,
1405 "unmarshalling code objects is disallowed");
1406 break;
1407 }
1408 idx = r_ref_reserve(flag, p);
1409 if (idx < 0)
1410 break;
1411
1412 v = NULL;
1413
1414 /* XXX ignore long->int overflows for now */
1415 argcount = (int)r_long(p);
1416 if (argcount == -1 && PyErr_Occurred())
1417 goto code_error;
1418 posonlyargcount = (int)r_long(p);
1419 if (posonlyargcount == -1 && PyErr_Occurred()) {
1420 goto code_error;
1421 }
1422 kwonlyargcount = (int)r_long(p);
1423 if (kwonlyargcount == -1 && PyErr_Occurred())
1424 goto code_error;
1425 stacksize = (int)r_long(p);
1426 if (stacksize == -1 && PyErr_Occurred())
1427 goto code_error;
1428 flags = (int)r_long(p);
1429 if (flags == -1 && PyErr_Occurred())
1430 goto code_error;
1431 code = r_object(p);
1432 if (code == NULL)
1433 goto code_error;
1434 consts = r_object(p);
1435 if (consts == NULL)
1436 goto code_error;
1437 names = r_object(p);
1438 if (names == NULL)
1439 goto code_error;
1440 localsplusnames = r_object(p);
1441 if (localsplusnames == NULL)
1442 goto code_error;
1443 localspluskinds = r_object(p);
1444 if (localspluskinds == NULL)
1445 goto code_error;
1446 filename = r_object(p);
1447 if (filename == NULL)
1448 goto code_error;
1449 name = r_object(p);
1450 if (name == NULL)
1451 goto code_error;
1452 qualname = r_object(p);
1453 if (qualname == NULL)
1454 goto code_error;
1455 firstlineno = (int)r_long(p);
1456 if (firstlineno == -1 && PyErr_Occurred())
1457 break;
1458 linetable = r_object(p);
1459 if (linetable == NULL)
1460 goto code_error;
1461 exceptiontable = r_object(p);
1462 if (exceptiontable == NULL)
1463 goto code_error;
1464
1465 struct _PyCodeConstructor con = {
1466 .filename = filename,
1467 .name = name,
1468 .qualname = qualname,
1469 .flags = flags,
1470
1471 .code = code,
1472 .firstlineno = firstlineno,
1473 .linetable = linetable,
1474
1475 .consts = consts,
1476 .names = names,
1477
1478 .localsplusnames = localsplusnames,
1479 .localspluskinds = localspluskinds,
1480
1481 .argcount = argcount,
1482 .posonlyargcount = posonlyargcount,
1483 .kwonlyargcount = kwonlyargcount,
1484
1485 .stacksize = stacksize,
1486
1487 .exceptiontable = exceptiontable,
1488 };
1489
1490 if (_PyCode_Validate(&con) < 0) {
1491 goto code_error;
1492 }
1493
1494 v = (PyObject *)_PyCode_New(&con);
1495 if (v == NULL) {
1496 goto code_error;
1497 }
1498
1499 v = r_ref_insert(v, idx, flag, p);
1500
1501 code_error:
1502 if (v == NULL && !PyErr_Occurred()) {
1503 PyErr_SetString(PyExc_TypeError,
1504 "NULL object in marshal data for code object");
1505 }
1506 Py_XDECREF(code);
1507 Py_XDECREF(consts);
1508 Py_XDECREF(names);
1509 Py_XDECREF(localsplusnames);
1510 Py_XDECREF(localspluskinds);
1511 Py_XDECREF(filename);
1512 Py_XDECREF(name);
1513 Py_XDECREF(qualname);
1514 Py_XDECREF(linetable);
1515 Py_XDECREF(exceptiontable);
1516 }
1517 retval = v;
1518 break;
1519
1520 case TYPE_REF:
1521 n = r_long(p);
1522 if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1523 if (!PyErr_Occurred()) {
1524 PyErr_SetString(PyExc_ValueError,
1525 "bad marshal data (invalid reference)");
1526 }
1527 break;
1528 }
1529 v = PyList_GET_ITEM(p->refs, n);
1530 if (v == Py_None) {
1531 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1532 break;
1533 }
1534 retval = Py_NewRef(v);
1535 break;
1536
1537 default:
1538 /* Bogus data got written, which isn't ideal.
1539 This will let you keep working and recover. */
1540 PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1541 break;
1542
1543 }
1544 p->depth--;
1545 return retval;
1546 }
1547
1548 static PyObject *
read_object(RFILE * p)1549 read_object(RFILE *p)
1550 {
1551 PyObject *v;
1552 if (PyErr_Occurred()) {
1553 fprintf(stderr, "XXX readobject called with exception set\n");
1554 return NULL;
1555 }
1556 if (p->ptr && p->end) {
1557 if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1558 return NULL;
1559 }
1560 } else if (p->fp || p->readable) {
1561 if (PySys_Audit("marshal.load", NULL) < 0) {
1562 return NULL;
1563 }
1564 }
1565 v = r_object(p);
1566 if (v == NULL && !PyErr_Occurred())
1567 PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1568 return v;
1569 }
1570
1571 int
PyMarshal_ReadShortFromFile(FILE * fp)1572 PyMarshal_ReadShortFromFile(FILE *fp)
1573 {
1574 RFILE rf;
1575 int res;
1576 assert(fp);
1577 rf.readable = NULL;
1578 rf.fp = fp;
1579 rf.end = rf.ptr = NULL;
1580 rf.buf = NULL;
1581 res = r_short(&rf);
1582 if (rf.buf != NULL)
1583 PyMem_Free(rf.buf);
1584 return res;
1585 }
1586
1587 long
PyMarshal_ReadLongFromFile(FILE * fp)1588 PyMarshal_ReadLongFromFile(FILE *fp)
1589 {
1590 RFILE rf;
1591 long res;
1592 rf.fp = fp;
1593 rf.readable = NULL;
1594 rf.ptr = rf.end = NULL;
1595 rf.buf = NULL;
1596 res = r_long(&rf);
1597 if (rf.buf != NULL)
1598 PyMem_Free(rf.buf);
1599 return res;
1600 }
1601
1602 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1603 static off_t
getfilesize(FILE * fp)1604 getfilesize(FILE *fp)
1605 {
1606 struct _Py_stat_struct st;
1607 if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1608 return -1;
1609 #if SIZEOF_OFF_T == 4
1610 else if (st.st_size >= INT_MAX)
1611 return (off_t)INT_MAX;
1612 #endif
1613 else
1614 return (off_t)st.st_size;
1615 }
1616
1617 /* If we can get the size of the file up-front, and it's reasonably small,
1618 * read it in one gulp and delegate to ...FromString() instead. Much quicker
1619 * than reading a byte at a time from file; speeds .pyc imports.
1620 * CAUTION: since this may read the entire remainder of the file, don't
1621 * call it unless you know you're done with the file.
1622 */
1623 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1624 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1625 {
1626 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1627 #define REASONABLE_FILE_LIMIT (1L << 18)
1628 off_t filesize;
1629 filesize = getfilesize(fp);
1630 if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1631 char* pBuf = (char *)PyMem_Malloc(filesize);
1632 if (pBuf != NULL) {
1633 size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1634 PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1635 PyMem_Free(pBuf);
1636 return v;
1637 }
1638
1639 }
1640 /* We don't have fstat, or we do but the file is larger than
1641 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1642 */
1643 return PyMarshal_ReadObjectFromFile(fp);
1644
1645 #undef REASONABLE_FILE_LIMIT
1646 }
1647
1648 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1649 PyMarshal_ReadObjectFromFile(FILE *fp)
1650 {
1651 RFILE rf;
1652 PyObject *result;
1653 rf.allow_code = 1;
1654 rf.fp = fp;
1655 rf.readable = NULL;
1656 rf.depth = 0;
1657 rf.ptr = rf.end = NULL;
1658 rf.buf = NULL;
1659 rf.refs = PyList_New(0);
1660 if (rf.refs == NULL)
1661 return NULL;
1662 result = read_object(&rf);
1663 Py_DECREF(rf.refs);
1664 if (rf.buf != NULL)
1665 PyMem_Free(rf.buf);
1666 return result;
1667 }
1668
1669 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1670 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1671 {
1672 RFILE rf;
1673 PyObject *result;
1674 rf.allow_code = 1;
1675 rf.fp = NULL;
1676 rf.readable = NULL;
1677 rf.ptr = str;
1678 rf.end = str + len;
1679 rf.buf = NULL;
1680 rf.depth = 0;
1681 rf.refs = PyList_New(0);
1682 if (rf.refs == NULL)
1683 return NULL;
1684 result = read_object(&rf);
1685 Py_DECREF(rf.refs);
1686 if (rf.buf != NULL)
1687 PyMem_Free(rf.buf);
1688 return result;
1689 }
1690
1691 static PyObject *
_PyMarshal_WriteObjectToString(PyObject * x,int version,int allow_code)1692 _PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code)
1693 {
1694 WFILE wf;
1695
1696 if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
1697 return NULL;
1698 }
1699 memset(&wf, 0, sizeof(wf));
1700 wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1701 if (wf.str == NULL)
1702 return NULL;
1703 wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str);
1704 wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
1705 wf.error = WFERR_OK;
1706 wf.version = version;
1707 wf.allow_code = allow_code;
1708 if (w_init_refs(&wf, version)) {
1709 Py_DECREF(wf.str);
1710 return NULL;
1711 }
1712 w_object(x, &wf);
1713 w_clear_refs(&wf);
1714 if (wf.str != NULL) {
1715 const char *base = PyBytes_AS_STRING(wf.str);
1716 if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1717 return NULL;
1718 }
1719 if (wf.error != WFERR_OK) {
1720 Py_XDECREF(wf.str);
1721 switch (wf.error) {
1722 case WFERR_NOMEMORY:
1723 PyErr_NoMemory();
1724 break;
1725 case WFERR_NESTEDTOODEEP:
1726 PyErr_SetString(PyExc_ValueError,
1727 "object too deeply nested to marshal");
1728 break;
1729 case WFERR_CODE_NOT_ALLOWED:
1730 PyErr_SetString(PyExc_ValueError,
1731 "marshalling code objects is disallowed");
1732 break;
1733 default:
1734 case WFERR_UNMARSHALLABLE:
1735 PyErr_SetString(PyExc_ValueError,
1736 "unmarshallable object");
1737 break;
1738 }
1739 return NULL;
1740 }
1741 return wf.str;
1742 }
1743
1744 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1745 PyMarshal_WriteObjectToString(PyObject *x, int version)
1746 {
1747 return _PyMarshal_WriteObjectToString(x, version, 1);
1748 }
1749
1750 /* And an interface for Python programs... */
1751 /*[clinic input]
1752 marshal.dump
1753
1754 value: object
1755 Must be a supported type.
1756 file: object
1757 Must be a writeable binary file.
1758 version: int(c_default="Py_MARSHAL_VERSION") = version
1759 Indicates the data format that dump should use.
1760 /
1761 *
1762 allow_code: bool = True
1763 Allow to write code objects.
1764
1765 Write the value on the open file.
1766
1767 If the value has (or contains an object that has) an unsupported type, a
1768 ValueError exception is raised - but garbage data will also be written
1769 to the file. The object will not be properly read back by load().
1770 [clinic start generated code]*/
1771
1772 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version,int allow_code)1773 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1774 int version, int allow_code)
1775 /*[clinic end generated code: output=429e5fd61c2196b9 input=041f7f6669b0aafb]*/
1776 {
1777 /* XXX Quick hack -- need to do this differently */
1778 PyObject *s;
1779 PyObject *res;
1780
1781 s = _PyMarshal_WriteObjectToString(value, version, allow_code);
1782 if (s == NULL)
1783 return NULL;
1784 res = PyObject_CallMethodOneArg(file, &_Py_ID(write), s);
1785 Py_DECREF(s);
1786 return res;
1787 }
1788
1789 /*[clinic input]
1790 marshal.load
1791
1792 file: object
1793 Must be readable binary file.
1794 /
1795 *
1796 allow_code: bool = True
1797 Allow to load code objects.
1798
1799 Read one value from the open file and return it.
1800
1801 If no valid value is read (e.g. because the data has a different Python
1802 version's incompatible marshal format), raise EOFError, ValueError or
1803 TypeError.
1804
1805 Note: If an object containing an unsupported type was marshalled with
1806 dump(), load() will substitute None for the unmarshallable type.
1807 [clinic start generated code]*/
1808
1809 static PyObject *
marshal_load_impl(PyObject * module,PyObject * file,int allow_code)1810 marshal_load_impl(PyObject *module, PyObject *file, int allow_code)
1811 /*[clinic end generated code: output=0c1aaf3546ae3ed3 input=2dca7b570653b82f]*/
1812 {
1813 PyObject *data, *result;
1814 RFILE rf;
1815
1816 /*
1817 * Make a call to the read method, but read zero bytes.
1818 * This is to ensure that the object passed in at least
1819 * has a read method which returns bytes.
1820 * This can be removed if we guarantee good error handling
1821 * for r_string()
1822 */
1823 data = _PyObject_CallMethod(file, &_Py_ID(read), "i", 0);
1824 if (data == NULL)
1825 return NULL;
1826 if (!PyBytes_Check(data)) {
1827 PyErr_Format(PyExc_TypeError,
1828 "file.read() returned not bytes but %.100s",
1829 Py_TYPE(data)->tp_name);
1830 result = NULL;
1831 }
1832 else {
1833 rf.allow_code = allow_code;
1834 rf.depth = 0;
1835 rf.fp = NULL;
1836 rf.readable = file;
1837 rf.ptr = rf.end = NULL;
1838 rf.buf = NULL;
1839 if ((rf.refs = PyList_New(0)) != NULL) {
1840 result = read_object(&rf);
1841 Py_DECREF(rf.refs);
1842 if (rf.buf != NULL)
1843 PyMem_Free(rf.buf);
1844 } else
1845 result = NULL;
1846 }
1847 Py_DECREF(data);
1848 return result;
1849 }
1850
1851 /*[clinic input]
1852 marshal.dumps
1853
1854 value: object
1855 Must be a supported type.
1856 version: int(c_default="Py_MARSHAL_VERSION") = version
1857 Indicates the data format that dumps should use.
1858 /
1859 *
1860 allow_code: bool = True
1861 Allow to write code objects.
1862
1863 Return the bytes object that would be written to a file by dump(value, file).
1864
1865 Raise a ValueError exception if value has (or contains an object that has) an
1866 unsupported type.
1867 [clinic start generated code]*/
1868
1869 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version,int allow_code)1870 marshal_dumps_impl(PyObject *module, PyObject *value, int version,
1871 int allow_code)
1872 /*[clinic end generated code: output=115f90da518d1d49 input=167eaecceb63f0a8]*/
1873 {
1874 return _PyMarshal_WriteObjectToString(value, version, allow_code);
1875 }
1876
1877 /*[clinic input]
1878 marshal.loads
1879
1880 bytes: Py_buffer
1881 /
1882 *
1883 allow_code: bool = True
1884 Allow to load code objects.
1885
1886 Convert the bytes-like object to a value.
1887
1888 If no valid value is found, raise EOFError, ValueError or TypeError. Extra
1889 bytes in the input are ignored.
1890 [clinic start generated code]*/
1891
1892 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes,int allow_code)1893 marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code)
1894 /*[clinic end generated code: output=62c0c538d3edc31f input=14de68965b45aaa7]*/
1895 {
1896 RFILE rf;
1897 char *s = bytes->buf;
1898 Py_ssize_t n = bytes->len;
1899 PyObject* result;
1900 rf.allow_code = allow_code;
1901 rf.fp = NULL;
1902 rf.readable = NULL;
1903 rf.ptr = s;
1904 rf.end = s + n;
1905 rf.depth = 0;
1906 if ((rf.refs = PyList_New(0)) == NULL)
1907 return NULL;
1908 result = read_object(&rf);
1909 Py_DECREF(rf.refs);
1910 return result;
1911 }
1912
1913 static PyMethodDef marshal_methods[] = {
1914 MARSHAL_DUMP_METHODDEF
1915 MARSHAL_LOAD_METHODDEF
1916 MARSHAL_DUMPS_METHODDEF
1917 MARSHAL_LOADS_METHODDEF
1918 {NULL, NULL} /* sentinel */
1919 };
1920
1921
1922 PyDoc_STRVAR(module_doc,
1923 "This module contains functions that can read and write Python values in\n\
1924 a binary format. The format is specific to Python, but independent of\n\
1925 machine architecture issues.\n\
1926 \n\
1927 Not all Python object types are supported; in general, only objects\n\
1928 whose value is independent from a particular invocation of Python can be\n\
1929 written and read by this module. The following types are supported:\n\
1930 None, integers, floating-point numbers, strings, bytes, bytearrays,\n\
1931 tuples, lists, sets, dictionaries, and code objects, where it\n\
1932 should be understood that tuples, lists and dictionaries are only\n\
1933 supported as long as the values contained therein are themselves\n\
1934 supported; and recursive lists and dictionaries should not be written\n\
1935 (they will cause infinite loops).\n\
1936 \n\
1937 Variables:\n\
1938 \n\
1939 version -- indicates the format that the module uses. Version 0 is the\n\
1940 historical format, version 1 shares interned strings and version 2\n\
1941 uses a binary format for floating-point numbers.\n\
1942 Version 3 shares common object references (New in version 3.4).\n\
1943 \n\
1944 Functions:\n\
1945 \n\
1946 dump() -- write value to a file\n\
1947 load() -- read value from a file\n\
1948 dumps() -- marshal value as a bytes object\n\
1949 loads() -- read value from a bytes-like object");
1950
1951
1952 static int
marshal_module_exec(PyObject * mod)1953 marshal_module_exec(PyObject *mod)
1954 {
1955 if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
1956 return -1;
1957 }
1958 return 0;
1959 }
1960
1961 static PyModuleDef_Slot marshalmodule_slots[] = {
1962 {Py_mod_exec, marshal_module_exec},
1963 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1964 {Py_mod_gil, Py_MOD_GIL_NOT_USED},
1965 {0, NULL}
1966 };
1967
1968 static struct PyModuleDef marshalmodule = {
1969 PyModuleDef_HEAD_INIT,
1970 .m_name = "marshal",
1971 .m_doc = module_doc,
1972 .m_methods = marshal_methods,
1973 .m_slots = marshalmodule_slots,
1974 };
1975
1976 PyMODINIT_FUNC
PyMarshal_Init(void)1977 PyMarshal_Init(void)
1978 {
1979 return PyModuleDef_Init(&marshalmodule);
1980 }
1981