1
2 /* Write Python objects to files and read them back.
3 This is primarily intended for writing and reading compiled Python code,
4 even though dicts, lists, sets and frozensets, not commonly seen in
5 code objects, are supported.
6 Version 3 of this protocol properly supports circular links
7 and sharing. */
8
9 #define PY_SSIZE_T_CLEAN
10
11 #include "Python.h"
12 #include "longintrepr.h"
13 #include "code.h"
14 #include "marshal.h"
15 #include "pycore_hashtable.h"
16
17 /*[clinic input]
18 module marshal
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
21
22 #include "clinic/marshal.c.h"
23
/* High water mark to determine when the marshalled object is dangerously
 * deep and risks coring the interpreter.  When the object stack gets this
 * deep, an exception is raised instead of continuing.
 *
 * BUG: https://bugs.python.org/issue33720
 * On Windows PGO builds, the r_object function overallocates its stack and
 * can cause a stack overflow.  The maximum depth is therefore reduced for
 * all Windows builds; the narrower debug-only condition
 * "defined(MS_WINDOWS) && defined(_DEBUG)" is not what is tested below.
 */
#ifdef MS_WINDOWS
#  define MAX_MARSHAL_STACK_DEPTH 1000
#else
#  define MAX_MARSHAL_STACK_DEPTH 2000
#endif
40
/* One-byte type codes that introduce each object in a marshal stream. */

/* Singletons and markers: w_object() writes the code byte and nothing else. */
#define TYPE_NULL                   '0'  /* NULL pointer; also ends a dict */
#define TYPE_NONE                   'N'
#define TYPE_FALSE                  'F'
#define TYPE_TRUE                   'T'
#define TYPE_STOPITER               'S'
#define TYPE_ELLIPSIS               '.'
#define TYPE_UNKNOWN                '?'  /* written for unsupported objects */

/* Numbers. */
#define TYPE_INT                    'i'
/* TYPE_INT64 is not generated anymore.
   Supported for backward compatibility only. */
#define TYPE_INT64                  'I'
#define TYPE_LONG                   'l'
#define TYPE_FLOAT                  'f'  /* text form (version <= 1) */
#define TYPE_BINARY_FLOAT           'g'  /* 8-byte packed form (version > 1) */
#define TYPE_COMPLEX                'x'  /* text form (version <= 1) */
#define TYPE_BINARY_COMPLEX         'y'  /* 8-byte packed form (version > 1) */

/* Bytes and text. */
#define TYPE_STRING                 's'
#define TYPE_UNICODE                'u'
#define TYPE_INTERNED               't'
#define TYPE_ASCII                  'a'
#define TYPE_ASCII_INTERNED         'A'
#define TYPE_SHORT_ASCII            'z'  /* length stored in a single byte */
#define TYPE_SHORT_ASCII_INTERNED   'Z'  /* length stored in a single byte */

/* Containers and code objects. */
#define TYPE_TUPLE                  '('
#define TYPE_SMALL_TUPLE            ')'  /* length stored in a single byte */
#define TYPE_LIST                   '['
#define TYPE_DICT                   '{'
#define TYPE_SET                    '<'
#define TYPE_FROZENSET              '>'
#define TYPE_CODE                   'c'

/* Back-references to previously seen objects. */
#define TYPE_REF                    'r'
#define FLAG_REF                    '\x80' /* with a type, add obj to index */
74
/* Values stored in WFILE.error by the writer functions. */
#define WFERR_OK              0  /* no error recorded */
#define WFERR_UNMARSHALLABLE  1  /* object (or size) cannot be written */
#define WFERR_NESTEDTOODEEP   2  /* depth exceeded MAX_MARSHAL_STACK_DEPTH */
#define WFERR_NOMEMORY        3  /* buffer growth or allocation failed */
79
80 typedef struct {
81 FILE *fp;
82 int error; /* see WFERR_* values */
83 int depth;
84 PyObject *str;
85 char *ptr;
86 const char *end;
87 char *buf;
88 _Py_hashtable_t *hashtable;
89 int version;
90 } WFILE;
91
/* Append the single byte 'c' to WFILE 'p'.
 * When the buffer is full, w_reserve() is asked for one more byte; if that
 * fails the byte is dropped.  'p' is evaluated more than once.
 */
#define w_byte(c, p)                                            \
    do {                                                        \
        if ((p)->ptr == (p)->end && !w_reserve((p), 1)) {       \
            break;  /* no room left and none could be made */   \
        }                                                       \
        *(p)->ptr++ = (c);                                      \
    } while (0)
96
97 static void
w_flush(WFILE * p)98 w_flush(WFILE *p)
99 {
100 assert(p->fp != NULL);
101 fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
102 p->ptr = p->buf;
103 }
104
105 static int
w_reserve(WFILE * p,Py_ssize_t needed)106 w_reserve(WFILE *p, Py_ssize_t needed)
107 {
108 Py_ssize_t pos, size, delta;
109 if (p->ptr == NULL)
110 return 0; /* An error already occurred */
111 if (p->fp != NULL) {
112 w_flush(p);
113 return needed <= p->end - p->ptr;
114 }
115 assert(p->str != NULL);
116 pos = p->ptr - p->buf;
117 size = PyBytes_GET_SIZE(p->str);
118 if (size > 16*1024*1024)
119 delta = (size >> 3); /* 12.5% overallocation */
120 else
121 delta = size + 1024;
122 delta = Py_MAX(delta, needed);
123 if (delta > PY_SSIZE_T_MAX - size) {
124 p->error = WFERR_NOMEMORY;
125 return 0;
126 }
127 size += delta;
128 if (_PyBytes_Resize(&p->str, size) != 0) {
129 p->end = p->ptr = p->buf = NULL;
130 return 0;
131 }
132 else {
133 p->buf = PyBytes_AS_STRING(p->str);
134 p->ptr = p->buf + pos;
135 p->end = p->buf + size;
136 return 1;
137 }
138 }
139
140 static void
w_string(const void * s,Py_ssize_t n,WFILE * p)141 w_string(const void *s, Py_ssize_t n, WFILE *p)
142 {
143 Py_ssize_t m;
144 if (!n || p->ptr == NULL)
145 return;
146 m = p->end - p->ptr;
147 if (p->fp != NULL) {
148 if (n <= m) {
149 memcpy(p->ptr, s, n);
150 p->ptr += n;
151 }
152 else {
153 w_flush(p);
154 fwrite(s, 1, n, p->fp);
155 }
156 }
157 else {
158 if (n <= m || w_reserve(p, n - m)) {
159 memcpy(p->ptr, s, n);
160 p->ptr += n;
161 }
162 }
163 }
164
165 static void
w_short(int x,WFILE * p)166 w_short(int x, WFILE *p)
167 {
168 w_byte((char)( x & 0xff), p);
169 w_byte((char)((x>> 8) & 0xff), p);
170 }
171
172 static void
w_long(long x,WFILE * p)173 w_long(long x, WFILE *p)
174 {
175 w_byte((char)( x & 0xff), p);
176 w_byte((char)((x>> 8) & 0xff), p);
177 w_byte((char)((x>>16) & 0xff), p);
178 w_byte((char)((x>>24) & 0xff), p);
179 }
180
/* Largest size/length that is written as a 32-bit field. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size 'n' as a 32-bit value.
 *
 * Where size_t is wider than 4 bytes, a size above SIZE32_MAX is rejected:
 * p->depth is decremented, p->error is set to WFERR_UNMARSHALLABLE and the
 * macro executes `return;` in the *calling* function.  It can therefore
 * only be used inside functions returning void.  'n' and 'p' are evaluated
 * more than once.
 */
#if SIZEOF_SIZE_T <= 4
#  define W_SIZE  w_long
#else
#  define W_SIZE(n, p)  do {                        \
        if ((n) <= SIZE32_MAX) {                    \
            w_long((long)(n), (p));                 \
        }                                           \
        else {                                      \
            (p)->depth--;                           \
            (p)->error = WFERR_UNMARSHALLABLE;      \
            return;                                 \
        }                                           \
    } while (0)
#endif
195
196 static void
w_pstring(const void * s,Py_ssize_t n,WFILE * p)197 w_pstring(const void *s, Py_ssize_t n, WFILE *p)
198 {
199 W_SIZE(n, p);
200 w_string(s, n, p);
201 }
202
203 static void
w_short_pstring(const void * s,Py_ssize_t n,WFILE * p)204 w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
205 {
206 w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
207 w_string(s, n, p);
208 }
209
/* We assume that Python ints are stored internally in a base that is some
   power of 2**15; for the sake of portability they are always read and
   written in base exactly 2**15. */

#define PyLong_MARSHAL_SHIFT  15
#define PyLong_MARSHAL_BASE   ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK   (PyLong_MARSHAL_BASE - 1)

#if (PyLong_SHIFT % PyLong_MARSHAL_SHIFT) != 0
#  error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif

/* Number of 15-bit marshal digits held by one internal PyLong digit. */
#define PyLong_MARSHAL_RATIO  (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
221
/* Write type code 't' combined with the variable 'flag', which must be in
   scope at the point of use (it carries FLAG_REF when set by w_ref()). */
#define W_TYPE(t, p)  do { w_byte((t) | flag, (p)); } while (0)
225
226 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)227 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
228 {
229 Py_ssize_t i, j, n, l;
230 digit d;
231
232 W_TYPE(TYPE_LONG, p);
233 if (Py_SIZE(ob) == 0) {
234 w_long((long)0, p);
235 return;
236 }
237
238 /* set l to number of base PyLong_MARSHAL_BASE digits */
239 n = Py_ABS(Py_SIZE(ob));
240 l = (n-1) * PyLong_MARSHAL_RATIO;
241 d = ob->ob_digit[n-1];
242 assert(d != 0); /* a PyLong is always normalized */
243 do {
244 d >>= PyLong_MARSHAL_SHIFT;
245 l++;
246 } while (d != 0);
247 if (l > SIZE32_MAX) {
248 p->depth--;
249 p->error = WFERR_UNMARSHALLABLE;
250 return;
251 }
252 w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
253
254 for (i=0; i < n-1; i++) {
255 d = ob->ob_digit[i];
256 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
257 w_short(d & PyLong_MARSHAL_MASK, p);
258 d >>= PyLong_MARSHAL_SHIFT;
259 }
260 assert (d == 0);
261 }
262 d = ob->ob_digit[n-1];
263 do {
264 w_short(d & PyLong_MARSHAL_MASK, p);
265 d >>= PyLong_MARSHAL_SHIFT;
266 } while (d != 0);
267 }
268
269 static void
w_float_bin(double v,WFILE * p)270 w_float_bin(double v, WFILE *p)
271 {
272 unsigned char buf[8];
273 if (_PyFloat_Pack8(v, buf, 1) < 0) {
274 p->error = WFERR_UNMARSHALLABLE;
275 return;
276 }
277 w_string(buf, 8, p);
278 }
279
280 static void
w_float_str(double v,WFILE * p)281 w_float_str(double v, WFILE *p)
282 {
283 char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
284 if (!buf) {
285 p->error = WFERR_NOMEMORY;
286 return;
287 }
288 w_short_pstring(buf, strlen(buf), p);
289 PyMem_Free(buf);
290 }
291
292 static int
w_ref(PyObject * v,char * flag,WFILE * p)293 w_ref(PyObject *v, char *flag, WFILE *p)
294 {
295 _Py_hashtable_entry_t *entry;
296 int w;
297
298 if (p->version < 3 || p->hashtable == NULL)
299 return 0; /* not writing object references */
300
301 /* if it has only one reference, it definitely isn't shared */
302 if (Py_REFCNT(v) == 1)
303 return 0;
304
305 entry = _Py_hashtable_get_entry(p->hashtable, v);
306 if (entry != NULL) {
307 /* write the reference index to the stream */
308 w = (int)(uintptr_t)entry->value;
309 /* we don't store "long" indices in the dict */
310 assert(0 <= w && w <= 0x7fffffff);
311 w_byte(TYPE_REF, p);
312 w_long(w, p);
313 return 1;
314 } else {
315 size_t s = p->hashtable->nentries;
316 /* we don't support long indices */
317 if (s >= 0x7fffffff) {
318 PyErr_SetString(PyExc_ValueError, "too many objects");
319 goto err;
320 }
321 w = (int)s;
322 Py_INCREF(v);
323 if (_Py_hashtable_set(p->hashtable, v, (void *)(uintptr_t)w) < 0) {
324 Py_DECREF(v);
325 goto err;
326 }
327 *flag |= FLAG_REF;
328 return 0;
329 }
330 err:
331 p->error = WFERR_UNMARSHALLABLE;
332 return 1;
333 }
334
335 static void
336 w_complex_object(PyObject *v, char flag, WFILE *p);
337
338 static void
w_object(PyObject * v,WFILE * p)339 w_object(PyObject *v, WFILE *p)
340 {
341 char flag = '\0';
342
343 p->depth++;
344
345 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
346 p->error = WFERR_NESTEDTOODEEP;
347 }
348 else if (v == NULL) {
349 w_byte(TYPE_NULL, p);
350 }
351 else if (v == Py_None) {
352 w_byte(TYPE_NONE, p);
353 }
354 else if (v == PyExc_StopIteration) {
355 w_byte(TYPE_STOPITER, p);
356 }
357 else if (v == Py_Ellipsis) {
358 w_byte(TYPE_ELLIPSIS, p);
359 }
360 else if (v == Py_False) {
361 w_byte(TYPE_FALSE, p);
362 }
363 else if (v == Py_True) {
364 w_byte(TYPE_TRUE, p);
365 }
366 else if (!w_ref(v, &flag, p))
367 w_complex_object(v, flag, p);
368
369 p->depth--;
370 }
371
372 static void
w_complex_object(PyObject * v,char flag,WFILE * p)373 w_complex_object(PyObject *v, char flag, WFILE *p)
374 {
375 Py_ssize_t i, n;
376
377 if (PyLong_CheckExact(v)) {
378 int overflow;
379 long x = PyLong_AsLongAndOverflow(v, &overflow);
380 if (overflow) {
381 w_PyLong((PyLongObject *)v, flag, p);
382 }
383 else {
384 #if SIZEOF_LONG > 4
385 long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
386 if (y && y != -1) {
387 /* Too large for TYPE_INT */
388 w_PyLong((PyLongObject*)v, flag, p);
389 }
390 else
391 #endif
392 {
393 W_TYPE(TYPE_INT, p);
394 w_long(x, p);
395 }
396 }
397 }
398 else if (PyFloat_CheckExact(v)) {
399 if (p->version > 1) {
400 W_TYPE(TYPE_BINARY_FLOAT, p);
401 w_float_bin(PyFloat_AS_DOUBLE(v), p);
402 }
403 else {
404 W_TYPE(TYPE_FLOAT, p);
405 w_float_str(PyFloat_AS_DOUBLE(v), p);
406 }
407 }
408 else if (PyComplex_CheckExact(v)) {
409 if (p->version > 1) {
410 W_TYPE(TYPE_BINARY_COMPLEX, p);
411 w_float_bin(PyComplex_RealAsDouble(v), p);
412 w_float_bin(PyComplex_ImagAsDouble(v), p);
413 }
414 else {
415 W_TYPE(TYPE_COMPLEX, p);
416 w_float_str(PyComplex_RealAsDouble(v), p);
417 w_float_str(PyComplex_ImagAsDouble(v), p);
418 }
419 }
420 else if (PyBytes_CheckExact(v)) {
421 W_TYPE(TYPE_STRING, p);
422 w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
423 }
424 else if (PyUnicode_CheckExact(v)) {
425 if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
426 int is_short = PyUnicode_GET_LENGTH(v) < 256;
427 if (is_short) {
428 if (PyUnicode_CHECK_INTERNED(v))
429 W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
430 else
431 W_TYPE(TYPE_SHORT_ASCII, p);
432 w_short_pstring(PyUnicode_1BYTE_DATA(v),
433 PyUnicode_GET_LENGTH(v), p);
434 }
435 else {
436 if (PyUnicode_CHECK_INTERNED(v))
437 W_TYPE(TYPE_ASCII_INTERNED, p);
438 else
439 W_TYPE(TYPE_ASCII, p);
440 w_pstring(PyUnicode_1BYTE_DATA(v),
441 PyUnicode_GET_LENGTH(v), p);
442 }
443 }
444 else {
445 PyObject *utf8;
446 utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
447 if (utf8 == NULL) {
448 p->depth--;
449 p->error = WFERR_UNMARSHALLABLE;
450 return;
451 }
452 if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v))
453 W_TYPE(TYPE_INTERNED, p);
454 else
455 W_TYPE(TYPE_UNICODE, p);
456 w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
457 Py_DECREF(utf8);
458 }
459 }
460 else if (PyTuple_CheckExact(v)) {
461 n = PyTuple_GET_SIZE(v);
462 if (p->version >= 4 && n < 256) {
463 W_TYPE(TYPE_SMALL_TUPLE, p);
464 w_byte((unsigned char)n, p);
465 }
466 else {
467 W_TYPE(TYPE_TUPLE, p);
468 W_SIZE(n, p);
469 }
470 for (i = 0; i < n; i++) {
471 w_object(PyTuple_GET_ITEM(v, i), p);
472 }
473 }
474 else if (PyList_CheckExact(v)) {
475 W_TYPE(TYPE_LIST, p);
476 n = PyList_GET_SIZE(v);
477 W_SIZE(n, p);
478 for (i = 0; i < n; i++) {
479 w_object(PyList_GET_ITEM(v, i), p);
480 }
481 }
482 else if (PyDict_CheckExact(v)) {
483 Py_ssize_t pos;
484 PyObject *key, *value;
485 W_TYPE(TYPE_DICT, p);
486 /* This one is NULL object terminated! */
487 pos = 0;
488 while (PyDict_Next(v, &pos, &key, &value)) {
489 w_object(key, p);
490 w_object(value, p);
491 }
492 w_object((PyObject *)NULL, p);
493 }
494 else if (PyAnySet_CheckExact(v)) {
495 PyObject *value;
496 Py_ssize_t pos = 0;
497 Py_hash_t hash;
498
499 if (PyFrozenSet_CheckExact(v))
500 W_TYPE(TYPE_FROZENSET, p);
501 else
502 W_TYPE(TYPE_SET, p);
503 n = PySet_GET_SIZE(v);
504 W_SIZE(n, p);
505 while (_PySet_NextEntry(v, &pos, &value, &hash)) {
506 w_object(value, p);
507 }
508 }
509 else if (PyCode_Check(v)) {
510 PyCodeObject *co = (PyCodeObject *)v;
511 W_TYPE(TYPE_CODE, p);
512 w_long(co->co_argcount, p);
513 w_long(co->co_posonlyargcount, p);
514 w_long(co->co_kwonlyargcount, p);
515 w_long(co->co_nlocals, p);
516 w_long(co->co_stacksize, p);
517 w_long(co->co_flags, p);
518 w_object(co->co_code, p);
519 w_object(co->co_consts, p);
520 w_object(co->co_names, p);
521 w_object(co->co_varnames, p);
522 w_object(co->co_freevars, p);
523 w_object(co->co_cellvars, p);
524 w_object(co->co_filename, p);
525 w_object(co->co_name, p);
526 w_long(co->co_firstlineno, p);
527 w_object(co->co_linetable, p);
528 }
529 else if (PyObject_CheckBuffer(v)) {
530 /* Write unknown bytes-like objects as a bytes object */
531 Py_buffer view;
532 if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
533 w_byte(TYPE_UNKNOWN, p);
534 p->depth--;
535 p->error = WFERR_UNMARSHALLABLE;
536 return;
537 }
538 W_TYPE(TYPE_STRING, p);
539 w_pstring(view.buf, view.len, p);
540 PyBuffer_Release(&view);
541 }
542 else {
543 W_TYPE(TYPE_UNKNOWN, p);
544 p->error = WFERR_UNMARSHALLABLE;
545 }
546 }
547
548 static void
w_decref_entry(void * key)549 w_decref_entry(void *key)
550 {
551 PyObject *entry_key = (PyObject *)key;
552 Py_XDECREF(entry_key);
553 }
554
555 static int
w_init_refs(WFILE * wf,int version)556 w_init_refs(WFILE *wf, int version)
557 {
558 if (version >= 3) {
559 wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
560 _Py_hashtable_compare_direct,
561 w_decref_entry, NULL, NULL);
562 if (wf->hashtable == NULL) {
563 PyErr_NoMemory();
564 return -1;
565 }
566 }
567 return 0;
568 }
569
570 static void
w_clear_refs(WFILE * wf)571 w_clear_refs(WFILE *wf)
572 {
573 if (wf->hashtable != NULL) {
574 _Py_hashtable_destroy(wf->hashtable);
575 }
576 }
577
578 /* version currently has no effect for writing ints. */
579 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)580 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
581 {
582 char buf[4];
583 WFILE wf;
584 memset(&wf, 0, sizeof(wf));
585 wf.fp = fp;
586 wf.ptr = wf.buf = buf;
587 wf.end = wf.ptr + sizeof(buf);
588 wf.error = WFERR_OK;
589 wf.version = version;
590 w_long(x, &wf);
591 w_flush(&wf);
592 }
593
594 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)595 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
596 {
597 char buf[BUFSIZ];
598 WFILE wf;
599 if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
600 return; /* caller must check PyErr_Occurred() */
601 }
602 memset(&wf, 0, sizeof(wf));
603 wf.fp = fp;
604 wf.ptr = wf.buf = buf;
605 wf.end = wf.ptr + sizeof(buf);
606 wf.error = WFERR_OK;
607 wf.version = version;
608 if (w_init_refs(&wf, version)) {
609 return; /* caller must check PyErr_Occurred() */
610 }
611 w_object(x, &wf);
612 w_clear_refs(&wf);
613 w_flush(&wf);
614 }
615
616 typedef struct {
617 FILE *fp;
618 int depth;
619 PyObject *readable; /* Stream-like object being read from */
620 const char *ptr;
621 const char *end;
622 char *buf;
623 Py_ssize_t buf_size;
624 PyObject *refs; /* a list */
625 } RFILE;
626
627 static const char *
r_string(Py_ssize_t n,RFILE * p)628 r_string(Py_ssize_t n, RFILE *p)
629 {
630 Py_ssize_t read = -1;
631
632 if (p->ptr != NULL) {
633 /* Fast path for loads() */
634 const char *res = p->ptr;
635 Py_ssize_t left = p->end - p->ptr;
636 if (left < n) {
637 PyErr_SetString(PyExc_EOFError,
638 "marshal data too short");
639 return NULL;
640 }
641 p->ptr += n;
642 return res;
643 }
644 if (p->buf == NULL) {
645 p->buf = PyMem_Malloc(n);
646 if (p->buf == NULL) {
647 PyErr_NoMemory();
648 return NULL;
649 }
650 p->buf_size = n;
651 }
652 else if (p->buf_size < n) {
653 char *tmp = PyMem_Realloc(p->buf, n);
654 if (tmp == NULL) {
655 PyErr_NoMemory();
656 return NULL;
657 }
658 p->buf = tmp;
659 p->buf_size = n;
660 }
661
662 if (!p->readable) {
663 assert(p->fp != NULL);
664 read = fread(p->buf, 1, n, p->fp);
665 }
666 else {
667 _Py_IDENTIFIER(readinto);
668 PyObject *res, *mview;
669 Py_buffer buf;
670
671 if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
672 return NULL;
673 mview = PyMemoryView_FromBuffer(&buf);
674 if (mview == NULL)
675 return NULL;
676
677 res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview);
678 if (res != NULL) {
679 read = PyNumber_AsSsize_t(res, PyExc_ValueError);
680 Py_DECREF(res);
681 }
682 }
683 if (read != n) {
684 if (!PyErr_Occurred()) {
685 if (read > n)
686 PyErr_Format(PyExc_ValueError,
687 "read() returned too much data: "
688 "%zd bytes requested, %zd returned",
689 n, read);
690 else
691 PyErr_SetString(PyExc_EOFError,
692 "EOF read where not expected");
693 }
694 return NULL;
695 }
696 return p->buf;
697 }
698
699 static int
r_byte(RFILE * p)700 r_byte(RFILE *p)
701 {
702 int c = EOF;
703
704 if (p->ptr != NULL) {
705 if (p->ptr < p->end)
706 c = (unsigned char) *p->ptr++;
707 return c;
708 }
709 if (!p->readable) {
710 assert(p->fp);
711 c = getc(p->fp);
712 }
713 else {
714 const char *ptr = r_string(1, p);
715 if (ptr != NULL)
716 c = *(const unsigned char *) ptr;
717 }
718 return c;
719 }
720
721 static int
r_short(RFILE * p)722 r_short(RFILE *p)
723 {
724 short x = -1;
725 const unsigned char *buffer;
726
727 buffer = (const unsigned char *) r_string(2, p);
728 if (buffer != NULL) {
729 x = buffer[0];
730 x |= buffer[1] << 8;
731 /* Sign-extension, in case short greater than 16 bits */
732 x |= -(x & 0x8000);
733 }
734 return x;
735 }
736
737 static long
r_long(RFILE * p)738 r_long(RFILE *p)
739 {
740 long x = -1;
741 const unsigned char *buffer;
742
743 buffer = (const unsigned char *) r_string(4, p);
744 if (buffer != NULL) {
745 x = buffer[0];
746 x |= (long)buffer[1] << 8;
747 x |= (long)buffer[2] << 16;
748 x |= (long)buffer[3] << 24;
749 #if SIZEOF_LONG > 4
750 /* Sign extension for 64-bit machines */
751 x |= -(x & 0x80000000L);
752 #endif
753 }
754 return x;
755 }
756
757 /* r_long64 deals with the TYPE_INT64 code. */
758 static PyObject *
r_long64(RFILE * p)759 r_long64(RFILE *p)
760 {
761 const unsigned char *buffer = (const unsigned char *) r_string(8, p);
762 if (buffer == NULL) {
763 return NULL;
764 }
765 return _PyLong_FromByteArray(buffer, 8,
766 1 /* little endian */,
767 1 /* signed */);
768 }
769
770 static PyObject *
r_PyLong(RFILE * p)771 r_PyLong(RFILE *p)
772 {
773 PyLongObject *ob;
774 long n, size, i;
775 int j, md, shorts_in_top_digit;
776 digit d;
777
778 n = r_long(p);
779 if (PyErr_Occurred())
780 return NULL;
781 if (n == 0)
782 return (PyObject *)_PyLong_New(0);
783 if (n < -SIZE32_MAX || n > SIZE32_MAX) {
784 PyErr_SetString(PyExc_ValueError,
785 "bad marshal data (long size out of range)");
786 return NULL;
787 }
788
789 size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
790 shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
791 ob = _PyLong_New(size);
792 if (ob == NULL)
793 return NULL;
794
795 Py_SET_SIZE(ob, n > 0 ? size : -size);
796
797 for (i = 0; i < size-1; i++) {
798 d = 0;
799 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
800 md = r_short(p);
801 if (PyErr_Occurred()) {
802 Py_DECREF(ob);
803 return NULL;
804 }
805 if (md < 0 || md > PyLong_MARSHAL_BASE)
806 goto bad_digit;
807 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
808 }
809 ob->ob_digit[i] = d;
810 }
811
812 d = 0;
813 for (j=0; j < shorts_in_top_digit; j++) {
814 md = r_short(p);
815 if (PyErr_Occurred()) {
816 Py_DECREF(ob);
817 return NULL;
818 }
819 if (md < 0 || md > PyLong_MARSHAL_BASE)
820 goto bad_digit;
821 /* topmost marshal digit should be nonzero */
822 if (md == 0 && j == shorts_in_top_digit - 1) {
823 Py_DECREF(ob);
824 PyErr_SetString(PyExc_ValueError,
825 "bad marshal data (unnormalized long data)");
826 return NULL;
827 }
828 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
829 }
830 if (PyErr_Occurred()) {
831 Py_DECREF(ob);
832 return NULL;
833 }
834 /* top digit should be nonzero, else the resulting PyLong won't be
835 normalized */
836 ob->ob_digit[size-1] = d;
837 return (PyObject *)ob;
838 bad_digit:
839 Py_DECREF(ob);
840 PyErr_SetString(PyExc_ValueError,
841 "bad marshal data (digit out of range in long)");
842 return NULL;
843 }
844
845 static double
r_float_bin(RFILE * p)846 r_float_bin(RFILE *p)
847 {
848 const unsigned char *buf = (const unsigned char *) r_string(8, p);
849 if (buf == NULL)
850 return -1;
851 return _PyFloat_Unpack8(buf, 1);
852 }
853
854 /* Issue #33720: Disable inlining for reducing the C stack consumption
855 on PGO builds. */
856 _Py_NO_INLINE static double
r_float_str(RFILE * p)857 r_float_str(RFILE *p)
858 {
859 int n;
860 char buf[256];
861 const char *ptr;
862 n = r_byte(p);
863 if (n == EOF) {
864 PyErr_SetString(PyExc_EOFError,
865 "EOF read where object expected");
866 return -1;
867 }
868 ptr = r_string(n, p);
869 if (ptr == NULL) {
870 return -1;
871 }
872 memcpy(buf, ptr, n);
873 buf[n] = '\0';
874 return PyOS_string_to_double(buf, NULL, NULL);
875 }
876
877 /* allocate the reflist index for a new object. Return -1 on failure */
878 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)879 r_ref_reserve(int flag, RFILE *p)
880 {
881 if (flag) { /* currently only FLAG_REF is defined */
882 Py_ssize_t idx = PyList_GET_SIZE(p->refs);
883 if (idx >= 0x7ffffffe) {
884 PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
885 return -1;
886 }
887 if (PyList_Append(p->refs, Py_None) < 0)
888 return -1;
889 return idx;
890 } else
891 return 0;
892 }
893
894 /* insert the new object 'o' to the reflist at previously
895 * allocated index 'idx'.
896 * 'o' can be NULL, in which case nothing is done.
897 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
898 * if 'o' was non-NULL, and the function fails, 'o' is released and
899 * NULL returned. This simplifies error checking at the call site since
900 * a single test for NULL for the function result is enough.
901 */
902 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)903 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
904 {
905 if (o != NULL && flag) { /* currently only FLAG_REF is defined */
906 PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
907 Py_INCREF(o);
908 PyList_SET_ITEM(p->refs, idx, o);
909 Py_DECREF(tmp);
910 }
911 return o;
912 }
913
914 /* combination of both above, used when an object can be
915 * created whenever it is seen in the file, as opposed to
916 * after having loaded its sub-objects.
917 */
918 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)919 r_ref(PyObject *o, int flag, RFILE *p)
920 {
921 assert(flag & FLAG_REF);
922 if (o == NULL)
923 return NULL;
924 if (PyList_Append(p->refs, o) < 0) {
925 Py_DECREF(o); /* release the new object */
926 return NULL;
927 }
928 return o;
929 }
930
931 static PyObject *
r_object(RFILE * p)932 r_object(RFILE *p)
933 {
934 /* NULL is a valid return value, it does not necessarily means that
935 an exception is set. */
936 PyObject *v, *v2;
937 Py_ssize_t idx = 0;
938 long i, n;
939 int type, code = r_byte(p);
940 int flag, is_interned = 0;
941 PyObject *retval = NULL;
942
943 if (code == EOF) {
944 PyErr_SetString(PyExc_EOFError,
945 "EOF read where object expected");
946 return NULL;
947 }
948
949 p->depth++;
950
951 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
952 p->depth--;
953 PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
954 return NULL;
955 }
956
957 flag = code & FLAG_REF;
958 type = code & ~FLAG_REF;
959
960 #define R_REF(O) do{\
961 if (flag) \
962 O = r_ref(O, flag, p);\
963 } while (0)
964
965 switch (type) {
966
967 case TYPE_NULL:
968 break;
969
970 case TYPE_NONE:
971 Py_INCREF(Py_None);
972 retval = Py_None;
973 break;
974
975 case TYPE_STOPITER:
976 Py_INCREF(PyExc_StopIteration);
977 retval = PyExc_StopIteration;
978 break;
979
980 case TYPE_ELLIPSIS:
981 Py_INCREF(Py_Ellipsis);
982 retval = Py_Ellipsis;
983 break;
984
985 case TYPE_FALSE:
986 Py_INCREF(Py_False);
987 retval = Py_False;
988 break;
989
990 case TYPE_TRUE:
991 Py_INCREF(Py_True);
992 retval = Py_True;
993 break;
994
995 case TYPE_INT:
996 n = r_long(p);
997 retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
998 R_REF(retval);
999 break;
1000
1001 case TYPE_INT64:
1002 retval = r_long64(p);
1003 R_REF(retval);
1004 break;
1005
1006 case TYPE_LONG:
1007 retval = r_PyLong(p);
1008 R_REF(retval);
1009 break;
1010
1011 case TYPE_FLOAT:
1012 {
1013 double x = r_float_str(p);
1014 if (x == -1.0 && PyErr_Occurred())
1015 break;
1016 retval = PyFloat_FromDouble(x);
1017 R_REF(retval);
1018 break;
1019 }
1020
1021 case TYPE_BINARY_FLOAT:
1022 {
1023 double x = r_float_bin(p);
1024 if (x == -1.0 && PyErr_Occurred())
1025 break;
1026 retval = PyFloat_FromDouble(x);
1027 R_REF(retval);
1028 break;
1029 }
1030
1031 case TYPE_COMPLEX:
1032 {
1033 Py_complex c;
1034 c.real = r_float_str(p);
1035 if (c.real == -1.0 && PyErr_Occurred())
1036 break;
1037 c.imag = r_float_str(p);
1038 if (c.imag == -1.0 && PyErr_Occurred())
1039 break;
1040 retval = PyComplex_FromCComplex(c);
1041 R_REF(retval);
1042 break;
1043 }
1044
1045 case TYPE_BINARY_COMPLEX:
1046 {
1047 Py_complex c;
1048 c.real = r_float_bin(p);
1049 if (c.real == -1.0 && PyErr_Occurred())
1050 break;
1051 c.imag = r_float_bin(p);
1052 if (c.imag == -1.0 && PyErr_Occurred())
1053 break;
1054 retval = PyComplex_FromCComplex(c);
1055 R_REF(retval);
1056 break;
1057 }
1058
1059 case TYPE_STRING:
1060 {
1061 const char *ptr;
1062 n = r_long(p);
1063 if (PyErr_Occurred())
1064 break;
1065 if (n < 0 || n > SIZE32_MAX) {
1066 PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1067 break;
1068 }
1069 v = PyBytes_FromStringAndSize((char *)NULL, n);
1070 if (v == NULL)
1071 break;
1072 ptr = r_string(n, p);
1073 if (ptr == NULL) {
1074 Py_DECREF(v);
1075 break;
1076 }
1077 memcpy(PyBytes_AS_STRING(v), ptr, n);
1078 retval = v;
1079 R_REF(retval);
1080 break;
1081 }
1082
1083 case TYPE_ASCII_INTERNED:
1084 is_interned = 1;
1085 /* fall through */
1086 case TYPE_ASCII:
1087 n = r_long(p);
1088 if (PyErr_Occurred())
1089 break;
1090 if (n < 0 || n > SIZE32_MAX) {
1091 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1092 break;
1093 }
1094 goto _read_ascii;
1095
1096 case TYPE_SHORT_ASCII_INTERNED:
1097 is_interned = 1;
1098 /* fall through */
1099 case TYPE_SHORT_ASCII:
1100 n = r_byte(p);
1101 if (n == EOF) {
1102 PyErr_SetString(PyExc_EOFError,
1103 "EOF read where object expected");
1104 break;
1105 }
1106 _read_ascii:
1107 {
1108 const char *ptr;
1109 ptr = r_string(n, p);
1110 if (ptr == NULL)
1111 break;
1112 v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1113 if (v == NULL)
1114 break;
1115 if (is_interned)
1116 PyUnicode_InternInPlace(&v);
1117 retval = v;
1118 R_REF(retval);
1119 break;
1120 }
1121
1122 case TYPE_INTERNED:
1123 is_interned = 1;
1124 /* fall through */
1125 case TYPE_UNICODE:
1126 {
1127 const char *buffer;
1128
1129 n = r_long(p);
1130 if (PyErr_Occurred())
1131 break;
1132 if (n < 0 || n > SIZE32_MAX) {
1133 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1134 break;
1135 }
1136 if (n != 0) {
1137 buffer = r_string(n, p);
1138 if (buffer == NULL)
1139 break;
1140 v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1141 }
1142 else {
1143 v = PyUnicode_New(0, 0);
1144 }
1145 if (v == NULL)
1146 break;
1147 if (is_interned)
1148 PyUnicode_InternInPlace(&v);
1149 retval = v;
1150 R_REF(retval);
1151 break;
1152 }
1153
1154 case TYPE_SMALL_TUPLE:
1155 n = (unsigned char) r_byte(p);
1156 if (PyErr_Occurred())
1157 break;
1158 goto _read_tuple;
1159 case TYPE_TUPLE:
1160 n = r_long(p);
1161 if (PyErr_Occurred())
1162 break;
1163 if (n < 0 || n > SIZE32_MAX) {
1164 PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1165 break;
1166 }
1167 _read_tuple:
1168 v = PyTuple_New(n);
1169 R_REF(v);
1170 if (v == NULL)
1171 break;
1172
1173 for (i = 0; i < n; i++) {
1174 v2 = r_object(p);
1175 if ( v2 == NULL ) {
1176 if (!PyErr_Occurred())
1177 PyErr_SetString(PyExc_TypeError,
1178 "NULL object in marshal data for tuple");
1179 Py_DECREF(v);
1180 v = NULL;
1181 break;
1182 }
1183 PyTuple_SET_ITEM(v, i, v2);
1184 }
1185 retval = v;
1186 break;
1187
1188 case TYPE_LIST:
1189 n = r_long(p);
1190 if (PyErr_Occurred())
1191 break;
1192 if (n < 0 || n > SIZE32_MAX) {
1193 PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1194 break;
1195 }
1196 v = PyList_New(n);
1197 R_REF(v);
1198 if (v == NULL)
1199 break;
1200 for (i = 0; i < n; i++) {
1201 v2 = r_object(p);
1202 if ( v2 == NULL ) {
1203 if (!PyErr_Occurred())
1204 PyErr_SetString(PyExc_TypeError,
1205 "NULL object in marshal data for list");
1206 Py_DECREF(v);
1207 v = NULL;
1208 break;
1209 }
1210 PyList_SET_ITEM(v, i, v2);
1211 }
1212 retval = v;
1213 break;
1214
1215 case TYPE_DICT:
1216 v = PyDict_New();
1217 R_REF(v);
1218 if (v == NULL)
1219 break;
1220 for (;;) {
1221 PyObject *key, *val;
1222 key = r_object(p);
1223 if (key == NULL)
1224 break;
1225 val = r_object(p);
1226 if (val == NULL) {
1227 Py_DECREF(key);
1228 break;
1229 }
1230 if (PyDict_SetItem(v, key, val) < 0) {
1231 Py_DECREF(key);
1232 Py_DECREF(val);
1233 break;
1234 }
1235 Py_DECREF(key);
1236 Py_DECREF(val);
1237 }
1238 if (PyErr_Occurred()) {
1239 Py_DECREF(v);
1240 v = NULL;
1241 }
1242 retval = v;
1243 break;
1244
1245 case TYPE_SET:
1246 case TYPE_FROZENSET:
1247 n = r_long(p);
1248 if (PyErr_Occurred())
1249 break;
1250 if (n < 0 || n > SIZE32_MAX) {
1251 PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1252 break;
1253 }
1254
1255 if (n == 0 && type == TYPE_FROZENSET) {
1256 /* call frozenset() to get the empty frozenset singleton */
1257 v = _PyObject_CallNoArg((PyObject*)&PyFrozenSet_Type);
1258 if (v == NULL)
1259 break;
1260 R_REF(v);
1261 retval = v;
1262 }
1263 else {
1264 v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1265 if (type == TYPE_SET) {
1266 R_REF(v);
1267 } else {
1268 /* must use delayed registration of frozensets because they must
1269 * be init with a refcount of 1
1270 */
1271 idx = r_ref_reserve(flag, p);
1272 if (idx < 0)
1273 Py_CLEAR(v); /* signal error */
1274 }
1275 if (v == NULL)
1276 break;
1277
1278 for (i = 0; i < n; i++) {
1279 v2 = r_object(p);
1280 if ( v2 == NULL ) {
1281 if (!PyErr_Occurred())
1282 PyErr_SetString(PyExc_TypeError,
1283 "NULL object in marshal data for set");
1284 Py_DECREF(v);
1285 v = NULL;
1286 break;
1287 }
1288 if (PySet_Add(v, v2) == -1) {
1289 Py_DECREF(v);
1290 Py_DECREF(v2);
1291 v = NULL;
1292 break;
1293 }
1294 Py_DECREF(v2);
1295 }
1296 if (type != TYPE_SET)
1297 v = r_ref_insert(v, idx, flag, p);
1298 retval = v;
1299 }
1300 break;
1301
1302 case TYPE_CODE:
1303 {
1304 int argcount;
1305 int posonlyargcount;
1306 int kwonlyargcount;
1307 int nlocals;
1308 int stacksize;
1309 int flags;
1310 PyObject *code = NULL;
1311 PyObject *consts = NULL;
1312 PyObject *names = NULL;
1313 PyObject *varnames = NULL;
1314 PyObject *freevars = NULL;
1315 PyObject *cellvars = NULL;
1316 PyObject *filename = NULL;
1317 PyObject *name = NULL;
1318 int firstlineno;
1319 PyObject *linetable = NULL;
1320
1321 idx = r_ref_reserve(flag, p);
1322 if (idx < 0)
1323 break;
1324
1325 v = NULL;
1326
1327 /* XXX ignore long->int overflows for now */
1328 argcount = (int)r_long(p);
1329 if (PyErr_Occurred())
1330 goto code_error;
1331 posonlyargcount = (int)r_long(p);
1332 if (PyErr_Occurred()) {
1333 goto code_error;
1334 }
1335 kwonlyargcount = (int)r_long(p);
1336 if (PyErr_Occurred())
1337 goto code_error;
1338 nlocals = (int)r_long(p);
1339 if (PyErr_Occurred())
1340 goto code_error;
1341 stacksize = (int)r_long(p);
1342 if (PyErr_Occurred())
1343 goto code_error;
1344 flags = (int)r_long(p);
1345 if (PyErr_Occurred())
1346 goto code_error;
1347 code = r_object(p);
1348 if (code == NULL)
1349 goto code_error;
1350 consts = r_object(p);
1351 if (consts == NULL)
1352 goto code_error;
1353 names = r_object(p);
1354 if (names == NULL)
1355 goto code_error;
1356 varnames = r_object(p);
1357 if (varnames == NULL)
1358 goto code_error;
1359 freevars = r_object(p);
1360 if (freevars == NULL)
1361 goto code_error;
1362 cellvars = r_object(p);
1363 if (cellvars == NULL)
1364 goto code_error;
1365 filename = r_object(p);
1366 if (filename == NULL)
1367 goto code_error;
1368 name = r_object(p);
1369 if (name == NULL)
1370 goto code_error;
1371 firstlineno = (int)r_long(p);
1372 if (firstlineno == -1 && PyErr_Occurred())
1373 break;
1374 linetable = r_object(p);
1375 if (linetable == NULL)
1376 goto code_error;
1377
1378 v = (PyObject *) PyCode_NewWithPosOnlyArgs(
1379 argcount, posonlyargcount, kwonlyargcount,
1380 nlocals, stacksize, flags,
1381 code, consts, names, varnames,
1382 freevars, cellvars, filename, name,
1383 firstlineno, linetable);
1384 v = r_ref_insert(v, idx, flag, p);
1385
1386 code_error:
1387 Py_XDECREF(code);
1388 Py_XDECREF(consts);
1389 Py_XDECREF(names);
1390 Py_XDECREF(varnames);
1391 Py_XDECREF(freevars);
1392 Py_XDECREF(cellvars);
1393 Py_XDECREF(filename);
1394 Py_XDECREF(name);
1395 Py_XDECREF(linetable);
1396 }
1397 retval = v;
1398 break;
1399
1400 case TYPE_REF:
1401 n = r_long(p);
1402 if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1403 if (n == -1 && PyErr_Occurred())
1404 break;
1405 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1406 break;
1407 }
1408 v = PyList_GET_ITEM(p->refs, n);
1409 if (v == Py_None) {
1410 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1411 break;
1412 }
1413 Py_INCREF(v);
1414 retval = v;
1415 break;
1416
1417 default:
1418 /* Bogus data got written, which isn't ideal.
1419 This will let you keep working and recover. */
1420 PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1421 break;
1422
1423 }
1424 p->depth--;
1425 return retval;
1426 }
1427
1428 static PyObject *
read_object(RFILE * p)1429 read_object(RFILE *p)
1430 {
1431 PyObject *v;
1432 if (PyErr_Occurred()) {
1433 fprintf(stderr, "XXX readobject called with exception set\n");
1434 return NULL;
1435 }
1436 if (p->ptr && p->end) {
1437 if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1438 return NULL;
1439 }
1440 } else if (p->fp || p->readable) {
1441 if (PySys_Audit("marshal.load", NULL) < 0) {
1442 return NULL;
1443 }
1444 }
1445 v = r_object(p);
1446 if (v == NULL && !PyErr_Occurred())
1447 PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1448 return v;
1449 }
1450
1451 int
PyMarshal_ReadShortFromFile(FILE * fp)1452 PyMarshal_ReadShortFromFile(FILE *fp)
1453 {
1454 RFILE rf;
1455 int res;
1456 assert(fp);
1457 rf.readable = NULL;
1458 rf.fp = fp;
1459 rf.end = rf.ptr = NULL;
1460 rf.buf = NULL;
1461 res = r_short(&rf);
1462 if (rf.buf != NULL)
1463 PyMem_Free(rf.buf);
1464 return res;
1465 }
1466
1467 long
PyMarshal_ReadLongFromFile(FILE * fp)1468 PyMarshal_ReadLongFromFile(FILE *fp)
1469 {
1470 RFILE rf;
1471 long res;
1472 rf.fp = fp;
1473 rf.readable = NULL;
1474 rf.ptr = rf.end = NULL;
1475 rf.buf = NULL;
1476 res = r_long(&rf);
1477 if (rf.buf != NULL)
1478 PyMem_Free(rf.buf);
1479 return res;
1480 }
1481
1482 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1483 static off_t
getfilesize(FILE * fp)1484 getfilesize(FILE *fp)
1485 {
1486 struct _Py_stat_struct st;
1487 if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1488 return -1;
1489 #if SIZEOF_OFF_T == 4
1490 else if (st.st_size >= INT_MAX)
1491 return (off_t)INT_MAX;
1492 #endif
1493 else
1494 return (off_t)st.st_size;
1495 }
1496
1497 /* If we can get the size of the file up-front, and it's reasonably small,
1498 * read it in one gulp and delegate to ...FromString() instead. Much quicker
1499 * than reading a byte at a time from file; speeds .pyc imports.
1500 * CAUTION: since this may read the entire remainder of the file, don't
1501 * call it unless you know you're done with the file.
1502 */
1503 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1504 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1505 {
1506 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1507 #define REASONABLE_FILE_LIMIT (1L << 18)
1508 off_t filesize;
1509 filesize = getfilesize(fp);
1510 if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1511 char* pBuf = (char *)PyMem_Malloc(filesize);
1512 if (pBuf != NULL) {
1513 size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1514 PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1515 PyMem_Free(pBuf);
1516 return v;
1517 }
1518
1519 }
1520 /* We don't have fstat, or we do but the file is larger than
1521 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1522 */
1523 return PyMarshal_ReadObjectFromFile(fp);
1524
1525 #undef REASONABLE_FILE_LIMIT
1526 }
1527
1528 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1529 PyMarshal_ReadObjectFromFile(FILE *fp)
1530 {
1531 RFILE rf;
1532 PyObject *result;
1533 rf.fp = fp;
1534 rf.readable = NULL;
1535 rf.depth = 0;
1536 rf.ptr = rf.end = NULL;
1537 rf.buf = NULL;
1538 rf.refs = PyList_New(0);
1539 if (rf.refs == NULL)
1540 return NULL;
1541 result = read_object(&rf);
1542 Py_DECREF(rf.refs);
1543 if (rf.buf != NULL)
1544 PyMem_Free(rf.buf);
1545 return result;
1546 }
1547
1548 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1549 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1550 {
1551 RFILE rf;
1552 PyObject *result;
1553 rf.fp = NULL;
1554 rf.readable = NULL;
1555 rf.ptr = str;
1556 rf.end = str + len;
1557 rf.buf = NULL;
1558 rf.depth = 0;
1559 rf.refs = PyList_New(0);
1560 if (rf.refs == NULL)
1561 return NULL;
1562 result = read_object(&rf);
1563 Py_DECREF(rf.refs);
1564 if (rf.buf != NULL)
1565 PyMem_Free(rf.buf);
1566 return result;
1567 }
1568
1569 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1570 PyMarshal_WriteObjectToString(PyObject *x, int version)
1571 {
1572 WFILE wf;
1573
1574 if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
1575 return NULL;
1576 }
1577 memset(&wf, 0, sizeof(wf));
1578 wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1579 if (wf.str == NULL)
1580 return NULL;
1581 wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str);
1582 wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
1583 wf.error = WFERR_OK;
1584 wf.version = version;
1585 if (w_init_refs(&wf, version)) {
1586 Py_DECREF(wf.str);
1587 return NULL;
1588 }
1589 w_object(x, &wf);
1590 w_clear_refs(&wf);
1591 if (wf.str != NULL) {
1592 const char *base = PyBytes_AS_STRING(wf.str);
1593 if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1594 return NULL;
1595 }
1596 if (wf.error != WFERR_OK) {
1597 Py_XDECREF(wf.str);
1598 if (wf.error == WFERR_NOMEMORY)
1599 PyErr_NoMemory();
1600 else
1601 PyErr_SetString(PyExc_ValueError,
1602 (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1603 :"object too deeply nested to marshal");
1604 return NULL;
1605 }
1606 return wf.str;
1607 }
1608
1609 /* And an interface for Python programs... */
1610 /*[clinic input]
1611 marshal.dump
1612
1613 value: object
1614 Must be a supported type.
1615 file: object
1616 Must be a writeable binary file.
1617 version: int(c_default="Py_MARSHAL_VERSION") = version
1618 Indicates the data format that dump should use.
1619 /
1620
1621 Write the value on the open file.
1622
1623 If the value has (or contains an object that has) an unsupported type, a
1624 ValueError exception is raised - but garbage data will also be written
1625 to the file. The object will not be properly read back by load().
1626 [clinic start generated code]*/
1627
1628 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version)1629 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1630 int version)
1631 /*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1632 {
1633 /* XXX Quick hack -- need to do this differently */
1634 PyObject *s;
1635 PyObject *res;
1636 _Py_IDENTIFIER(write);
1637
1638 s = PyMarshal_WriteObjectToString(value, version);
1639 if (s == NULL)
1640 return NULL;
1641 res = _PyObject_CallMethodIdOneArg(file, &PyId_write, s);
1642 Py_DECREF(s);
1643 return res;
1644 }
1645
1646 /*[clinic input]
1647 marshal.load
1648
1649 file: object
1650 Must be readable binary file.
1651 /
1652
1653 Read one value from the open file and return it.
1654
1655 If no valid value is read (e.g. because the data has a different Python
1656 version's incompatible marshal format), raise EOFError, ValueError or
1657 TypeError.
1658
1659 Note: If an object containing an unsupported type was marshalled with
1660 dump(), load() will substitute None for the unmarshallable type.
1661 [clinic start generated code]*/
1662
1663 static PyObject *
marshal_load(PyObject * module,PyObject * file)1664 marshal_load(PyObject *module, PyObject *file)
1665 /*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
1666 {
1667 PyObject *data, *result;
1668 _Py_IDENTIFIER(read);
1669 RFILE rf;
1670
1671 /*
1672 * Make a call to the read method, but read zero bytes.
1673 * This is to ensure that the object passed in at least
1674 * has a read method which returns bytes.
1675 * This can be removed if we guarantee good error handling
1676 * for r_string()
1677 */
1678 data = _PyObject_CallMethodId(file, &PyId_read, "i", 0);
1679 if (data == NULL)
1680 return NULL;
1681 if (!PyBytes_Check(data)) {
1682 PyErr_Format(PyExc_TypeError,
1683 "file.read() returned not bytes but %.100s",
1684 Py_TYPE(data)->tp_name);
1685 result = NULL;
1686 }
1687 else {
1688 rf.depth = 0;
1689 rf.fp = NULL;
1690 rf.readable = file;
1691 rf.ptr = rf.end = NULL;
1692 rf.buf = NULL;
1693 if ((rf.refs = PyList_New(0)) != NULL) {
1694 result = read_object(&rf);
1695 Py_DECREF(rf.refs);
1696 if (rf.buf != NULL)
1697 PyMem_Free(rf.buf);
1698 } else
1699 result = NULL;
1700 }
1701 Py_DECREF(data);
1702 return result;
1703 }
1704
1705 /*[clinic input]
1706 marshal.dumps
1707
1708 value: object
1709 Must be a supported type.
1710 version: int(c_default="Py_MARSHAL_VERSION") = version
1711 Indicates the data format that dumps should use.
1712 /
1713
1714 Return the bytes object that would be written to a file by dump(value, file).
1715
1716 Raise a ValueError exception if value has (or contains an object that has) an
1717 unsupported type.
1718 [clinic start generated code]*/
1719
1720 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version)1721 marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1722 /*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1723 {
1724 return PyMarshal_WriteObjectToString(value, version);
1725 }
1726
1727 /*[clinic input]
1728 marshal.loads
1729
1730 bytes: Py_buffer
1731 /
1732
1733 Convert the bytes-like object to a value.
1734
1735 If no valid value is found, raise EOFError, ValueError or TypeError. Extra
1736 bytes in the input are ignored.
1737 [clinic start generated code]*/
1738
1739 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes)1740 marshal_loads_impl(PyObject *module, Py_buffer *bytes)
1741 /*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
1742 {
1743 RFILE rf;
1744 char *s = bytes->buf;
1745 Py_ssize_t n = bytes->len;
1746 PyObject* result;
1747 rf.fp = NULL;
1748 rf.readable = NULL;
1749 rf.ptr = s;
1750 rf.end = s + n;
1751 rf.depth = 0;
1752 if ((rf.refs = PyList_New(0)) == NULL)
1753 return NULL;
1754 result = read_object(&rf);
1755 Py_DECREF(rf.refs);
1756 return result;
1757 }
1758
/* Method table referenced by marshalmodule.m_methods.  Each entry is a
 * *_METHODDEF macro (presumably supplied by the included
 * clinic/marshal.c.h -- confirm); the macros are listed without separating
 * commas, so they appear to carry their own.
 */
static PyMethodDef marshal_methods[] = {
    MARSHAL_DUMP_METHODDEF
    MARSHAL_LOAD_METHODDEF
    MARSHAL_DUMPS_METHODDEF
    MARSHAL_LOADS_METHODDEF
    {NULL, NULL} /* sentinel */
};
1766
1767
/* Docstring of the marshal module; installed through marshalmodule.m_doc. */
PyDoc_STRVAR(module_doc,
"This module contains functions that can read and write Python values in\n\
a binary format. The format is specific to Python, but independent of\n\
machine architecture issues.\n\
\n\
Not all Python object types are supported; in general, only objects\n\
whose value is independent from a particular invocation of Python can be\n\
written and read by this module. The following types are supported:\n\
None, integers, floating point numbers, strings, bytes, bytearrays,\n\
tuples, lists, sets, dictionaries, and code objects, where it\n\
should be understood that tuples, lists and dictionaries are only\n\
supported as long as the values contained therein are themselves\n\
supported; and recursive lists and dictionaries should not be written\n\
(they will cause infinite loops).\n\
\n\
Variables:\n\
\n\
version -- indicates the format that the module uses. Version 0 is the\n\
historical format, version 1 shares interned strings and version 2\n\
uses a binary format for floating point numbers.\n\
Version 3 shares common object references (New in version 3.4).\n\
\n\
Functions:\n\
\n\
dump() -- write value to a file\n\
load() -- read value from a file\n\
dumps() -- marshal value as a bytes object\n\
loads() -- read value from a bytes-like object");
1796
1797
1798 static int
marshal_module_exec(PyObject * mod)1799 marshal_module_exec(PyObject *mod)
1800 {
1801 if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
1802 return -1;
1803 }
1804 return 0;
1805 }
1806
/* Slot table referenced by marshalmodule.m_slots: run marshal_module_exec()
 * as the module's exec step.  The {0, NULL} entry terminates the table.
 */
static PyModuleDef_Slot marshalmodule_slots[] = {
    {Py_mod_exec, marshal_module_exec},
    {0, NULL}
};
1811
/* Definition of the "marshal" module, handed to PyModuleDef_Init() by
 * PyMarshal_Init().  It ties together the docstring, the method table and
 * the slot table defined above.
 */
static struct PyModuleDef marshalmodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "marshal",
    .m_doc = module_doc,
    .m_methods = marshal_methods,
    .m_slots = marshalmodule_slots,
};
1819
1820 PyMODINIT_FUNC
PyMarshal_Init(void)1821 PyMarshal_Init(void)
1822 {
1823 return PyModuleDef_Init(&marshalmodule);
1824 }
1825