1
2 /* Write Python objects to files and read them back.
3 This is primarily intended for writing and reading compiled Python code,
4 even though dicts, lists, sets and frozensets, not commonly seen in
5 code objects, are supported.
6 Version 3 of this protocol properly supports circular links
7 and sharing. */
8
9 #define PY_SSIZE_T_CLEAN
10
11 #include "Python.h"
12 #include "longintrepr.h"
13 #include "code.h"
14 #include "marshal.h"
15 #include "../Modules/hashtable.h"
16
17 /*[clinic input]
18 module marshal
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
21
22 #include "clinic/marshal.c.h"
23
/* High water mark to determine when the marshalled object is dangerously deep
 * and risks coring the interpreter.  When the object stack gets this deep,
 * raise an exception instead of continuing.
 *
 * The limit is enforced by w_object() when writing (WFERR_NESTEDTOODEEP) and
 * by r_object() when reading ("recursion limit exceeded").
 *
 * BUG: https://bugs.python.org/issue33720
 * On Windows PGO builds, the r_object function overallocates its stack and
 * can cause a stack overflow. We reduce the maximum depth for all Windows
 * releases to protect against this.
 *
 * NOTE(review): the condition used to read
 *     #if defined(MS_WINDOWS) && defined(_DEBUG)
 * i.e. presumably only Windows debug builds were reduced before the issue
 * above -- confirm against the bug report before restoring it.
 */
#if defined(MS_WINDOWS)
#define MAX_MARSHAL_STACK_DEPTH 1000
#else
#define MAX_MARSHAL_STACK_DEPTH 2000
#endif
40
/* One-byte type codes.  Every object in a marshal stream starts with one of
   these, optionally OR-ed with FLAG_REF. */

/* Written by w_object() with no payload.  TYPE_NULL stands for a NULL
   pointer and is also what terminates a TYPE_DICT item sequence. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
/* TYPE_INT64 is not generated anymore.
   Supported for backward compatibility only. */
#define TYPE_INT64              'I'
/* Text ('f', 'x') and 8-byte binary ('g', 'y') float/complex formats; the
   writer picks the binary ones when version > 1. */
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
/* Back-reference: followed by the 32-bit index of an earlier object. */
#define TYPE_REF                'r'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
/* Not a type: high bit OR-ed into the type byte by W_TYPE and masked back
   out by the reader. */
#define FLAG_REF                '\x80' /* with a type, add obj to index */

/* Compact encodings, emitted by the writer only when version >= 4.  The
   SHORT/SMALL variants carry a one-byte length (< 256) instead of 32 bits. */
#define TYPE_ASCII              'a'
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SMALL_TUPLE        ')'
#define TYPE_SHORT_ASCII        'z'
#define TYPE_SHORT_ASCII_INTERNED 'Z'
74
/* Values stored in WFILE.error by the w_* writer functions. */
#define WFERR_OK 0                  /* no error recorded */
#define WFERR_UNMARSHALLABLE 1      /* unsupported object, or size too big */
#define WFERR_NESTEDTOODEEP 2       /* depth exceeded MAX_MARSHAL_STACK_DEPTH */
#define WFERR_NOMEMORY 3            /* buffer growth or allocation failed */
79
/* State of one marshal write operation.  Output goes either to a stdio
   stream (fp != NULL, with buf as a fixed staging buffer that w_flush()
   empties) or into a growing bytes object (fp == NULL, str != NULL). */
typedef struct {
    FILE *fp;                   /* destination stream, or NULL in bytes mode */
    int error;  /* see WFERR_* values */
    int depth;                  /* current w_object() nesting level */
    PyObject *str;              /* bytes object being filled (bytes mode) */
    char *ptr;                  /* next byte to write; NULL after a failed
                                   resize */
    char *end;                  /* one past the last usable byte of buf */
    char *buf;                  /* start of the current output buffer */
    _Py_hashtable_t *hashtable; /* object -> reference index (version >= 3),
                                   or NULL */
    int version;                /* marshal format version being written */
} WFILE;
91
/* Append the single byte `c` to writer `p`.  When the buffer is full,
   w_reserve() is asked for one more byte; if that fails the byte is silently
   dropped (w_reserve() records the failure in the WFILE).
   Note: `p` is evaluated more than once, and `c` is not evaluated at all
   when no room can be obtained. */
#define w_byte(c, p) do {                               \
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
            *(p)->ptr++ = (c);                          \
    } while(0)
96
97 static void
w_flush(WFILE * p)98 w_flush(WFILE *p)
99 {
100 assert(p->fp != NULL);
101 fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
102 p->ptr = p->buf;
103 }
104
105 static int
w_reserve(WFILE * p,Py_ssize_t needed)106 w_reserve(WFILE *p, Py_ssize_t needed)
107 {
108 Py_ssize_t pos, size, delta;
109 if (p->ptr == NULL)
110 return 0; /* An error already occurred */
111 if (p->fp != NULL) {
112 w_flush(p);
113 return needed <= p->end - p->ptr;
114 }
115 assert(p->str != NULL);
116 pos = p->ptr - p->buf;
117 size = PyBytes_Size(p->str);
118 if (size > 16*1024*1024)
119 delta = (size >> 3); /* 12.5% overallocation */
120 else
121 delta = size + 1024;
122 delta = Py_MAX(delta, needed);
123 if (delta > PY_SSIZE_T_MAX - size) {
124 p->error = WFERR_NOMEMORY;
125 return 0;
126 }
127 size += delta;
128 if (_PyBytes_Resize(&p->str, size) != 0) {
129 p->ptr = p->buf = p->end = NULL;
130 return 0;
131 }
132 else {
133 p->buf = PyBytes_AS_STRING(p->str);
134 p->ptr = p->buf + pos;
135 p->end = p->buf + size;
136 return 1;
137 }
138 }
139
140 static void
w_string(const char * s,Py_ssize_t n,WFILE * p)141 w_string(const char *s, Py_ssize_t n, WFILE *p)
142 {
143 Py_ssize_t m;
144 if (!n || p->ptr == NULL)
145 return;
146 m = p->end - p->ptr;
147 if (p->fp != NULL) {
148 if (n <= m) {
149 memcpy(p->ptr, s, n);
150 p->ptr += n;
151 }
152 else {
153 w_flush(p);
154 fwrite(s, 1, n, p->fp);
155 }
156 }
157 else {
158 if (n <= m || w_reserve(p, n - m)) {
159 memcpy(p->ptr, s, n);
160 p->ptr += n;
161 }
162 }
163 }
164
165 static void
w_short(int x,WFILE * p)166 w_short(int x, WFILE *p)
167 {
168 w_byte((char)( x & 0xff), p);
169 w_byte((char)((x>> 8) & 0xff), p);
170 }
171
172 static void
w_long(long x,WFILE * p)173 w_long(long x, WFILE *p)
174 {
175 w_byte((char)( x & 0xff), p);
176 w_byte((char)((x>> 8) & 0xff), p);
177 w_byte((char)((x>>16) & 0xff), p);
178 w_byte((char)((x>>24) & 0xff), p);
179 }
180
/* Largest size or count that the format's 32-bit length fields may carry. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size n with w_long().
   Where size_t is wider than 32 bits, a value above SIZE32_MAX cannot be
   stored: the macro then decrements p->depth, records WFERR_UNMARSHALLABLE
   and executes `return;` in the *enclosing* function.  It can therefore only
   be used inside functions returning void, and code following it in the
   caller is skipped on overflow. */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->depth--;                       \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif
195
/* Write a length-prefixed byte string: the 32-bit length n followed by the
   n bytes at s. */
static void
w_pstring(const char *s, Py_ssize_t n, WFILE *p)
{
    /* W_SIZE may `return` from this function (after flagging
       WFERR_UNMARSHALLABLE) when n does not fit in 32 bits; in that case
       nothing is written. */
    W_SIZE(n, p);
    w_string(s, n, p);
}
202
203 static void
w_short_pstring(const char * s,Py_ssize_t n,WFILE * p)204 w_short_pstring(const char *s, Py_ssize_t n, WFILE *p)
205 {
206 w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
207 w_string(s, n, p);
208 }
209
/* We assume that Python ints are stored internally in base some power of
   2**15; for the sake of portability we'll always read and write them in base
   exactly 2**15. */

/* Bits per marshalled digit, the corresponding base (2**15) and bit mask. */
#define PyLong_MARSHAL_SHIFT 15
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
/* Number of marshalled 15-bit digits per internal PyLong digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)

/* Write the type byte t for an object.  NOTE: this macro picks up a variable
   named `flag` from the enclosing scope and ORs it into the byte (that is
   how FLAG_REF reaches the stream), so it can only be used where such a
   variable exists. */
#define W_TYPE(t, p) do { \
    w_byte((t) | flag, (p)); \
} while(0)
225
226 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)227 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
228 {
229 Py_ssize_t i, j, n, l;
230 digit d;
231
232 W_TYPE(TYPE_LONG, p);
233 if (Py_SIZE(ob) == 0) {
234 w_long((long)0, p);
235 return;
236 }
237
238 /* set l to number of base PyLong_MARSHAL_BASE digits */
239 n = Py_ABS(Py_SIZE(ob));
240 l = (n-1) * PyLong_MARSHAL_RATIO;
241 d = ob->ob_digit[n-1];
242 assert(d != 0); /* a PyLong is always normalized */
243 do {
244 d >>= PyLong_MARSHAL_SHIFT;
245 l++;
246 } while (d != 0);
247 if (l > SIZE32_MAX) {
248 p->depth--;
249 p->error = WFERR_UNMARSHALLABLE;
250 return;
251 }
252 w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
253
254 for (i=0; i < n-1; i++) {
255 d = ob->ob_digit[i];
256 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
257 w_short(d & PyLong_MARSHAL_MASK, p);
258 d >>= PyLong_MARSHAL_SHIFT;
259 }
260 assert (d == 0);
261 }
262 d = ob->ob_digit[n-1];
263 do {
264 w_short(d & PyLong_MARSHAL_MASK, p);
265 d >>= PyLong_MARSHAL_SHIFT;
266 } while (d != 0);
267 }
268
269 static void
w_float_bin(double v,WFILE * p)270 w_float_bin(double v, WFILE *p)
271 {
272 unsigned char buf[8];
273 if (_PyFloat_Pack8(v, buf, 1) < 0) {
274 p->error = WFERR_UNMARSHALLABLE;
275 return;
276 }
277 w_string((const char *)buf, 8, p);
278 }
279
280 static void
w_float_str(double v,WFILE * p)281 w_float_str(double v, WFILE *p)
282 {
283 int n;
284 char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
285 if (!buf) {
286 p->error = WFERR_NOMEMORY;
287 return;
288 }
289 n = (int)strlen(buf);
290 w_byte(n, p);
291 w_string(buf, n, p);
292 PyMem_Free(buf);
293 }
294
295 static int
w_ref(PyObject * v,char * flag,WFILE * p)296 w_ref(PyObject *v, char *flag, WFILE *p)
297 {
298 _Py_hashtable_entry_t *entry;
299 int w;
300
301 if (p->version < 3 || p->hashtable == NULL)
302 return 0; /* not writing object references */
303
304 /* if it has only one reference, it definitely isn't shared */
305 if (Py_REFCNT(v) == 1)
306 return 0;
307
308 entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
309 if (entry != NULL) {
310 /* write the reference index to the stream */
311 _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, w);
312 /* we don't store "long" indices in the dict */
313 assert(0 <= w && w <= 0x7fffffff);
314 w_byte(TYPE_REF, p);
315 w_long(w, p);
316 return 1;
317 } else {
318 size_t s = p->hashtable->entries;
319 /* we don't support long indices */
320 if (s >= 0x7fffffff) {
321 PyErr_SetString(PyExc_ValueError, "too many objects");
322 goto err;
323 }
324 w = (int)s;
325 Py_INCREF(v);
326 if (_Py_HASHTABLE_SET(p->hashtable, v, w) < 0) {
327 Py_DECREF(v);
328 goto err;
329 }
330 *flag |= FLAG_REF;
331 return 0;
332 }
333 err:
334 p->error = WFERR_UNMARSHALLABLE;
335 return 1;
336 }
337
/* Forward declaration: w_object() calls w_complex_object(), which in turn
   calls w_object() for the items of containers. */
static void
w_complex_object(PyObject *v, char flag, WFILE *p);
340
341 static void
w_object(PyObject * v,WFILE * p)342 w_object(PyObject *v, WFILE *p)
343 {
344 char flag = '\0';
345
346 p->depth++;
347
348 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
349 p->error = WFERR_NESTEDTOODEEP;
350 }
351 else if (v == NULL) {
352 w_byte(TYPE_NULL, p);
353 }
354 else if (v == Py_None) {
355 w_byte(TYPE_NONE, p);
356 }
357 else if (v == PyExc_StopIteration) {
358 w_byte(TYPE_STOPITER, p);
359 }
360 else if (v == Py_Ellipsis) {
361 w_byte(TYPE_ELLIPSIS, p);
362 }
363 else if (v == Py_False) {
364 w_byte(TYPE_FALSE, p);
365 }
366 else if (v == Py_True) {
367 w_byte(TYPE_TRUE, p);
368 }
369 else if (!w_ref(v, &flag, p))
370 w_complex_object(v, flag, p);
371
372 p->depth--;
373 }
374
/* Write an object that is neither NULL, one of the payload-free singletons,
   nor an already-seen reference (w_object() handles those).

   `flag` is OR-ed into the type byte by W_TYPE; w_ref() puts FLAG_REF in it
   when the object was just added to the reference table.

   Dispatch is on exact type: int, float, complex, bytes, str, tuple, list,
   dict, set/frozenset, then code objects and finally anything exporting the
   buffer protocol.  Any other object writes TYPE_UNKNOWN and records
   WFERR_UNMARSHALLABLE.

   Beware of the macros: W_SIZE may `return` from this function when a size
   does not fit in 32 bits (see its definition).

   NOTE(review): the error paths below decrement p->depth although the
   calling w_object() decrements it again on return; presumably harmless
   because the write is abandoned once p->error is set -- confirm. */
static void
w_complex_object(PyObject *v, char flag, WFILE *p)
{
    Py_ssize_t i, n;

    if (PyLong_CheckExact(v)) {
        long x = PyLong_AsLong(v);
        if ((x == -1) && PyErr_Occurred()) {
            /* does not fit in a C long: drop the exception and use the
               arbitrary-precision format */
            PyLongObject *ob = (PyLongObject *)v;
            PyErr_Clear();
            w_PyLong(ob, flag, p);
        }
        else {
#if SIZEOF_LONG > 4
            /* y is 0 or -1 exactly when x fits in a signed 32-bit value */
            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
            if (y && y != -1) {
                /* Too large for TYPE_INT */
                w_PyLong((PyLongObject*)v, flag, p);
            }
            else
#endif
            {
                W_TYPE(TYPE_INT, p);
                w_long(x, p);
            }
        }
    }
    else if (PyFloat_CheckExact(v)) {
        /* binary format from version 2 on, text format before */
        if (p->version > 1) {
            W_TYPE(TYPE_BINARY_FLOAT, p);
            w_float_bin(PyFloat_AS_DOUBLE(v), p);
        }
        else {
            W_TYPE(TYPE_FLOAT, p);
            w_float_str(PyFloat_AS_DOUBLE(v), p);
        }
    }
    else if (PyComplex_CheckExact(v)) {
        /* real part first, then imaginary part */
        if (p->version > 1) {
            W_TYPE(TYPE_BINARY_COMPLEX, p);
            w_float_bin(PyComplex_RealAsDouble(v), p);
            w_float_bin(PyComplex_ImagAsDouble(v), p);
        }
        else {
            W_TYPE(TYPE_COMPLEX, p);
            w_float_str(PyComplex_RealAsDouble(v), p);
            w_float_str(PyComplex_ImagAsDouble(v), p);
        }
    }
    else if (PyBytes_CheckExact(v)) {
        W_TYPE(TYPE_STRING, p);
        w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
    }
    else if (PyUnicode_CheckExact(v)) {
        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
            /* ASCII fast path: the 1-byte character data is written as is;
               strings shorter than 256 get a one-byte length prefix */
            int is_short = PyUnicode_GET_LENGTH(v) < 256;
            if (is_short) {
                if (PyUnicode_CHECK_INTERNED(v))
                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
                else
                    W_TYPE(TYPE_SHORT_ASCII, p);
                w_short_pstring((char *) PyUnicode_1BYTE_DATA(v),
                                PyUnicode_GET_LENGTH(v), p);
            }
            else {
                if (PyUnicode_CHECK_INTERNED(v))
                    W_TYPE(TYPE_ASCII_INTERNED, p);
                else
                    W_TYPE(TYPE_ASCII, p);
                w_pstring((char *) PyUnicode_1BYTE_DATA(v),
                          PyUnicode_GET_LENGTH(v), p);
            }
        }
        else {
            /* general case: UTF-8 with lone surrogates passed through */
            PyObject *utf8;
            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
            if (utf8 == NULL) {
                p->depth--;
                p->error = WFERR_UNMARSHALLABLE;
                return;
            }
            if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v))
                W_TYPE(TYPE_INTERNED, p);
            else
                W_TYPE(TYPE_UNICODE, p);
            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
            Py_DECREF(utf8);
        }
    }
    else if (PyTuple_CheckExact(v)) {
        n = PyTuple_Size(v);
        if (p->version >= 4 && n < 256) {
            /* compact form: one-byte item count */
            W_TYPE(TYPE_SMALL_TUPLE, p);
            w_byte((unsigned char)n, p);
        }
        else {
            W_TYPE(TYPE_TUPLE, p);
            W_SIZE(n, p);
        }
        for (i = 0; i < n; i++) {
            w_object(PyTuple_GET_ITEM(v, i), p);
        }
    }
    else if (PyList_CheckExact(v)) {
        W_TYPE(TYPE_LIST, p);
        n = PyList_GET_SIZE(v);
        W_SIZE(n, p);
        for (i = 0; i < n; i++) {
            w_object(PyList_GET_ITEM(v, i), p);
        }
    }
    else if (PyDict_CheckExact(v)) {
        Py_ssize_t pos;
        PyObject *key, *value;
        W_TYPE(TYPE_DICT, p);
        /* This one is NULL object terminated! */
        /* (no item count is written: key/value pairs follow until a
           TYPE_NULL byte) */
        pos = 0;
        while (PyDict_Next(v, &pos, &key, &value)) {
            w_object(key, p);
            w_object(value, p);
        }
        w_object((PyObject *)NULL, p);
    }
    else if (PyAnySet_CheckExact(v)) {
        PyObject *value, *it;

        if (PyObject_TypeCheck(v, &PySet_Type))
            W_TYPE(TYPE_SET, p);
        else
            W_TYPE(TYPE_FROZENSET, p);
        n = PyObject_Size(v);
        if (n == -1) {
            p->depth--;
            p->error = WFERR_UNMARSHALLABLE;
            return;
        }
        W_SIZE(n, p);
        it = PyObject_GetIter(v);
        if (it == NULL) {
            p->depth--;
            p->error = WFERR_UNMARSHALLABLE;
            return;
        }
        /* items are written in iteration order */
        while ((value = PyIter_Next(it)) != NULL) {
            w_object(value, p);
            Py_DECREF(value);
        }
        Py_DECREF(it);
        /* PyIter_Next() returns NULL both at exhaustion and on error */
        if (PyErr_Occurred()) {
            p->depth--;
            p->error = WFERR_UNMARSHALLABLE;
            return;
        }
    }
    else if (PyCode_Check(v)) {
        /* the field order here is the on-disk layout of a code object */
        PyCodeObject *co = (PyCodeObject *)v;
        W_TYPE(TYPE_CODE, p);
        w_long(co->co_argcount, p);
        w_long(co->co_posonlyargcount, p);
        w_long(co->co_kwonlyargcount, p);
        w_long(co->co_nlocals, p);
        w_long(co->co_stacksize, p);
        w_long(co->co_flags, p);
        w_object(co->co_code, p);
        w_object(co->co_consts, p);
        w_object(co->co_names, p);
        w_object(co->co_varnames, p);
        w_object(co->co_freevars, p);
        w_object(co->co_cellvars, p);
        w_object(co->co_filename, p);
        w_object(co->co_name, p);
        w_long(co->co_firstlineno, p);
        w_object(co->co_lnotab, p);
    }
    else if (PyObject_CheckBuffer(v)) {
        /* Write unknown bytes-like objects as a bytes object */
        Py_buffer view;
        if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
            w_byte(TYPE_UNKNOWN, p);
            p->depth--;
            p->error = WFERR_UNMARSHALLABLE;
            return;
        }
        W_TYPE(TYPE_STRING, p);
        /* the size check lives inside w_pstring(), so an oversized buffer
           returns from there and the view is still released below */
        w_pstring(view.buf, view.len, p);
        PyBuffer_Release(&view);
    }
    else {
        W_TYPE(TYPE_UNKNOWN, p);
        p->error = WFERR_UNMARSHALLABLE;
    }
}
567
568 static int
w_init_refs(WFILE * wf,int version)569 w_init_refs(WFILE *wf, int version)
570 {
571 if (version >= 3) {
572 wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int),
573 _Py_hashtable_hash_ptr,
574 _Py_hashtable_compare_direct);
575 if (wf->hashtable == NULL) {
576 PyErr_NoMemory();
577 return -1;
578 }
579 }
580 return 0;
581 }
582
583 static int
w_decref_entry(_Py_hashtable_t * ht,_Py_hashtable_entry_t * entry,void * Py_UNUSED (data))584 w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
585 void *Py_UNUSED(data))
586 {
587 PyObject *entry_key;
588
589 _Py_HASHTABLE_ENTRY_READ_KEY(ht, entry, entry_key);
590 Py_XDECREF(entry_key);
591 return 0;
592 }
593
594 static void
w_clear_refs(WFILE * wf)595 w_clear_refs(WFILE *wf)
596 {
597 if (wf->hashtable != NULL) {
598 _Py_hashtable_foreach(wf->hashtable, w_decref_entry, NULL);
599 _Py_hashtable_destroy(wf->hashtable);
600 }
601 }
602
603 /* version currently has no effect for writing ints. */
604 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)605 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
606 {
607 char buf[4];
608 WFILE wf;
609 memset(&wf, 0, sizeof(wf));
610 wf.fp = fp;
611 wf.ptr = wf.buf = buf;
612 wf.end = wf.ptr + sizeof(buf);
613 wf.error = WFERR_OK;
614 wf.version = version;
615 w_long(x, &wf);
616 w_flush(&wf);
617 }
618
619 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)620 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
621 {
622 char buf[BUFSIZ];
623 WFILE wf;
624 memset(&wf, 0, sizeof(wf));
625 wf.fp = fp;
626 wf.ptr = wf.buf = buf;
627 wf.end = wf.ptr + sizeof(buf);
628 wf.error = WFERR_OK;
629 wf.version = version;
630 if (w_init_refs(&wf, version))
631 return; /* caller mush check PyErr_Occurred() */
632 w_object(x, &wf);
633 w_clear_refs(&wf);
634 w_flush(&wf);
635 }
636
/* State of one marshal read operation.  Input comes from one of three
   places: an in-memory buffer (ptr != NULL -- the fast path used by
   loads()), a stdio stream (fp), or a Python object with a readinto()
   method (readable).  For the latter two, data is staged in buf. */
typedef struct {
    FILE *fp;               /* source stream, used when readable is NULL */
    int depth;              /* current r_object() nesting level */
    PyObject *readable;  /* Stream-like object being read from */
    char *ptr;              /* in-memory mode: next unread byte, else NULL */
    char *end;              /* in-memory mode: one past the last byte */
    char *buf;              /* staging buffer from PyMem_MALLOC, grown on
                               demand by r_string() */
    Py_ssize_t buf_size;    /* allocated size of buf */
    PyObject *refs;  /* a list */
} RFILE;
647
648 static const char *
r_string(Py_ssize_t n,RFILE * p)649 r_string(Py_ssize_t n, RFILE *p)
650 {
651 Py_ssize_t read = -1;
652
653 if (p->ptr != NULL) {
654 /* Fast path for loads() */
655 char *res = p->ptr;
656 Py_ssize_t left = p->end - p->ptr;
657 if (left < n) {
658 PyErr_SetString(PyExc_EOFError,
659 "marshal data too short");
660 return NULL;
661 }
662 p->ptr += n;
663 return res;
664 }
665 if (p->buf == NULL) {
666 p->buf = PyMem_MALLOC(n);
667 if (p->buf == NULL) {
668 PyErr_NoMemory();
669 return NULL;
670 }
671 p->buf_size = n;
672 }
673 else if (p->buf_size < n) {
674 char *tmp = PyMem_REALLOC(p->buf, n);
675 if (tmp == NULL) {
676 PyErr_NoMemory();
677 return NULL;
678 }
679 p->buf = tmp;
680 p->buf_size = n;
681 }
682
683 if (!p->readable) {
684 assert(p->fp != NULL);
685 read = fread(p->buf, 1, n, p->fp);
686 }
687 else {
688 _Py_IDENTIFIER(readinto);
689 PyObject *res, *mview;
690 Py_buffer buf;
691
692 if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
693 return NULL;
694 mview = PyMemoryView_FromBuffer(&buf);
695 if (mview == NULL)
696 return NULL;
697
698 res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview);
699 if (res != NULL) {
700 read = PyNumber_AsSsize_t(res, PyExc_ValueError);
701 Py_DECREF(res);
702 }
703 }
704 if (read != n) {
705 if (!PyErr_Occurred()) {
706 if (read > n)
707 PyErr_Format(PyExc_ValueError,
708 "read() returned too much data: "
709 "%zd bytes requested, %zd returned",
710 n, read);
711 else
712 PyErr_SetString(PyExc_EOFError,
713 "EOF read where not expected");
714 }
715 return NULL;
716 }
717 return p->buf;
718 }
719
720 static int
r_byte(RFILE * p)721 r_byte(RFILE *p)
722 {
723 int c = EOF;
724
725 if (p->ptr != NULL) {
726 if (p->ptr < p->end)
727 c = (unsigned char) *p->ptr++;
728 return c;
729 }
730 if (!p->readable) {
731 assert(p->fp);
732 c = getc(p->fp);
733 }
734 else {
735 const char *ptr = r_string(1, p);
736 if (ptr != NULL)
737 c = *(unsigned char *) ptr;
738 }
739 return c;
740 }
741
742 static int
r_short(RFILE * p)743 r_short(RFILE *p)
744 {
745 short x = -1;
746 const unsigned char *buffer;
747
748 buffer = (const unsigned char *) r_string(2, p);
749 if (buffer != NULL) {
750 x = buffer[0];
751 x |= buffer[1] << 8;
752 /* Sign-extension, in case short greater than 16 bits */
753 x |= -(x & 0x8000);
754 }
755 return x;
756 }
757
758 static long
r_long(RFILE * p)759 r_long(RFILE *p)
760 {
761 long x = -1;
762 const unsigned char *buffer;
763
764 buffer = (const unsigned char *) r_string(4, p);
765 if (buffer != NULL) {
766 x = buffer[0];
767 x |= (long)buffer[1] << 8;
768 x |= (long)buffer[2] << 16;
769 x |= (long)buffer[3] << 24;
770 #if SIZEOF_LONG > 4
771 /* Sign extension for 64-bit machines */
772 x |= -(x & 0x80000000L);
773 #endif
774 }
775 return x;
776 }
777
778 /* r_long64 deals with the TYPE_INT64 code. */
779 static PyObject *
r_long64(RFILE * p)780 r_long64(RFILE *p)
781 {
782 const unsigned char *buffer = (const unsigned char *) r_string(8, p);
783 if (buffer == NULL) {
784 return NULL;
785 }
786 return _PyLong_FromByteArray(buffer, 8,
787 1 /* little endian */,
788 1 /* signed */);
789 }
790
791 static PyObject *
r_PyLong(RFILE * p)792 r_PyLong(RFILE *p)
793 {
794 PyLongObject *ob;
795 long n, size, i;
796 int j, md, shorts_in_top_digit;
797 digit d;
798
799 n = r_long(p);
800 if (PyErr_Occurred())
801 return NULL;
802 if (n == 0)
803 return (PyObject *)_PyLong_New(0);
804 if (n < -SIZE32_MAX || n > SIZE32_MAX) {
805 PyErr_SetString(PyExc_ValueError,
806 "bad marshal data (long size out of range)");
807 return NULL;
808 }
809
810 size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
811 shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
812 ob = _PyLong_New(size);
813 if (ob == NULL)
814 return NULL;
815
816 Py_SIZE(ob) = n > 0 ? size : -size;
817
818 for (i = 0; i < size-1; i++) {
819 d = 0;
820 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
821 md = r_short(p);
822 if (PyErr_Occurred()) {
823 Py_DECREF(ob);
824 return NULL;
825 }
826 if (md < 0 || md > PyLong_MARSHAL_BASE)
827 goto bad_digit;
828 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
829 }
830 ob->ob_digit[i] = d;
831 }
832
833 d = 0;
834 for (j=0; j < shorts_in_top_digit; j++) {
835 md = r_short(p);
836 if (PyErr_Occurred()) {
837 Py_DECREF(ob);
838 return NULL;
839 }
840 if (md < 0 || md > PyLong_MARSHAL_BASE)
841 goto bad_digit;
842 /* topmost marshal digit should be nonzero */
843 if (md == 0 && j == shorts_in_top_digit - 1) {
844 Py_DECREF(ob);
845 PyErr_SetString(PyExc_ValueError,
846 "bad marshal data (unnormalized long data)");
847 return NULL;
848 }
849 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
850 }
851 if (PyErr_Occurred()) {
852 Py_DECREF(ob);
853 return NULL;
854 }
855 /* top digit should be nonzero, else the resulting PyLong won't be
856 normalized */
857 ob->ob_digit[size-1] = d;
858 return (PyObject *)ob;
859 bad_digit:
860 Py_DECREF(ob);
861 PyErr_SetString(PyExc_ValueError,
862 "bad marshal data (digit out of range in long)");
863 return NULL;
864 }
865
866 static double
r_float_bin(RFILE * p)867 r_float_bin(RFILE *p)
868 {
869 const unsigned char *buf = (const unsigned char *) r_string(8, p);
870 if (buf == NULL)
871 return -1;
872 return _PyFloat_Unpack8(buf, 1);
873 }
874
875 /* Issue #33720: Disable inlining for reducing the C stack consumption
876 on PGO builds. */
877 _Py_NO_INLINE static double
r_float_str(RFILE * p)878 r_float_str(RFILE *p)
879 {
880 int n;
881 char buf[256];
882 const char *ptr;
883 n = r_byte(p);
884 if (n == EOF) {
885 PyErr_SetString(PyExc_EOFError,
886 "EOF read where object expected");
887 return -1;
888 }
889 ptr = r_string(n, p);
890 if (ptr == NULL) {
891 return -1;
892 }
893 memcpy(buf, ptr, n);
894 buf[n] = '\0';
895 return PyOS_string_to_double(buf, NULL, NULL);
896 }
897
898 /* allocate the reflist index for a new object. Return -1 on failure */
899 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)900 r_ref_reserve(int flag, RFILE *p)
901 {
902 if (flag) { /* currently only FLAG_REF is defined */
903 Py_ssize_t idx = PyList_GET_SIZE(p->refs);
904 if (idx >= 0x7ffffffe) {
905 PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
906 return -1;
907 }
908 if (PyList_Append(p->refs, Py_None) < 0)
909 return -1;
910 return idx;
911 } else
912 return 0;
913 }
914
915 /* insert the new object 'o' to the reflist at previously
916 * allocated index 'idx'.
917 * 'o' can be NULL, in which case nothing is done.
918 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
919 * if 'o' was non-NULL, and the function fails, 'o' is released and
920 * NULL returned. This simplifies error checking at the call site since
921 * a single test for NULL for the function result is enough.
922 */
923 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)924 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
925 {
926 if (o != NULL && flag) { /* currently only FLAG_REF is defined */
927 PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
928 Py_INCREF(o);
929 PyList_SET_ITEM(p->refs, idx, o);
930 Py_DECREF(tmp);
931 }
932 return o;
933 }
934
935 /* combination of both above, used when an object can be
936 * created whenever it is seen in the file, as opposed to
937 * after having loaded its sub-objects.
938 */
939 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)940 r_ref(PyObject *o, int flag, RFILE *p)
941 {
942 assert(flag & FLAG_REF);
943 if (o == NULL)
944 return NULL;
945 if (PyList_Append(p->refs, o) < 0) {
946 Py_DECREF(o); /* release the new object */
947 return NULL;
948 }
949 return o;
950 }
951
952 static PyObject *
r_object(RFILE * p)953 r_object(RFILE *p)
954 {
955 /* NULL is a valid return value, it does not necessarily means that
956 an exception is set. */
957 PyObject *v, *v2;
958 Py_ssize_t idx = 0;
959 long i, n;
960 int type, code = r_byte(p);
961 int flag, is_interned = 0;
962 PyObject *retval = NULL;
963
964 if (code == EOF) {
965 PyErr_SetString(PyExc_EOFError,
966 "EOF read where object expected");
967 return NULL;
968 }
969
970 p->depth++;
971
972 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
973 p->depth--;
974 PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
975 return NULL;
976 }
977
978 flag = code & FLAG_REF;
979 type = code & ~FLAG_REF;
980
981 #define R_REF(O) do{\
982 if (flag) \
983 O = r_ref(O, flag, p);\
984 } while (0)
985
986 switch (type) {
987
988 case TYPE_NULL:
989 break;
990
991 case TYPE_NONE:
992 Py_INCREF(Py_None);
993 retval = Py_None;
994 break;
995
996 case TYPE_STOPITER:
997 Py_INCREF(PyExc_StopIteration);
998 retval = PyExc_StopIteration;
999 break;
1000
1001 case TYPE_ELLIPSIS:
1002 Py_INCREF(Py_Ellipsis);
1003 retval = Py_Ellipsis;
1004 break;
1005
1006 case TYPE_FALSE:
1007 Py_INCREF(Py_False);
1008 retval = Py_False;
1009 break;
1010
1011 case TYPE_TRUE:
1012 Py_INCREF(Py_True);
1013 retval = Py_True;
1014 break;
1015
1016 case TYPE_INT:
1017 n = r_long(p);
1018 retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
1019 R_REF(retval);
1020 break;
1021
1022 case TYPE_INT64:
1023 retval = r_long64(p);
1024 R_REF(retval);
1025 break;
1026
1027 case TYPE_LONG:
1028 retval = r_PyLong(p);
1029 R_REF(retval);
1030 break;
1031
1032 case TYPE_FLOAT:
1033 {
1034 double x = r_float_str(p);
1035 if (x == -1.0 && PyErr_Occurred())
1036 break;
1037 retval = PyFloat_FromDouble(x);
1038 R_REF(retval);
1039 break;
1040 }
1041
1042 case TYPE_BINARY_FLOAT:
1043 {
1044 double x = r_float_bin(p);
1045 if (x == -1.0 && PyErr_Occurred())
1046 break;
1047 retval = PyFloat_FromDouble(x);
1048 R_REF(retval);
1049 break;
1050 }
1051
1052 case TYPE_COMPLEX:
1053 {
1054 Py_complex c;
1055 c.real = r_float_str(p);
1056 if (c.real == -1.0 && PyErr_Occurred())
1057 break;
1058 c.imag = r_float_str(p);
1059 if (c.imag == -1.0 && PyErr_Occurred())
1060 break;
1061 retval = PyComplex_FromCComplex(c);
1062 R_REF(retval);
1063 break;
1064 }
1065
1066 case TYPE_BINARY_COMPLEX:
1067 {
1068 Py_complex c;
1069 c.real = r_float_bin(p);
1070 if (c.real == -1.0 && PyErr_Occurred())
1071 break;
1072 c.imag = r_float_bin(p);
1073 if (c.imag == -1.0 && PyErr_Occurred())
1074 break;
1075 retval = PyComplex_FromCComplex(c);
1076 R_REF(retval);
1077 break;
1078 }
1079
1080 case TYPE_STRING:
1081 {
1082 const char *ptr;
1083 n = r_long(p);
1084 if (PyErr_Occurred())
1085 break;
1086 if (n < 0 || n > SIZE32_MAX) {
1087 PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1088 break;
1089 }
1090 v = PyBytes_FromStringAndSize((char *)NULL, n);
1091 if (v == NULL)
1092 break;
1093 ptr = r_string(n, p);
1094 if (ptr == NULL) {
1095 Py_DECREF(v);
1096 break;
1097 }
1098 memcpy(PyBytes_AS_STRING(v), ptr, n);
1099 retval = v;
1100 R_REF(retval);
1101 break;
1102 }
1103
1104 case TYPE_ASCII_INTERNED:
1105 is_interned = 1;
1106 /* fall through */
1107 case TYPE_ASCII:
1108 n = r_long(p);
1109 if (PyErr_Occurred())
1110 break;
1111 if (n < 0 || n > SIZE32_MAX) {
1112 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1113 break;
1114 }
1115 goto _read_ascii;
1116
1117 case TYPE_SHORT_ASCII_INTERNED:
1118 is_interned = 1;
1119 /* fall through */
1120 case TYPE_SHORT_ASCII:
1121 n = r_byte(p);
1122 if (n == EOF) {
1123 PyErr_SetString(PyExc_EOFError,
1124 "EOF read where object expected");
1125 break;
1126 }
1127 _read_ascii:
1128 {
1129 const char *ptr;
1130 ptr = r_string(n, p);
1131 if (ptr == NULL)
1132 break;
1133 v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1134 if (v == NULL)
1135 break;
1136 if (is_interned)
1137 PyUnicode_InternInPlace(&v);
1138 retval = v;
1139 R_REF(retval);
1140 break;
1141 }
1142
1143 case TYPE_INTERNED:
1144 is_interned = 1;
1145 /* fall through */
1146 case TYPE_UNICODE:
1147 {
1148 const char *buffer;
1149
1150 n = r_long(p);
1151 if (PyErr_Occurred())
1152 break;
1153 if (n < 0 || n > SIZE32_MAX) {
1154 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1155 break;
1156 }
1157 if (n != 0) {
1158 buffer = r_string(n, p);
1159 if (buffer == NULL)
1160 break;
1161 v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1162 }
1163 else {
1164 v = PyUnicode_New(0, 0);
1165 }
1166 if (v == NULL)
1167 break;
1168 if (is_interned)
1169 PyUnicode_InternInPlace(&v);
1170 retval = v;
1171 R_REF(retval);
1172 break;
1173 }
1174
1175 case TYPE_SMALL_TUPLE:
1176 n = (unsigned char) r_byte(p);
1177 if (PyErr_Occurred())
1178 break;
1179 goto _read_tuple;
1180 case TYPE_TUPLE:
1181 n = r_long(p);
1182 if (PyErr_Occurred())
1183 break;
1184 if (n < 0 || n > SIZE32_MAX) {
1185 PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1186 break;
1187 }
1188 _read_tuple:
1189 v = PyTuple_New(n);
1190 R_REF(v);
1191 if (v == NULL)
1192 break;
1193
1194 for (i = 0; i < n; i++) {
1195 v2 = r_object(p);
1196 if ( v2 == NULL ) {
1197 if (!PyErr_Occurred())
1198 PyErr_SetString(PyExc_TypeError,
1199 "NULL object in marshal data for tuple");
1200 Py_DECREF(v);
1201 v = NULL;
1202 break;
1203 }
1204 PyTuple_SET_ITEM(v, i, v2);
1205 }
1206 retval = v;
1207 break;
1208
1209 case TYPE_LIST:
1210 n = r_long(p);
1211 if (PyErr_Occurred())
1212 break;
1213 if (n < 0 || n > SIZE32_MAX) {
1214 PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1215 break;
1216 }
1217 v = PyList_New(n);
1218 R_REF(v);
1219 if (v == NULL)
1220 break;
1221 for (i = 0; i < n; i++) {
1222 v2 = r_object(p);
1223 if ( v2 == NULL ) {
1224 if (!PyErr_Occurred())
1225 PyErr_SetString(PyExc_TypeError,
1226 "NULL object in marshal data for list");
1227 Py_DECREF(v);
1228 v = NULL;
1229 break;
1230 }
1231 PyList_SET_ITEM(v, i, v2);
1232 }
1233 retval = v;
1234 break;
1235
1236 case TYPE_DICT:
1237 v = PyDict_New();
1238 R_REF(v);
1239 if (v == NULL)
1240 break;
1241 for (;;) {
1242 PyObject *key, *val;
1243 key = r_object(p);
1244 if (key == NULL)
1245 break;
1246 val = r_object(p);
1247 if (val == NULL) {
1248 Py_DECREF(key);
1249 break;
1250 }
1251 if (PyDict_SetItem(v, key, val) < 0) {
1252 Py_DECREF(key);
1253 Py_DECREF(val);
1254 break;
1255 }
1256 Py_DECREF(key);
1257 Py_DECREF(val);
1258 }
1259 if (PyErr_Occurred()) {
1260 Py_DECREF(v);
1261 v = NULL;
1262 }
1263 retval = v;
1264 break;
1265
1266 case TYPE_SET:
1267 case TYPE_FROZENSET:
1268 n = r_long(p);
1269 if (PyErr_Occurred())
1270 break;
1271 if (n < 0 || n > SIZE32_MAX) {
1272 PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1273 break;
1274 }
1275
1276 if (n == 0 && type == TYPE_FROZENSET) {
1277 /* call frozenset() to get the empty frozenset singleton */
1278 v = _PyObject_CallNoArg((PyObject*)&PyFrozenSet_Type);
1279 if (v == NULL)
1280 break;
1281 R_REF(v);
1282 retval = v;
1283 }
1284 else {
1285 v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1286 if (type == TYPE_SET) {
1287 R_REF(v);
1288 } else {
1289 /* must use delayed registration of frozensets because they must
1290 * be init with a refcount of 1
1291 */
1292 idx = r_ref_reserve(flag, p);
1293 if (idx < 0)
1294 Py_CLEAR(v); /* signal error */
1295 }
1296 if (v == NULL)
1297 break;
1298
1299 for (i = 0; i < n; i++) {
1300 v2 = r_object(p);
1301 if ( v2 == NULL ) {
1302 if (!PyErr_Occurred())
1303 PyErr_SetString(PyExc_TypeError,
1304 "NULL object in marshal data for set");
1305 Py_DECREF(v);
1306 v = NULL;
1307 break;
1308 }
1309 if (PySet_Add(v, v2) == -1) {
1310 Py_DECREF(v);
1311 Py_DECREF(v2);
1312 v = NULL;
1313 break;
1314 }
1315 Py_DECREF(v2);
1316 }
1317 if (type != TYPE_SET)
1318 v = r_ref_insert(v, idx, flag, p);
1319 retval = v;
1320 }
1321 break;
1322
1323 case TYPE_CODE:
1324 {
1325 int argcount;
1326 int posonlyargcount;
1327 int kwonlyargcount;
1328 int nlocals;
1329 int stacksize;
1330 int flags;
1331 PyObject *code = NULL;
1332 PyObject *consts = NULL;
1333 PyObject *names = NULL;
1334 PyObject *varnames = NULL;
1335 PyObject *freevars = NULL;
1336 PyObject *cellvars = NULL;
1337 PyObject *filename = NULL;
1338 PyObject *name = NULL;
1339 int firstlineno;
1340 PyObject *lnotab = NULL;
1341
1342 idx = r_ref_reserve(flag, p);
1343 if (idx < 0)
1344 break;
1345
1346 v = NULL;
1347
1348 /* XXX ignore long->int overflows for now */
1349 argcount = (int)r_long(p);
1350 if (PyErr_Occurred())
1351 goto code_error;
1352 posonlyargcount = (int)r_long(p);
1353 if (PyErr_Occurred()) {
1354 goto code_error;
1355 }
1356 kwonlyargcount = (int)r_long(p);
1357 if (PyErr_Occurred())
1358 goto code_error;
1359 nlocals = (int)r_long(p);
1360 if (PyErr_Occurred())
1361 goto code_error;
1362 stacksize = (int)r_long(p);
1363 if (PyErr_Occurred())
1364 goto code_error;
1365 flags = (int)r_long(p);
1366 if (PyErr_Occurred())
1367 goto code_error;
1368 code = r_object(p);
1369 if (code == NULL)
1370 goto code_error;
1371 consts = r_object(p);
1372 if (consts == NULL)
1373 goto code_error;
1374 names = r_object(p);
1375 if (names == NULL)
1376 goto code_error;
1377 varnames = r_object(p);
1378 if (varnames == NULL)
1379 goto code_error;
1380 freevars = r_object(p);
1381 if (freevars == NULL)
1382 goto code_error;
1383 cellvars = r_object(p);
1384 if (cellvars == NULL)
1385 goto code_error;
1386 filename = r_object(p);
1387 if (filename == NULL)
1388 goto code_error;
1389 name = r_object(p);
1390 if (name == NULL)
1391 goto code_error;
1392 firstlineno = (int)r_long(p);
1393 if (firstlineno == -1 && PyErr_Occurred())
1394 break;
1395 lnotab = r_object(p);
1396 if (lnotab == NULL)
1397 goto code_error;
1398
1399 v = (PyObject *) PyCode_NewWithPosOnlyArgs(
1400 argcount, posonlyargcount, kwonlyargcount,
1401 nlocals, stacksize, flags,
1402 code, consts, names, varnames,
1403 freevars, cellvars, filename, name,
1404 firstlineno, lnotab);
1405 v = r_ref_insert(v, idx, flag, p);
1406
1407 code_error:
1408 Py_XDECREF(code);
1409 Py_XDECREF(consts);
1410 Py_XDECREF(names);
1411 Py_XDECREF(varnames);
1412 Py_XDECREF(freevars);
1413 Py_XDECREF(cellvars);
1414 Py_XDECREF(filename);
1415 Py_XDECREF(name);
1416 Py_XDECREF(lnotab);
1417 }
1418 retval = v;
1419 break;
1420
1421 case TYPE_REF:
1422 n = r_long(p);
1423 if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1424 if (n == -1 && PyErr_Occurred())
1425 break;
1426 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1427 break;
1428 }
1429 v = PyList_GET_ITEM(p->refs, n);
1430 if (v == Py_None) {
1431 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1432 break;
1433 }
1434 Py_INCREF(v);
1435 retval = v;
1436 break;
1437
1438 default:
1439 /* Bogus data got written, which isn't ideal.
1440 This will let you keep working and recover. */
1441 PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1442 break;
1443
1444 }
1445 p->depth--;
1446 return retval;
1447 }
1448
1449 static PyObject *
read_object(RFILE * p)1450 read_object(RFILE *p)
1451 {
1452 PyObject *v;
1453 if (PyErr_Occurred()) {
1454 fprintf(stderr, "XXX readobject called with exception set\n");
1455 return NULL;
1456 }
1457 v = r_object(p);
1458 if (v == NULL && !PyErr_Occurred())
1459 PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1460 return v;
1461 }
1462
1463 int
PyMarshal_ReadShortFromFile(FILE * fp)1464 PyMarshal_ReadShortFromFile(FILE *fp)
1465 {
1466 RFILE rf;
1467 int res;
1468 assert(fp);
1469 rf.readable = NULL;
1470 rf.fp = fp;
1471 rf.end = rf.ptr = NULL;
1472 rf.buf = NULL;
1473 res = r_short(&rf);
1474 if (rf.buf != NULL)
1475 PyMem_FREE(rf.buf);
1476 return res;
1477 }
1478
1479 long
PyMarshal_ReadLongFromFile(FILE * fp)1480 PyMarshal_ReadLongFromFile(FILE *fp)
1481 {
1482 RFILE rf;
1483 long res;
1484 rf.fp = fp;
1485 rf.readable = NULL;
1486 rf.ptr = rf.end = NULL;
1487 rf.buf = NULL;
1488 res = r_long(&rf);
1489 if (rf.buf != NULL)
1490 PyMem_FREE(rf.buf);
1491 return res;
1492 }
1493
1494 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1495 static off_t
getfilesize(FILE * fp)1496 getfilesize(FILE *fp)
1497 {
1498 struct _Py_stat_struct st;
1499 if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1500 return -1;
1501 #if SIZEOF_OFF_T == 4
1502 else if (st.st_size >= INT_MAX)
1503 return (off_t)INT_MAX;
1504 #endif
1505 else
1506 return (off_t)st.st_size;
1507 }
1508
1509 /* If we can get the size of the file up-front, and it's reasonably small,
1510 * read it in one gulp and delegate to ...FromString() instead. Much quicker
1511 * than reading a byte at a time from file; speeds .pyc imports.
1512 * CAUTION: since this may read the entire remainder of the file, don't
1513 * call it unless you know you're done with the file.
1514 */
1515 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1516 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1517 {
1518 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1519 #define REASONABLE_FILE_LIMIT (1L << 18)
1520 off_t filesize;
1521 filesize = getfilesize(fp);
1522 if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1523 char* pBuf = (char *)PyMem_MALLOC(filesize);
1524 if (pBuf != NULL) {
1525 size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1526 PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1527 PyMem_FREE(pBuf);
1528 return v;
1529 }
1530
1531 }
1532 /* We don't have fstat, or we do but the file is larger than
1533 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1534 */
1535 return PyMarshal_ReadObjectFromFile(fp);
1536
1537 #undef REASONABLE_FILE_LIMIT
1538 }
1539
1540 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1541 PyMarshal_ReadObjectFromFile(FILE *fp)
1542 {
1543 RFILE rf;
1544 PyObject *result;
1545 rf.fp = fp;
1546 rf.readable = NULL;
1547 rf.depth = 0;
1548 rf.ptr = rf.end = NULL;
1549 rf.buf = NULL;
1550 rf.refs = PyList_New(0);
1551 if (rf.refs == NULL)
1552 return NULL;
1553 result = r_object(&rf);
1554 Py_DECREF(rf.refs);
1555 if (rf.buf != NULL)
1556 PyMem_FREE(rf.buf);
1557 return result;
1558 }
1559
1560 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1561 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1562 {
1563 RFILE rf;
1564 PyObject *result;
1565 rf.fp = NULL;
1566 rf.readable = NULL;
1567 rf.ptr = (char *)str;
1568 rf.end = (char *)str + len;
1569 rf.buf = NULL;
1570 rf.depth = 0;
1571 rf.refs = PyList_New(0);
1572 if (rf.refs == NULL)
1573 return NULL;
1574 result = r_object(&rf);
1575 Py_DECREF(rf.refs);
1576 if (rf.buf != NULL)
1577 PyMem_FREE(rf.buf);
1578 return result;
1579 }
1580
1581 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1582 PyMarshal_WriteObjectToString(PyObject *x, int version)
1583 {
1584 WFILE wf;
1585
1586 memset(&wf, 0, sizeof(wf));
1587 wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1588 if (wf.str == NULL)
1589 return NULL;
1590 wf.ptr = wf.buf = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1591 wf.end = wf.ptr + PyBytes_Size(wf.str);
1592 wf.error = WFERR_OK;
1593 wf.version = version;
1594 if (w_init_refs(&wf, version)) {
1595 Py_DECREF(wf.str);
1596 return NULL;
1597 }
1598 w_object(x, &wf);
1599 w_clear_refs(&wf);
1600 if (wf.str != NULL) {
1601 char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1602 if (wf.ptr - base > PY_SSIZE_T_MAX) {
1603 Py_DECREF(wf.str);
1604 PyErr_SetString(PyExc_OverflowError,
1605 "too much marshal data for a bytes object");
1606 return NULL;
1607 }
1608 if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1609 return NULL;
1610 }
1611 if (wf.error != WFERR_OK) {
1612 Py_XDECREF(wf.str);
1613 if (wf.error == WFERR_NOMEMORY)
1614 PyErr_NoMemory();
1615 else
1616 PyErr_SetString(PyExc_ValueError,
1617 (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1618 :"object too deeply nested to marshal");
1619 return NULL;
1620 }
1621 return wf.str;
1622 }
1623
1624 /* And an interface for Python programs... */
1625 /*[clinic input]
1626 marshal.dump
1627
1628 value: object
1629 Must be a supported type.
1630 file: object
1631 Must be a writeable binary file.
1632 version: int(c_default="Py_MARSHAL_VERSION") = version
1633 Indicates the data format that dump should use.
1634 /
1635
1636 Write the value on the open file.
1637
1638 If the value has (or contains an object that has) an unsupported type, a
1639 ValueError exception is raised - but garbage data will also be written
1640 to the file. The object will not be properly read back by load().
1641 [clinic start generated code]*/
1642
1643 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version)1644 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1645 int version)
1646 /*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1647 {
1648 /* XXX Quick hack -- need to do this differently */
1649 PyObject *s;
1650 PyObject *res;
1651 _Py_IDENTIFIER(write);
1652
1653 s = PyMarshal_WriteObjectToString(value, version);
1654 if (s == NULL)
1655 return NULL;
1656 res = _PyObject_CallMethodIdObjArgs(file, &PyId_write, s, NULL);
1657 Py_DECREF(s);
1658 return res;
1659 }
1660
1661 /*[clinic input]
1662 marshal.load
1663
1664 file: object
1665 Must be readable binary file.
1666 /
1667
1668 Read one value from the open file and return it.
1669
1670 If no valid value is read (e.g. because the data has a different Python
1671 version's incompatible marshal format), raise EOFError, ValueError or
1672 TypeError.
1673
1674 Note: If an object containing an unsupported type was marshalled with
1675 dump(), load() will substitute None for the unmarshallable type.
1676 [clinic start generated code]*/
1677
1678 static PyObject *
marshal_load(PyObject * module,PyObject * file)1679 marshal_load(PyObject *module, PyObject *file)
1680 /*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
1681 {
1682 PyObject *data, *result;
1683 _Py_IDENTIFIER(read);
1684 RFILE rf;
1685
1686 /*
1687 * Make a call to the read method, but read zero bytes.
1688 * This is to ensure that the object passed in at least
1689 * has a read method which returns bytes.
1690 * This can be removed if we guarantee good error handling
1691 * for r_string()
1692 */
1693 data = _PyObject_CallMethodId(file, &PyId_read, "i", 0);
1694 if (data == NULL)
1695 return NULL;
1696 if (!PyBytes_Check(data)) {
1697 PyErr_Format(PyExc_TypeError,
1698 "file.read() returned not bytes but %.100s",
1699 data->ob_type->tp_name);
1700 result = NULL;
1701 }
1702 else {
1703 rf.depth = 0;
1704 rf.fp = NULL;
1705 rf.readable = file;
1706 rf.ptr = rf.end = NULL;
1707 rf.buf = NULL;
1708 if ((rf.refs = PyList_New(0)) != NULL) {
1709 result = read_object(&rf);
1710 Py_DECREF(rf.refs);
1711 if (rf.buf != NULL)
1712 PyMem_FREE(rf.buf);
1713 } else
1714 result = NULL;
1715 }
1716 Py_DECREF(data);
1717 return result;
1718 }
1719
1720 /*[clinic input]
1721 marshal.dumps
1722
1723 value: object
1724 Must be a supported type.
1725 version: int(c_default="Py_MARSHAL_VERSION") = version
1726 Indicates the data format that dumps should use.
1727 /
1728
1729 Return the bytes object that would be written to a file by dump(value, file).
1730
1731 Raise a ValueError exception if value has (or contains an object that has) an
1732 unsupported type.
1733 [clinic start generated code]*/
1734
1735 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version)1736 marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1737 /*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1738 {
1739 return PyMarshal_WriteObjectToString(value, version);
1740 }
1741
1742 /*[clinic input]
1743 marshal.loads
1744
1745 bytes: Py_buffer
1746 /
1747
1748 Convert the bytes-like object to a value.
1749
1750 If no valid value is found, raise EOFError, ValueError or TypeError. Extra
1751 bytes in the input are ignored.
1752 [clinic start generated code]*/
1753
1754 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes)1755 marshal_loads_impl(PyObject *module, Py_buffer *bytes)
1756 /*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
1757 {
1758 RFILE rf;
1759 char *s = bytes->buf;
1760 Py_ssize_t n = bytes->len;
1761 PyObject* result;
1762 rf.fp = NULL;
1763 rf.readable = NULL;
1764 rf.ptr = s;
1765 rf.end = s + n;
1766 rf.depth = 0;
1767 if ((rf.refs = PyList_New(0)) == NULL)
1768 return NULL;
1769 result = read_object(&rf);
1770 Py_DECREF(rf.refs);
1771 return result;
1772 }
1773
1774 static PyMethodDef marshal_methods[] = {
1775 MARSHAL_DUMP_METHODDEF
1776 MARSHAL_LOAD_METHODDEF
1777 MARSHAL_DUMPS_METHODDEF
1778 MARSHAL_LOADS_METHODDEF
1779 {NULL, NULL} /* sentinel */
1780 };
1781
1782
/* Docstring text for the marshal module; it is referenced by the
   marshalmodule definition below.
   NOTE(review): the comment at the top of this file says version 3 of the
   protocol supports circular links, so the "infinite loops" warning in
   this text may only apply to older format versions -- confirm. */
PyDoc_STRVAR(module_doc,
"This module contains functions that can read and write Python values in\n\
a binary format. The format is specific to Python, but independent of\n\
machine architecture issues.\n\
\n\
Not all Python object types are supported; in general, only objects\n\
whose value is independent from a particular invocation of Python can be\n\
written and read by this module. The following types are supported:\n\
None, integers, floating point numbers, strings, bytes, bytearrays,\n\
tuples, lists, sets, dictionaries, and code objects, where it\n\
should be understood that tuples, lists and dictionaries are only\n\
supported as long as the values contained therein are themselves\n\
supported; and recursive lists and dictionaries should not be written\n\
(they will cause infinite loops).\n\
\n\
Variables:\n\
\n\
version -- indicates the format that the module uses. Version 0 is the\n\
historical format, version 1 shares interned strings and version 2\n\
uses a binary format for floating point numbers.\n\
Version 3 shares common object references (New in version 3.4).\n\
\n\
Functions:\n\
\n\
dump() -- write value to a file\n\
load() -- read value from a file\n\
dumps() -- marshal value as a bytes object\n\
loads() -- read value from a bytes-like object");
1811
1812
1813
1814 static struct PyModuleDef marshalmodule = {
1815 PyModuleDef_HEAD_INIT,
1816 "marshal",
1817 module_doc,
1818 0,
1819 marshal_methods,
1820 NULL,
1821 NULL,
1822 NULL,
1823 NULL
1824 };
1825
1826 PyMODINIT_FUNC
PyMarshal_Init(void)1827 PyMarshal_Init(void)
1828 {
1829 PyObject *mod = PyModule_Create(&marshalmodule);
1830 if (mod == NULL)
1831 return NULL;
1832 if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
1833 Py_DECREF(mod);
1834 return NULL;
1835 }
1836 return mod;
1837 }
1838