• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* Write Python objects to files and read them back.
3    This is primarily intended for writing and reading compiled Python code,
4    even though dicts, lists, sets and frozensets, not commonly seen in
5    code objects, are supported.
6    Version 3 of this protocol properly supports circular links
7    and sharing. */
8 
9 #define PY_SSIZE_T_CLEAN
10 
11 #include "Python.h"
12 #include "longintrepr.h"
13 #include "code.h"
14 #include "marshal.h"
15 #include "pycore_hashtable.h"
16 
17 /*[clinic input]
18 module marshal
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
21 
22 #include "clinic/marshal.c.h"
23 
/* Upper bound on how deeply marshalled objects may nest.  Once the object
 * stack gets this deep an exception is raised instead of continuing, because
 * a dangerously deep object risks coring the interpreter.
 *
 * BUG: https://bugs.python.org/issue33720
 * On Windows PGO builds, the r_object function overallocates its stack and
 * can cause a stack overflow.  The limit is therefore lowered for all
 * Windows releases, not only for debug builds; the earlier, narrower
 * condition was:
 *     #if defined(MS_WINDOWS) && defined(_DEBUG)
 */
#ifndef MS_WINDOWS
#define MAX_MARSHAL_STACK_DEPTH 2000
#else
#define MAX_MARSHAL_STACK_DEPTH 1000
#endif
40 
/* One-byte type codes that introduce every value in the marshal stream. */

/* Values written as a bare type byte. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'

/* Numbers. */
#define TYPE_INT                'i'
/* TYPE_INT64 is not generated anymore.
   Supported for backward compatibility only. */
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'

/* Strings, containers, code objects and back-references. */
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_REF                'r'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'

/* OR-ed into a type code: with a type, add obj to index */
#define FLAG_REF                '\x80'

/* Compact ASCII-string and small-tuple forms. */
#define TYPE_ASCII              'a'
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SMALL_TUPLE        ')'
#define TYPE_SHORT_ASCII        'z'
#define TYPE_SHORT_ASCII_INTERNED 'Z'
74 
/* Values stored in WFILE.error while writing. */
#define WFERR_OK             0  /* no error recorded */
#define WFERR_UNMARSHALLABLE 1  /* object (or size) cannot be marshalled */
#define WFERR_NESTEDTOODEEP  2  /* MAX_MARSHAL_STACK_DEPTH exceeded */
#define WFERR_NOMEMORY       3  /* an allocation failed or would overflow */
79 
80 typedef struct {
81     FILE *fp;
82     int error;  /* see WFERR_* values */
83     int depth;
84     PyObject *str;
85     char *ptr;
86     const char *end;
87     char *buf;
88     _Py_hashtable_t *hashtable;
89     int version;
90 } WFILE;
91 
/* Append the single byte `c` to writer `p`.  When the buffer is full,
   w_reserve() is asked for one more byte; if that fails the byte is
   silently dropped.  Note that `p` is evaluated more than once. */
#define w_byte(c, p)                                            \
    do {                                                        \
        if ((p)->ptr == (p)->end && !w_reserve((p), 1)) {       \
            break;                                              \
        }                                                       \
        *(p)->ptr++ = (c);                                      \
    } while (0)
96 
97 static void
w_flush(WFILE * p)98 w_flush(WFILE *p)
99 {
100     assert(p->fp != NULL);
101     fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
102     p->ptr = p->buf;
103 }
104 
105 static int
w_reserve(WFILE * p,Py_ssize_t needed)106 w_reserve(WFILE *p, Py_ssize_t needed)
107 {
108     Py_ssize_t pos, size, delta;
109     if (p->ptr == NULL)
110         return 0; /* An error already occurred */
111     if (p->fp != NULL) {
112         w_flush(p);
113         return needed <= p->end - p->ptr;
114     }
115     assert(p->str != NULL);
116     pos = p->ptr - p->buf;
117     size = PyBytes_GET_SIZE(p->str);
118     if (size > 16*1024*1024)
119         delta = (size >> 3);            /* 12.5% overallocation */
120     else
121         delta = size + 1024;
122     delta = Py_MAX(delta, needed);
123     if (delta > PY_SSIZE_T_MAX - size) {
124         p->error = WFERR_NOMEMORY;
125         return 0;
126     }
127     size += delta;
128     if (_PyBytes_Resize(&p->str, size) != 0) {
129         p->end = p->ptr = p->buf = NULL;
130         return 0;
131     }
132     else {
133         p->buf = PyBytes_AS_STRING(p->str);
134         p->ptr = p->buf + pos;
135         p->end = p->buf + size;
136         return 1;
137     }
138 }
139 
140 static void
w_string(const void * s,Py_ssize_t n,WFILE * p)141 w_string(const void *s, Py_ssize_t n, WFILE *p)
142 {
143     Py_ssize_t m;
144     if (!n || p->ptr == NULL)
145         return;
146     m = p->end - p->ptr;
147     if (p->fp != NULL) {
148         if (n <= m) {
149             memcpy(p->ptr, s, n);
150             p->ptr += n;
151         }
152         else {
153             w_flush(p);
154             fwrite(s, 1, n, p->fp);
155         }
156     }
157     else {
158         if (n <= m || w_reserve(p, n - m)) {
159             memcpy(p->ptr, s, n);
160             p->ptr += n;
161         }
162     }
163 }
164 
165 static void
w_short(int x,WFILE * p)166 w_short(int x, WFILE *p)
167 {
168     w_byte((char)( x      & 0xff), p);
169     w_byte((char)((x>> 8) & 0xff), p);
170 }
171 
172 static void
w_long(long x,WFILE * p)173 w_long(long x, WFILE *p)
174 {
175     w_byte((char)( x      & 0xff), p);
176     w_byte((char)((x>> 8) & 0xff), p);
177     w_byte((char)((x>>16) & 0xff), p);
178     w_byte((char)((x>>24) & 0xff), p);
179 }
180 
/* Largest size/count that fits the 4-byte size field of the format. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size `n` as a 4-byte field.
   Where size_t is wider than 32 bits an oversized `n` is rejected: the
   macro undoes one level of depth, records WFERR_UNMARSHALLABLE and
   RETURNS from the enclosing (void) function. */
#if SIZEOF_SIZE_T <= 4
# define W_SIZE  w_long
#else
# define W_SIZE(n, p)                           \
    do {                                        \
        if ((n) > SIZE32_MAX) {                 \
            (p)->depth--;                       \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while (0)
#endif
195 
196 static void
w_pstring(const void * s,Py_ssize_t n,WFILE * p)197 w_pstring(const void *s, Py_ssize_t n, WFILE *p)
198 {
199         W_SIZE(n, p);
200         w_string(s, n, p);
201 }
202 
203 static void
w_short_pstring(const void * s,Py_ssize_t n,WFILE * p)204 w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
205 {
206     w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
207     w_string(s, n, p);
208 }
209 
/* Python ints are assumed to be stored internally in a base that is some
   power of 2**15.  For portability the marshal format always reads and
   writes them in base exactly 2**15, so each internal digit maps onto
   PyLong_MARSHAL_RATIO marshal digits. */

#define PyLong_MARSHAL_SHIFT 15
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)

#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif

#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
221 
/* Write type code `t` combined with the reference flag.  This macro reads a
   variable named `flag` that must be in scope at the point of use. */
#define W_TYPE(t, p)                \
    do {                            \
        w_byte((t) | flag, (p));    \
    } while (0)
225 
226 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)227 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
228 {
229     Py_ssize_t i, j, n, l;
230     digit d;
231 
232     W_TYPE(TYPE_LONG, p);
233     if (Py_SIZE(ob) == 0) {
234         w_long((long)0, p);
235         return;
236     }
237 
238     /* set l to number of base PyLong_MARSHAL_BASE digits */
239     n = Py_ABS(Py_SIZE(ob));
240     l = (n-1) * PyLong_MARSHAL_RATIO;
241     d = ob->ob_digit[n-1];
242     assert(d != 0); /* a PyLong is always normalized */
243     do {
244         d >>= PyLong_MARSHAL_SHIFT;
245         l++;
246     } while (d != 0);
247     if (l > SIZE32_MAX) {
248         p->depth--;
249         p->error = WFERR_UNMARSHALLABLE;
250         return;
251     }
252     w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
253 
254     for (i=0; i < n-1; i++) {
255         d = ob->ob_digit[i];
256         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
257             w_short(d & PyLong_MARSHAL_MASK, p);
258             d >>= PyLong_MARSHAL_SHIFT;
259         }
260         assert (d == 0);
261     }
262     d = ob->ob_digit[n-1];
263     do {
264         w_short(d & PyLong_MARSHAL_MASK, p);
265         d >>= PyLong_MARSHAL_SHIFT;
266     } while (d != 0);
267 }
268 
269 static void
w_float_bin(double v,WFILE * p)270 w_float_bin(double v, WFILE *p)
271 {
272     unsigned char buf[8];
273     if (_PyFloat_Pack8(v, buf, 1) < 0) {
274         p->error = WFERR_UNMARSHALLABLE;
275         return;
276     }
277     w_string(buf, 8, p);
278 }
279 
280 static void
w_float_str(double v,WFILE * p)281 w_float_str(double v, WFILE *p)
282 {
283     char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
284     if (!buf) {
285         p->error = WFERR_NOMEMORY;
286         return;
287     }
288     w_short_pstring(buf, strlen(buf), p);
289     PyMem_Free(buf);
290 }
291 
292 static int
w_ref(PyObject * v,char * flag,WFILE * p)293 w_ref(PyObject *v, char *flag, WFILE *p)
294 {
295     _Py_hashtable_entry_t *entry;
296     int w;
297 
298     if (p->version < 3 || p->hashtable == NULL)
299         return 0; /* not writing object references */
300 
301     /* if it has only one reference, it definitely isn't shared */
302     if (Py_REFCNT(v) == 1)
303         return 0;
304 
305     entry = _Py_hashtable_get_entry(p->hashtable, v);
306     if (entry != NULL) {
307         /* write the reference index to the stream */
308         w = (int)(uintptr_t)entry->value;
309         /* we don't store "long" indices in the dict */
310         assert(0 <= w && w <= 0x7fffffff);
311         w_byte(TYPE_REF, p);
312         w_long(w, p);
313         return 1;
314     } else {
315         size_t s = p->hashtable->nentries;
316         /* we don't support long indices */
317         if (s >= 0x7fffffff) {
318             PyErr_SetString(PyExc_ValueError, "too many objects");
319             goto err;
320         }
321         w = (int)s;
322         Py_INCREF(v);
323         if (_Py_hashtable_set(p->hashtable, v, (void *)(uintptr_t)w) < 0) {
324             Py_DECREF(v);
325             goto err;
326         }
327         *flag |= FLAG_REF;
328         return 0;
329     }
330 err:
331     p->error = WFERR_UNMARSHALLABLE;
332     return 1;
333 }
334 
335 static void
336 w_complex_object(PyObject *v, char flag, WFILE *p);
337 
338 static void
w_object(PyObject * v,WFILE * p)339 w_object(PyObject *v, WFILE *p)
340 {
341     char flag = '\0';
342 
343     p->depth++;
344 
345     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
346         p->error = WFERR_NESTEDTOODEEP;
347     }
348     else if (v == NULL) {
349         w_byte(TYPE_NULL, p);
350     }
351     else if (v == Py_None) {
352         w_byte(TYPE_NONE, p);
353     }
354     else if (v == PyExc_StopIteration) {
355         w_byte(TYPE_STOPITER, p);
356     }
357     else if (v == Py_Ellipsis) {
358         w_byte(TYPE_ELLIPSIS, p);
359     }
360     else if (v == Py_False) {
361         w_byte(TYPE_FALSE, p);
362     }
363     else if (v == Py_True) {
364         w_byte(TYPE_TRUE, p);
365     }
366     else if (!w_ref(v, &flag, p))
367         w_complex_object(v, flag, p);
368 
369     p->depth--;
370 }
371 
372 static void
w_complex_object(PyObject * v,char flag,WFILE * p)373 w_complex_object(PyObject *v, char flag, WFILE *p)
374 {
375     Py_ssize_t i, n;
376 
377     if (PyLong_CheckExact(v)) {
378         int overflow;
379         long x = PyLong_AsLongAndOverflow(v, &overflow);
380         if (overflow) {
381             w_PyLong((PyLongObject *)v, flag, p);
382         }
383         else {
384 #if SIZEOF_LONG > 4
385             long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
386             if (y && y != -1) {
387                 /* Too large for TYPE_INT */
388                 w_PyLong((PyLongObject*)v, flag, p);
389             }
390             else
391 #endif
392             {
393                 W_TYPE(TYPE_INT, p);
394                 w_long(x, p);
395             }
396         }
397     }
398     else if (PyFloat_CheckExact(v)) {
399         if (p->version > 1) {
400             W_TYPE(TYPE_BINARY_FLOAT, p);
401             w_float_bin(PyFloat_AS_DOUBLE(v), p);
402         }
403         else {
404             W_TYPE(TYPE_FLOAT, p);
405             w_float_str(PyFloat_AS_DOUBLE(v), p);
406         }
407     }
408     else if (PyComplex_CheckExact(v)) {
409         if (p->version > 1) {
410             W_TYPE(TYPE_BINARY_COMPLEX, p);
411             w_float_bin(PyComplex_RealAsDouble(v), p);
412             w_float_bin(PyComplex_ImagAsDouble(v), p);
413         }
414         else {
415             W_TYPE(TYPE_COMPLEX, p);
416             w_float_str(PyComplex_RealAsDouble(v), p);
417             w_float_str(PyComplex_ImagAsDouble(v), p);
418         }
419     }
420     else if (PyBytes_CheckExact(v)) {
421         W_TYPE(TYPE_STRING, p);
422         w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
423     }
424     else if (PyUnicode_CheckExact(v)) {
425         if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
426             int is_short = PyUnicode_GET_LENGTH(v) < 256;
427             if (is_short) {
428                 if (PyUnicode_CHECK_INTERNED(v))
429                     W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
430                 else
431                     W_TYPE(TYPE_SHORT_ASCII, p);
432                 w_short_pstring(PyUnicode_1BYTE_DATA(v),
433                                 PyUnicode_GET_LENGTH(v), p);
434             }
435             else {
436                 if (PyUnicode_CHECK_INTERNED(v))
437                     W_TYPE(TYPE_ASCII_INTERNED, p);
438                 else
439                     W_TYPE(TYPE_ASCII, p);
440                 w_pstring(PyUnicode_1BYTE_DATA(v),
441                           PyUnicode_GET_LENGTH(v), p);
442             }
443         }
444         else {
445             PyObject *utf8;
446             utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
447             if (utf8 == NULL) {
448                 p->depth--;
449                 p->error = WFERR_UNMARSHALLABLE;
450                 return;
451             }
452             if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
453                 W_TYPE(TYPE_INTERNED, p);
454             else
455                 W_TYPE(TYPE_UNICODE, p);
456             w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
457             Py_DECREF(utf8);
458         }
459     }
460     else if (PyTuple_CheckExact(v)) {
461         n = PyTuple_GET_SIZE(v);
462         if (p->version >= 4 && n < 256) {
463             W_TYPE(TYPE_SMALL_TUPLE, p);
464             w_byte((unsigned char)n, p);
465         }
466         else {
467             W_TYPE(TYPE_TUPLE, p);
468             W_SIZE(n, p);
469         }
470         for (i = 0; i < n; i++) {
471             w_object(PyTuple_GET_ITEM(v, i), p);
472         }
473     }
474     else if (PyList_CheckExact(v)) {
475         W_TYPE(TYPE_LIST, p);
476         n = PyList_GET_SIZE(v);
477         W_SIZE(n, p);
478         for (i = 0; i < n; i++) {
479             w_object(PyList_GET_ITEM(v, i), p);
480         }
481     }
482     else if (PyDict_CheckExact(v)) {
483         Py_ssize_t pos;
484         PyObject *key, *value;
485         W_TYPE(TYPE_DICT, p);
486         /* This one is NULL object terminated! */
487         pos = 0;
488         while (PyDict_Next(v, &pos, &key, &value)) {
489             w_object(key, p);
490             w_object(value, p);
491         }
492         w_object((PyObject *)NULL, p);
493     }
494     else if (PyAnySet_CheckExact(v)) {
495         PyObject *value;
496         Py_ssize_t pos = 0;
497         Py_hash_t hash;
498 
499         if (PyFrozenSet_CheckExact(v))
500             W_TYPE(TYPE_FROZENSET, p);
501         else
502             W_TYPE(TYPE_SET, p);
503         n = PySet_GET_SIZE(v);
504         W_SIZE(n, p);
505         while (_PySet_NextEntry(v, &pos, &value, &hash)) {
506             w_object(value, p);
507         }
508     }
509     else if (PyCode_Check(v)) {
510         PyCodeObject *co = (PyCodeObject *)v;
511         W_TYPE(TYPE_CODE, p);
512         w_long(co->co_argcount, p);
513         w_long(co->co_posonlyargcount, p);
514         w_long(co->co_kwonlyargcount, p);
515         w_long(co->co_nlocals, p);
516         w_long(co->co_stacksize, p);
517         w_long(co->co_flags, p);
518         w_object(co->co_code, p);
519         w_object(co->co_consts, p);
520         w_object(co->co_names, p);
521         w_object(co->co_varnames, p);
522         w_object(co->co_freevars, p);
523         w_object(co->co_cellvars, p);
524         w_object(co->co_filename, p);
525         w_object(co->co_name, p);
526         w_long(co->co_firstlineno, p);
527         w_object(co->co_lnotab, p);
528     }
529     else if (PyObject_CheckBuffer(v)) {
530         /* Write unknown bytes-like objects as a bytes object */
531         Py_buffer view;
532         if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
533             w_byte(TYPE_UNKNOWN, p);
534             p->depth--;
535             p->error = WFERR_UNMARSHALLABLE;
536             return;
537         }
538         W_TYPE(TYPE_STRING, p);
539         w_pstring(view.buf, view.len, p);
540         PyBuffer_Release(&view);
541     }
542     else {
543         W_TYPE(TYPE_UNKNOWN, p);
544         p->error = WFERR_UNMARSHALLABLE;
545     }
546 }
547 
548 static void
w_decref_entry(void * key)549 w_decref_entry(void *key)
550 {
551     PyObject *entry_key = (PyObject *)key;
552     Py_XDECREF(entry_key);
553 }
554 
555 static int
w_init_refs(WFILE * wf,int version)556 w_init_refs(WFILE *wf, int version)
557 {
558     if (version >= 3) {
559         wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
560                                                _Py_hashtable_compare_direct,
561                                                w_decref_entry, NULL, NULL);
562         if (wf->hashtable == NULL) {
563             PyErr_NoMemory();
564             return -1;
565         }
566     }
567     return 0;
568 }
569 
570 static void
w_clear_refs(WFILE * wf)571 w_clear_refs(WFILE *wf)
572 {
573     if (wf->hashtable != NULL) {
574         _Py_hashtable_destroy(wf->hashtable);
575     }
576 }
577 
578 /* version currently has no effect for writing ints. */
579 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)580 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
581 {
582     char buf[4];
583     WFILE wf;
584     memset(&wf, 0, sizeof(wf));
585     wf.fp = fp;
586     wf.ptr = wf.buf = buf;
587     wf.end = wf.ptr + sizeof(buf);
588     wf.error = WFERR_OK;
589     wf.version = version;
590     w_long(x, &wf);
591     w_flush(&wf);
592 }
593 
594 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)595 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
596 {
597     char buf[BUFSIZ];
598     WFILE wf;
599     memset(&wf, 0, sizeof(wf));
600     wf.fp = fp;
601     wf.ptr = wf.buf = buf;
602     wf.end = wf.ptr + sizeof(buf);
603     wf.error = WFERR_OK;
604     wf.version = version;
605     if (w_init_refs(&wf, version))
606         return; /* caller mush check PyErr_Occurred() */
607     w_object(x, &wf);
608     w_clear_refs(&wf);
609     w_flush(&wf);
610 }
611 
612 typedef struct {
613     FILE *fp;
614     int depth;
615     PyObject *readable;  /* Stream-like object being read from */
616     const char *ptr;
617     const char *end;
618     char *buf;
619     Py_ssize_t buf_size;
620     PyObject *refs;  /* a list */
621 } RFILE;
622 
623 static const char *
r_string(Py_ssize_t n,RFILE * p)624 r_string(Py_ssize_t n, RFILE *p)
625 {
626     Py_ssize_t read = -1;
627 
628     if (p->ptr != NULL) {
629         /* Fast path for loads() */
630         const char *res = p->ptr;
631         Py_ssize_t left = p->end - p->ptr;
632         if (left < n) {
633             PyErr_SetString(PyExc_EOFError,
634                             "marshal data too short");
635             return NULL;
636         }
637         p->ptr += n;
638         return res;
639     }
640     if (p->buf == NULL) {
641         p->buf = PyMem_MALLOC(n);
642         if (p->buf == NULL) {
643             PyErr_NoMemory();
644             return NULL;
645         }
646         p->buf_size = n;
647     }
648     else if (p->buf_size < n) {
649         char *tmp = PyMem_REALLOC(p->buf, n);
650         if (tmp == NULL) {
651             PyErr_NoMemory();
652             return NULL;
653         }
654         p->buf = tmp;
655         p->buf_size = n;
656     }
657 
658     if (!p->readable) {
659         assert(p->fp != NULL);
660         read = fread(p->buf, 1, n, p->fp);
661     }
662     else {
663         _Py_IDENTIFIER(readinto);
664         PyObject *res, *mview;
665         Py_buffer buf;
666 
667         if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
668             return NULL;
669         mview = PyMemoryView_FromBuffer(&buf);
670         if (mview == NULL)
671             return NULL;
672 
673         res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview);
674         if (res != NULL) {
675             read = PyNumber_AsSsize_t(res, PyExc_ValueError);
676             Py_DECREF(res);
677         }
678     }
679     if (read != n) {
680         if (!PyErr_Occurred()) {
681             if (read > n)
682                 PyErr_Format(PyExc_ValueError,
683                              "read() returned too much data: "
684                              "%zd bytes requested, %zd returned",
685                              n, read);
686             else
687                 PyErr_SetString(PyExc_EOFError,
688                                 "EOF read where not expected");
689         }
690         return NULL;
691     }
692     return p->buf;
693 }
694 
695 static int
r_byte(RFILE * p)696 r_byte(RFILE *p)
697 {
698     int c = EOF;
699 
700     if (p->ptr != NULL) {
701         if (p->ptr < p->end)
702             c = (unsigned char) *p->ptr++;
703         return c;
704     }
705     if (!p->readable) {
706         assert(p->fp);
707         c = getc(p->fp);
708     }
709     else {
710         const char *ptr = r_string(1, p);
711         if (ptr != NULL)
712             c = *(const unsigned char *) ptr;
713     }
714     return c;
715 }
716 
717 static int
r_short(RFILE * p)718 r_short(RFILE *p)
719 {
720     short x = -1;
721     const unsigned char *buffer;
722 
723     buffer = (const unsigned char *) r_string(2, p);
724     if (buffer != NULL) {
725         x = buffer[0];
726         x |= buffer[1] << 8;
727         /* Sign-extension, in case short greater than 16 bits */
728         x |= -(x & 0x8000);
729     }
730     return x;
731 }
732 
733 static long
r_long(RFILE * p)734 r_long(RFILE *p)
735 {
736     long x = -1;
737     const unsigned char *buffer;
738 
739     buffer = (const unsigned char *) r_string(4, p);
740     if (buffer != NULL) {
741         x = buffer[0];
742         x |= (long)buffer[1] << 8;
743         x |= (long)buffer[2] << 16;
744         x |= (long)buffer[3] << 24;
745 #if SIZEOF_LONG > 4
746         /* Sign extension for 64-bit machines */
747         x |= -(x & 0x80000000L);
748 #endif
749     }
750     return x;
751 }
752 
753 /* r_long64 deals with the TYPE_INT64 code. */
754 static PyObject *
r_long64(RFILE * p)755 r_long64(RFILE *p)
756 {
757     const unsigned char *buffer = (const unsigned char *) r_string(8, p);
758     if (buffer == NULL) {
759         return NULL;
760     }
761     return _PyLong_FromByteArray(buffer, 8,
762                                  1 /* little endian */,
763                                  1 /* signed */);
764 }
765 
766 static PyObject *
r_PyLong(RFILE * p)767 r_PyLong(RFILE *p)
768 {
769     PyLongObject *ob;
770     long n, size, i;
771     int j, md, shorts_in_top_digit;
772     digit d;
773 
774     n = r_long(p);
775     if (PyErr_Occurred())
776         return NULL;
777     if (n == 0)
778         return (PyObject *)_PyLong_New(0);
779     if (n < -SIZE32_MAX || n > SIZE32_MAX) {
780         PyErr_SetString(PyExc_ValueError,
781                        "bad marshal data (long size out of range)");
782         return NULL;
783     }
784 
785     size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
786     shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
787     ob = _PyLong_New(size);
788     if (ob == NULL)
789         return NULL;
790 
791     Py_SET_SIZE(ob, n > 0 ? size : -size);
792 
793     for (i = 0; i < size-1; i++) {
794         d = 0;
795         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
796             md = r_short(p);
797             if (PyErr_Occurred()) {
798                 Py_DECREF(ob);
799                 return NULL;
800             }
801             if (md < 0 || md > PyLong_MARSHAL_BASE)
802                 goto bad_digit;
803             d += (digit)md << j*PyLong_MARSHAL_SHIFT;
804         }
805         ob->ob_digit[i] = d;
806     }
807 
808     d = 0;
809     for (j=0; j < shorts_in_top_digit; j++) {
810         md = r_short(p);
811         if (PyErr_Occurred()) {
812             Py_DECREF(ob);
813             return NULL;
814         }
815         if (md < 0 || md > PyLong_MARSHAL_BASE)
816             goto bad_digit;
817         /* topmost marshal digit should be nonzero */
818         if (md == 0 && j == shorts_in_top_digit - 1) {
819             Py_DECREF(ob);
820             PyErr_SetString(PyExc_ValueError,
821                 "bad marshal data (unnormalized long data)");
822             return NULL;
823         }
824         d += (digit)md << j*PyLong_MARSHAL_SHIFT;
825     }
826     if (PyErr_Occurred()) {
827         Py_DECREF(ob);
828         return NULL;
829     }
830     /* top digit should be nonzero, else the resulting PyLong won't be
831        normalized */
832     ob->ob_digit[size-1] = d;
833     return (PyObject *)ob;
834   bad_digit:
835     Py_DECREF(ob);
836     PyErr_SetString(PyExc_ValueError,
837                     "bad marshal data (digit out of range in long)");
838     return NULL;
839 }
840 
841 static double
r_float_bin(RFILE * p)842 r_float_bin(RFILE *p)
843 {
844     const unsigned char *buf = (const unsigned char *) r_string(8, p);
845     if (buf == NULL)
846         return -1;
847     return _PyFloat_Unpack8(buf, 1);
848 }
849 
850 /* Issue #33720: Disable inlining for reducing the C stack consumption
851    on PGO builds. */
852 _Py_NO_INLINE static double
r_float_str(RFILE * p)853 r_float_str(RFILE *p)
854 {
855     int n;
856     char buf[256];
857     const char *ptr;
858     n = r_byte(p);
859     if (n == EOF) {
860         PyErr_SetString(PyExc_EOFError,
861             "EOF read where object expected");
862         return -1;
863     }
864     ptr = r_string(n, p);
865     if (ptr == NULL) {
866         return -1;
867     }
868     memcpy(buf, ptr, n);
869     buf[n] = '\0';
870     return PyOS_string_to_double(buf, NULL, NULL);
871 }
872 
873 /* allocate the reflist index for a new object. Return -1 on failure */
874 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)875 r_ref_reserve(int flag, RFILE *p)
876 {
877     if (flag) { /* currently only FLAG_REF is defined */
878         Py_ssize_t idx = PyList_GET_SIZE(p->refs);
879         if (idx >= 0x7ffffffe) {
880             PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
881             return -1;
882         }
883         if (PyList_Append(p->refs, Py_None) < 0)
884             return -1;
885         return idx;
886     } else
887         return 0;
888 }
889 
890 /* insert the new object 'o' to the reflist at previously
891  * allocated index 'idx'.
892  * 'o' can be NULL, in which case nothing is done.
893  * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
894  * if 'o' was non-NULL, and the function fails, 'o' is released and
895  * NULL returned. This simplifies error checking at the call site since
896  * a single test for NULL for the function result is enough.
897  */
898 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)899 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
900 {
901     if (o != NULL && flag) { /* currently only FLAG_REF is defined */
902         PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
903         Py_INCREF(o);
904         PyList_SET_ITEM(p->refs, idx, o);
905         Py_DECREF(tmp);
906     }
907     return o;
908 }
909 
910 /* combination of both above, used when an object can be
911  * created whenever it is seen in the file, as opposed to
912  * after having loaded its sub-objects.
913  */
914 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)915 r_ref(PyObject *o, int flag, RFILE *p)
916 {
917     assert(flag & FLAG_REF);
918     if (o == NULL)
919         return NULL;
920     if (PyList_Append(p->refs, o) < 0) {
921         Py_DECREF(o); /* release the new object */
922         return NULL;
923     }
924     return o;
925 }
926 
927 static PyObject *
r_object(RFILE * p)928 r_object(RFILE *p)
929 {
930     /* NULL is a valid return value, it does not necessarily means that
931        an exception is set. */
932     PyObject *v, *v2;
933     Py_ssize_t idx = 0;
934     long i, n;
935     int type, code = r_byte(p);
936     int flag, is_interned = 0;
937     PyObject *retval = NULL;
938 
939     if (code == EOF) {
940         PyErr_SetString(PyExc_EOFError,
941                         "EOF read where object expected");
942         return NULL;
943     }
944 
945     p->depth++;
946 
947     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
948         p->depth--;
949         PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
950         return NULL;
951     }
952 
953     flag = code & FLAG_REF;
954     type = code & ~FLAG_REF;
955 
956 #define R_REF(O) do{\
957     if (flag) \
958         O = r_ref(O, flag, p);\
959 } while (0)
960 
961     switch (type) {
962 
963     case TYPE_NULL:
964         break;
965 
966     case TYPE_NONE:
967         Py_INCREF(Py_None);
968         retval = Py_None;
969         break;
970 
971     case TYPE_STOPITER:
972         Py_INCREF(PyExc_StopIteration);
973         retval = PyExc_StopIteration;
974         break;
975 
976     case TYPE_ELLIPSIS:
977         Py_INCREF(Py_Ellipsis);
978         retval = Py_Ellipsis;
979         break;
980 
981     case TYPE_FALSE:
982         Py_INCREF(Py_False);
983         retval = Py_False;
984         break;
985 
986     case TYPE_TRUE:
987         Py_INCREF(Py_True);
988         retval = Py_True;
989         break;
990 
991     case TYPE_INT:
992         n = r_long(p);
993         retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
994         R_REF(retval);
995         break;
996 
997     case TYPE_INT64:
998         retval = r_long64(p);
999         R_REF(retval);
1000         break;
1001 
1002     case TYPE_LONG:
1003         retval = r_PyLong(p);
1004         R_REF(retval);
1005         break;
1006 
1007     case TYPE_FLOAT:
1008         {
1009             double x = r_float_str(p);
1010             if (x == -1.0 && PyErr_Occurred())
1011                 break;
1012             retval = PyFloat_FromDouble(x);
1013             R_REF(retval);
1014             break;
1015         }
1016 
1017     case TYPE_BINARY_FLOAT:
1018         {
1019             double x = r_float_bin(p);
1020             if (x == -1.0 && PyErr_Occurred())
1021                 break;
1022             retval = PyFloat_FromDouble(x);
1023             R_REF(retval);
1024             break;
1025         }
1026 
1027     case TYPE_COMPLEX:
1028         {
1029             Py_complex c;
1030             c.real = r_float_str(p);
1031             if (c.real == -1.0 && PyErr_Occurred())
1032                 break;
1033             c.imag = r_float_str(p);
1034             if (c.imag == -1.0 && PyErr_Occurred())
1035                 break;
1036             retval = PyComplex_FromCComplex(c);
1037             R_REF(retval);
1038             break;
1039         }
1040 
1041     case TYPE_BINARY_COMPLEX:
1042         {
1043             Py_complex c;
1044             c.real = r_float_bin(p);
1045             if (c.real == -1.0 && PyErr_Occurred())
1046                 break;
1047             c.imag = r_float_bin(p);
1048             if (c.imag == -1.0 && PyErr_Occurred())
1049                 break;
1050             retval = PyComplex_FromCComplex(c);
1051             R_REF(retval);
1052             break;
1053         }
1054 
1055     case TYPE_STRING:
1056         {
1057             const char *ptr;
1058             n = r_long(p);
1059             if (PyErr_Occurred())
1060                 break;
1061             if (n < 0 || n > SIZE32_MAX) {
1062                 PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1063                 break;
1064             }
1065             v = PyBytes_FromStringAndSize((char *)NULL, n);
1066             if (v == NULL)
1067                 break;
1068             ptr = r_string(n, p);
1069             if (ptr == NULL) {
1070                 Py_DECREF(v);
1071                 break;
1072             }
1073             memcpy(PyBytes_AS_STRING(v), ptr, n);
1074             retval = v;
1075             R_REF(retval);
1076             break;
1077         }
1078 
1079     case TYPE_ASCII_INTERNED:
1080         is_interned = 1;
1081         /* fall through */
1082     case TYPE_ASCII:
1083         n = r_long(p);
1084         if (PyErr_Occurred())
1085             break;
1086         if (n < 0 || n > SIZE32_MAX) {
1087             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1088             break;
1089         }
1090         goto _read_ascii;
1091 
1092     case TYPE_SHORT_ASCII_INTERNED:
1093         is_interned = 1;
1094         /* fall through */
1095     case TYPE_SHORT_ASCII:
1096         n = r_byte(p);
1097         if (n == EOF) {
1098             PyErr_SetString(PyExc_EOFError,
1099                 "EOF read where object expected");
1100             break;
1101         }
1102     _read_ascii:
1103         {
1104             const char *ptr;
1105             ptr = r_string(n, p);
1106             if (ptr == NULL)
1107                 break;
1108             v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1109             if (v == NULL)
1110                 break;
1111             if (is_interned)
1112                 PyUnicode_InternInPlace(&v);
1113             retval = v;
1114             R_REF(retval);
1115             break;
1116         }
1117 
1118     case TYPE_INTERNED:
1119         is_interned = 1;
1120         /* fall through */
1121     case TYPE_UNICODE:
1122         {
1123         const char *buffer;
1124 
1125         n = r_long(p);
1126         if (PyErr_Occurred())
1127             break;
1128         if (n < 0 || n > SIZE32_MAX) {
1129             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1130             break;
1131         }
1132         if (n != 0) {
1133             buffer = r_string(n, p);
1134             if (buffer == NULL)
1135                 break;
1136             v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1137         }
1138         else {
1139             v = PyUnicode_New(0, 0);
1140         }
1141         if (v == NULL)
1142             break;
1143         if (is_interned)
1144             PyUnicode_InternInPlace(&v);
1145         retval = v;
1146         R_REF(retval);
1147         break;
1148         }
1149 
1150     case TYPE_SMALL_TUPLE:
1151         n = (unsigned char) r_byte(p);
1152         if (PyErr_Occurred())
1153             break;
1154         goto _read_tuple;
1155     case TYPE_TUPLE:
1156         n = r_long(p);
1157         if (PyErr_Occurred())
1158             break;
1159         if (n < 0 || n > SIZE32_MAX) {
1160             PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1161             break;
1162         }
1163     _read_tuple:
1164         v = PyTuple_New(n);
1165         R_REF(v);
1166         if (v == NULL)
1167             break;
1168 
1169         for (i = 0; i < n; i++) {
1170             v2 = r_object(p);
1171             if ( v2 == NULL ) {
1172                 if (!PyErr_Occurred())
1173                     PyErr_SetString(PyExc_TypeError,
1174                         "NULL object in marshal data for tuple");
1175                 Py_DECREF(v);
1176                 v = NULL;
1177                 break;
1178             }
1179             PyTuple_SET_ITEM(v, i, v2);
1180         }
1181         retval = v;
1182         break;
1183 
1184     case TYPE_LIST:
1185         n = r_long(p);
1186         if (PyErr_Occurred())
1187             break;
1188         if (n < 0 || n > SIZE32_MAX) {
1189             PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1190             break;
1191         }
1192         v = PyList_New(n);
1193         R_REF(v);
1194         if (v == NULL)
1195             break;
1196         for (i = 0; i < n; i++) {
1197             v2 = r_object(p);
1198             if ( v2 == NULL ) {
1199                 if (!PyErr_Occurred())
1200                     PyErr_SetString(PyExc_TypeError,
1201                         "NULL object in marshal data for list");
1202                 Py_DECREF(v);
1203                 v = NULL;
1204                 break;
1205             }
1206             PyList_SET_ITEM(v, i, v2);
1207         }
1208         retval = v;
1209         break;
1210 
1211     case TYPE_DICT:
1212         v = PyDict_New();
1213         R_REF(v);
1214         if (v == NULL)
1215             break;
1216         for (;;) {
1217             PyObject *key, *val;
1218             key = r_object(p);
1219             if (key == NULL)
1220                 break;
1221             val = r_object(p);
1222             if (val == NULL) {
1223                 Py_DECREF(key);
1224                 break;
1225             }
1226             if (PyDict_SetItem(v, key, val) < 0) {
1227                 Py_DECREF(key);
1228                 Py_DECREF(val);
1229                 break;
1230             }
1231             Py_DECREF(key);
1232             Py_DECREF(val);
1233         }
1234         if (PyErr_Occurred()) {
1235             Py_DECREF(v);
1236             v = NULL;
1237         }
1238         retval = v;
1239         break;
1240 
1241     case TYPE_SET:
1242     case TYPE_FROZENSET:
1243         n = r_long(p);
1244         if (PyErr_Occurred())
1245             break;
1246         if (n < 0 || n > SIZE32_MAX) {
1247             PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1248             break;
1249         }
1250 
1251         if (n == 0 && type == TYPE_FROZENSET) {
1252             /* call frozenset() to get the empty frozenset singleton */
1253             v = _PyObject_CallNoArg((PyObject*)&PyFrozenSet_Type);
1254             if (v == NULL)
1255                 break;
1256             R_REF(v);
1257             retval = v;
1258         }
1259         else {
1260             v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1261             if (type == TYPE_SET) {
1262                 R_REF(v);
1263             } else {
1264                 /* must use delayed registration of frozensets because they must
1265                  * be init with a refcount of 1
1266                  */
1267                 idx = r_ref_reserve(flag, p);
1268                 if (idx < 0)
1269                     Py_CLEAR(v); /* signal error */
1270             }
1271             if (v == NULL)
1272                 break;
1273 
1274             for (i = 0; i < n; i++) {
1275                 v2 = r_object(p);
1276                 if ( v2 == NULL ) {
1277                     if (!PyErr_Occurred())
1278                         PyErr_SetString(PyExc_TypeError,
1279                             "NULL object in marshal data for set");
1280                     Py_DECREF(v);
1281                     v = NULL;
1282                     break;
1283                 }
1284                 if (PySet_Add(v, v2) == -1) {
1285                     Py_DECREF(v);
1286                     Py_DECREF(v2);
1287                     v = NULL;
1288                     break;
1289                 }
1290                 Py_DECREF(v2);
1291             }
1292             if (type != TYPE_SET)
1293                 v = r_ref_insert(v, idx, flag, p);
1294             retval = v;
1295         }
1296         break;
1297 
1298     case TYPE_CODE:
1299         {
1300             int argcount;
1301             int posonlyargcount;
1302             int kwonlyargcount;
1303             int nlocals;
1304             int stacksize;
1305             int flags;
1306             PyObject *code = NULL;
1307             PyObject *consts = NULL;
1308             PyObject *names = NULL;
1309             PyObject *varnames = NULL;
1310             PyObject *freevars = NULL;
1311             PyObject *cellvars = NULL;
1312             PyObject *filename = NULL;
1313             PyObject *name = NULL;
1314             int firstlineno;
1315             PyObject *lnotab = NULL;
1316 
1317             idx = r_ref_reserve(flag, p);
1318             if (idx < 0)
1319                 break;
1320 
1321             v = NULL;
1322 
1323             /* XXX ignore long->int overflows for now */
1324             argcount = (int)r_long(p);
1325             if (PyErr_Occurred())
1326                 goto code_error;
1327             posonlyargcount = (int)r_long(p);
1328             if (PyErr_Occurred()) {
1329                 goto code_error;
1330             }
1331             kwonlyargcount = (int)r_long(p);
1332             if (PyErr_Occurred())
1333                 goto code_error;
1334             nlocals = (int)r_long(p);
1335             if (PyErr_Occurred())
1336                 goto code_error;
1337             stacksize = (int)r_long(p);
1338             if (PyErr_Occurred())
1339                 goto code_error;
1340             flags = (int)r_long(p);
1341             if (PyErr_Occurred())
1342                 goto code_error;
1343             code = r_object(p);
1344             if (code == NULL)
1345                 goto code_error;
1346             consts = r_object(p);
1347             if (consts == NULL)
1348                 goto code_error;
1349             names = r_object(p);
1350             if (names == NULL)
1351                 goto code_error;
1352             varnames = r_object(p);
1353             if (varnames == NULL)
1354                 goto code_error;
1355             freevars = r_object(p);
1356             if (freevars == NULL)
1357                 goto code_error;
1358             cellvars = r_object(p);
1359             if (cellvars == NULL)
1360                 goto code_error;
1361             filename = r_object(p);
1362             if (filename == NULL)
1363                 goto code_error;
1364             name = r_object(p);
1365             if (name == NULL)
1366                 goto code_error;
1367             firstlineno = (int)r_long(p);
1368             if (firstlineno == -1 && PyErr_Occurred())
1369                 break;
1370             lnotab = r_object(p);
1371             if (lnotab == NULL)
1372                 goto code_error;
1373 
1374             if (PySys_Audit("code.__new__", "OOOiiiiii",
1375                             code, filename, name, argcount, posonlyargcount,
1376                             kwonlyargcount, nlocals, stacksize, flags) < 0) {
1377                 goto code_error;
1378             }
1379 
1380             v = (PyObject *) PyCode_NewWithPosOnlyArgs(
1381                             argcount, posonlyargcount, kwonlyargcount,
1382                             nlocals, stacksize, flags,
1383                             code, consts, names, varnames,
1384                             freevars, cellvars, filename, name,
1385                             firstlineno, lnotab);
1386             v = r_ref_insert(v, idx, flag, p);
1387 
1388           code_error:
1389             Py_XDECREF(code);
1390             Py_XDECREF(consts);
1391             Py_XDECREF(names);
1392             Py_XDECREF(varnames);
1393             Py_XDECREF(freevars);
1394             Py_XDECREF(cellvars);
1395             Py_XDECREF(filename);
1396             Py_XDECREF(name);
1397             Py_XDECREF(lnotab);
1398         }
1399         retval = v;
1400         break;
1401 
1402     case TYPE_REF:
1403         n = r_long(p);
1404         if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1405             if (n == -1 && PyErr_Occurred())
1406                 break;
1407             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1408             break;
1409         }
1410         v = PyList_GET_ITEM(p->refs, n);
1411         if (v == Py_None) {
1412             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1413             break;
1414         }
1415         Py_INCREF(v);
1416         retval = v;
1417         break;
1418 
1419     default:
1420         /* Bogus data got written, which isn't ideal.
1421            This will let you keep working and recover. */
1422         PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1423         break;
1424 
1425     }
1426     p->depth--;
1427     return retval;
1428 }
1429 
1430 static PyObject *
read_object(RFILE * p)1431 read_object(RFILE *p)
1432 {
1433     PyObject *v;
1434     if (PyErr_Occurred()) {
1435         fprintf(stderr, "XXX readobject called with exception set\n");
1436         return NULL;
1437     }
1438     v = r_object(p);
1439     if (v == NULL && !PyErr_Occurred())
1440         PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1441     return v;
1442 }
1443 
1444 int
PyMarshal_ReadShortFromFile(FILE * fp)1445 PyMarshal_ReadShortFromFile(FILE *fp)
1446 {
1447     RFILE rf;
1448     int res;
1449     assert(fp);
1450     rf.readable = NULL;
1451     rf.fp = fp;
1452     rf.end = rf.ptr = NULL;
1453     rf.buf = NULL;
1454     res = r_short(&rf);
1455     if (rf.buf != NULL)
1456         PyMem_FREE(rf.buf);
1457     return res;
1458 }
1459 
1460 long
PyMarshal_ReadLongFromFile(FILE * fp)1461 PyMarshal_ReadLongFromFile(FILE *fp)
1462 {
1463     RFILE rf;
1464     long res;
1465     rf.fp = fp;
1466     rf.readable = NULL;
1467     rf.ptr = rf.end = NULL;
1468     rf.buf = NULL;
1469     res = r_long(&rf);
1470     if (rf.buf != NULL)
1471         PyMem_FREE(rf.buf);
1472     return res;
1473 }
1474 
1475 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1476 static off_t
getfilesize(FILE * fp)1477 getfilesize(FILE *fp)
1478 {
1479     struct _Py_stat_struct st;
1480     if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1481         return -1;
1482 #if SIZEOF_OFF_T == 4
1483     else if (st.st_size >= INT_MAX)
1484         return (off_t)INT_MAX;
1485 #endif
1486     else
1487         return (off_t)st.st_size;
1488 }
1489 
1490 /* If we can get the size of the file up-front, and it's reasonably small,
1491  * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1492  * than reading a byte at a time from file; speeds .pyc imports.
1493  * CAUTION:  since this may read the entire remainder of the file, don't
1494  * call it unless you know you're done with the file.
1495  */
1496 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1497 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1498 {
1499 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1500 #define REASONABLE_FILE_LIMIT (1L << 18)
1501     off_t filesize;
1502     filesize = getfilesize(fp);
1503     if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1504         char* pBuf = (char *)PyMem_MALLOC(filesize);
1505         if (pBuf != NULL) {
1506             size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1507             PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1508             PyMem_FREE(pBuf);
1509             return v;
1510         }
1511 
1512     }
1513     /* We don't have fstat, or we do but the file is larger than
1514      * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1515      */
1516     return PyMarshal_ReadObjectFromFile(fp);
1517 
1518 #undef REASONABLE_FILE_LIMIT
1519 }
1520 
1521 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1522 PyMarshal_ReadObjectFromFile(FILE *fp)
1523 {
1524     RFILE rf;
1525     PyObject *result;
1526     rf.fp = fp;
1527     rf.readable = NULL;
1528     rf.depth = 0;
1529     rf.ptr = rf.end = NULL;
1530     rf.buf = NULL;
1531     rf.refs = PyList_New(0);
1532     if (rf.refs == NULL)
1533         return NULL;
1534     result = r_object(&rf);
1535     Py_DECREF(rf.refs);
1536     if (rf.buf != NULL)
1537         PyMem_FREE(rf.buf);
1538     return result;
1539 }
1540 
1541 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1542 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1543 {
1544     RFILE rf;
1545     PyObject *result;
1546     rf.fp = NULL;
1547     rf.readable = NULL;
1548     rf.ptr = str;
1549     rf.end = str + len;
1550     rf.buf = NULL;
1551     rf.depth = 0;
1552     rf.refs = PyList_New(0);
1553     if (rf.refs == NULL)
1554         return NULL;
1555     result = r_object(&rf);
1556     Py_DECREF(rf.refs);
1557     if (rf.buf != NULL)
1558         PyMem_FREE(rf.buf);
1559     return result;
1560 }
1561 
1562 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1563 PyMarshal_WriteObjectToString(PyObject *x, int version)
1564 {
1565     WFILE wf;
1566 
1567     memset(&wf, 0, sizeof(wf));
1568     wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1569     if (wf.str == NULL)
1570         return NULL;
1571     wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str);
1572     wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
1573     wf.error = WFERR_OK;
1574     wf.version = version;
1575     if (w_init_refs(&wf, version)) {
1576         Py_DECREF(wf.str);
1577         return NULL;
1578     }
1579     w_object(x, &wf);
1580     w_clear_refs(&wf);
1581     if (wf.str != NULL) {
1582         const char *base = PyBytes_AS_STRING(wf.str);
1583         if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1584             return NULL;
1585     }
1586     if (wf.error != WFERR_OK) {
1587         Py_XDECREF(wf.str);
1588         if (wf.error == WFERR_NOMEMORY)
1589             PyErr_NoMemory();
1590         else
1591             PyErr_SetString(PyExc_ValueError,
1592               (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1593                :"object too deeply nested to marshal");
1594         return NULL;
1595     }
1596     return wf.str;
1597 }
1598 
1599 /* And an interface for Python programs... */
1600 /*[clinic input]
1601 marshal.dump
1602 
1603     value: object
1604         Must be a supported type.
1605     file: object
1606         Must be a writeable binary file.
1607     version: int(c_default="Py_MARSHAL_VERSION") = version
1608         Indicates the data format that dump should use.
1609     /
1610 
1611 Write the value on the open file.
1612 
1613 If the value has (or contains an object that has) an unsupported type, a
1614 ValueError exception is raised - but garbage data will also be written
1615 to the file. The object will not be properly read back by load().
1616 [clinic start generated code]*/
1617 
1618 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version)1619 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1620                   int version)
1621 /*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1622 {
1623     /* XXX Quick hack -- need to do this differently */
1624     PyObject *s;
1625     PyObject *res;
1626     _Py_IDENTIFIER(write);
1627 
1628     s = PyMarshal_WriteObjectToString(value, version);
1629     if (s == NULL)
1630         return NULL;
1631     res = _PyObject_CallMethodIdOneArg(file, &PyId_write, s);
1632     Py_DECREF(s);
1633     return res;
1634 }
1635 
1636 /*[clinic input]
1637 marshal.load
1638 
1639     file: object
1640         Must be readable binary file.
1641     /
1642 
1643 Read one value from the open file and return it.
1644 
1645 If no valid value is read (e.g. because the data has a different Python
1646 version's incompatible marshal format), raise EOFError, ValueError or
1647 TypeError.
1648 
1649 Note: If an object containing an unsupported type was marshalled with
1650 dump(), load() will substitute None for the unmarshallable type.
1651 [clinic start generated code]*/
1652 
1653 static PyObject *
marshal_load(PyObject * module,PyObject * file)1654 marshal_load(PyObject *module, PyObject *file)
1655 /*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
1656 {
1657     PyObject *data, *result;
1658     _Py_IDENTIFIER(read);
1659     RFILE rf;
1660 
1661     /*
1662      * Make a call to the read method, but read zero bytes.
1663      * This is to ensure that the object passed in at least
1664      * has a read method which returns bytes.
1665      * This can be removed if we guarantee good error handling
1666      * for r_string()
1667      */
1668     data = _PyObject_CallMethodId(file, &PyId_read, "i", 0);
1669     if (data == NULL)
1670         return NULL;
1671     if (!PyBytes_Check(data)) {
1672         PyErr_Format(PyExc_TypeError,
1673                      "file.read() returned not bytes but %.100s",
1674                      Py_TYPE(data)->tp_name);
1675         result = NULL;
1676     }
1677     else {
1678         rf.depth = 0;
1679         rf.fp = NULL;
1680         rf.readable = file;
1681         rf.ptr = rf.end = NULL;
1682         rf.buf = NULL;
1683         if ((rf.refs = PyList_New(0)) != NULL) {
1684             result = read_object(&rf);
1685             Py_DECREF(rf.refs);
1686             if (rf.buf != NULL)
1687                 PyMem_FREE(rf.buf);
1688         } else
1689             result = NULL;
1690     }
1691     Py_DECREF(data);
1692     return result;
1693 }
1694 
1695 /*[clinic input]
1696 marshal.dumps
1697 
1698     value: object
1699         Must be a supported type.
1700     version: int(c_default="Py_MARSHAL_VERSION") = version
1701         Indicates the data format that dumps should use.
1702     /
1703 
1704 Return the bytes object that would be written to a file by dump(value, file).
1705 
1706 Raise a ValueError exception if value has (or contains an object that has) an
1707 unsupported type.
1708 [clinic start generated code]*/
1709 
1710 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version)1711 marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1712 /*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1713 {
1714     return PyMarshal_WriteObjectToString(value, version);
1715 }
1716 
1717 /*[clinic input]
1718 marshal.loads
1719 
1720     bytes: Py_buffer
1721     /
1722 
1723 Convert the bytes-like object to a value.
1724 
1725 If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
1726 bytes in the input are ignored.
1727 [clinic start generated code]*/
1728 
1729 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes)1730 marshal_loads_impl(PyObject *module, Py_buffer *bytes)
1731 /*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
1732 {
1733     RFILE rf;
1734     char *s = bytes->buf;
1735     Py_ssize_t n = bytes->len;
1736     PyObject* result;
1737     rf.fp = NULL;
1738     rf.readable = NULL;
1739     rf.ptr = s;
1740     rf.end = s + n;
1741     rf.depth = 0;
1742     if ((rf.refs = PyList_New(0)) == NULL)
1743         return NULL;
1744     result = read_object(&rf);
1745     Py_DECREF(rf.refs);
1746     return result;
1747 }
1748 
1749 static PyMethodDef marshal_methods[] = {
1750     MARSHAL_DUMP_METHODDEF
1751     MARSHAL_LOAD_METHODDEF
1752     MARSHAL_DUMPS_METHODDEF
1753     MARSHAL_LOADS_METHODDEF
1754     {NULL,              NULL}           /* sentinel */
1755 };
1756 
1757 
1758 PyDoc_STRVAR(module_doc,
1759 "This module contains functions that can read and write Python values in\n\
1760 a binary format. The format is specific to Python, but independent of\n\
1761 machine architecture issues.\n\
1762 \n\
1763 Not all Python object types are supported; in general, only objects\n\
1764 whose value is independent from a particular invocation of Python can be\n\
1765 written and read by this module. The following types are supported:\n\
1766 None, integers, floating point numbers, strings, bytes, bytearrays,\n\
1767 tuples, lists, sets, dictionaries, and code objects, where it\n\
1768 should be understood that tuples, lists and dictionaries are only\n\
1769 supported as long as the values contained therein are themselves\n\
1770 supported; and recursive lists and dictionaries should not be written\n\
1771 (they will cause infinite loops).\n\
1772 \n\
1773 Variables:\n\
1774 \n\
1775 version -- indicates the format that the module uses. Version 0 is the\n\
1776     historical format, version 1 shares interned strings and version 2\n\
1777     uses a binary format for floating point numbers.\n\
1778     Version 3 shares common object references (New in version 3.4).\n\
1779 \n\
1780 Functions:\n\
1781 \n\
1782 dump() -- write value to a file\n\
1783 load() -- read value from a file\n\
1784 dumps() -- marshal value as a bytes object\n\
1785 loads() -- read value from a bytes-like object");
1786 
1787 
1788 
1789 static struct PyModuleDef marshalmodule = {
1790     PyModuleDef_HEAD_INIT,
1791     "marshal",
1792     module_doc,
1793     0,
1794     marshal_methods,
1795     NULL,
1796     NULL,
1797     NULL,
1798     NULL
1799 };
1800 
1801 PyMODINIT_FUNC
PyMarshal_Init(void)1802 PyMarshal_Init(void)
1803 {
1804     PyObject *mod = PyModule_Create(&marshalmodule);
1805     if (mod == NULL)
1806         return NULL;
1807     if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
1808         Py_DECREF(mod);
1809         return NULL;
1810     }
1811     return mod;
1812 }
1813