• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* Write Python objects to files and read them back.
3    This is intended for writing and reading compiled Python code only;
4    a true persistent storage facility would be much harder, since
5    it would have to take circular links and sharing into account. */
6 
7 #define PY_SSIZE_T_CLEAN
8 
9 #include "Python.h"
10 #include "longintrepr.h"
11 #include "code.h"
12 #include "marshal.h"
13 
/* Absolute value.  The argument is evaluated more than once, so it must not
   have side effects. */
#define ABS(x) ((x) < 0 ? -(x) : (x))

/* Upper bound on how deeply marshalled objects may nest.  Once the object
 * stack reaches this depth the reader and the writer report an error instead
 * of recursing further and risking a crash of the interpreter.
 * Windows debug builds use a smaller limit.
 */
#if defined(MS_WINDOWS) && defined(_DEBUG)
#  define MAX_MARSHAL_STACK_DEPTH 1000
#else
#  define MAX_MARSHAL_STACK_DEPTH 2000
#endif

/* One-byte type codes that introduce each object in the marshal stream. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_STRINGREF          'R'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'

/* Status values stored in the `error` member of a WFILE while writing. */
#define WFERR_OK 0
#define WFERR_UNMARSHALLABLE 1
#define WFERR_NESTEDTOODEEP 2
#define WFERR_NOMEMORY 3
56 
57 typedef struct {
58     FILE *fp;
59     int error;  /* see WFERR_* values */
60     int depth;
61     /* If fp == NULL, the following are valid: */
62     PyObject *str;
63     char *ptr;
64     char *end;
65     PyObject *strings; /* dict on marshal, list on unmarshal */
66     int version;
67 } WFILE;
68 
/* Write the single byte `c` to `p`.
 *
 * When p->fp is set the byte goes to that stream.  Otherwise it is stored at
 * p->ptr, and w_more() is called to grow the buffer once p->ptr has reached
 * p->end.  `p` is evaluated several times and must be free of side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to exactly
 * one statement; a bare if/else chain could capture an `else` belonging to
 * the code that uses the macro.
 */
#define w_byte(c, p) do {                                   \
        if ((p)->fp)                                        \
            putc((c), (p)->fp);                             \
        else if ((p)->ptr != (p)->end)                      \
            *(p)->ptr++ = (c);                              \
        else                                                \
            w_more(c, p);                                   \
    } while (0)
72 
73 static void
w_more(int c,WFILE * p)74 w_more(int c, WFILE *p)
75 {
76     Py_ssize_t size, newsize;
77     if (p->str == NULL)
78         return; /* An error already occurred */
79     size = PyString_Size(p->str);
80     newsize = size + size + 1024;
81     if (newsize > 32*1024*1024) {
82         newsize = size + (size >> 3);           /* 12.5% overallocation */
83     }
84     if (_PyString_Resize(&p->str, newsize) != 0) {
85         p->ptr = p->end = NULL;
86     }
87     else {
88         p->ptr = PyString_AS_STRING((PyStringObject *)p->str) + size;
89         p->end =
90             PyString_AS_STRING((PyStringObject *)p->str) + newsize;
91         *p->ptr++ = Py_SAFE_DOWNCAST(c, int, char);
92     }
93 }
94 
95 static void
w_string(const char * s,Py_ssize_t n,WFILE * p)96 w_string(const char *s, Py_ssize_t n, WFILE *p)
97 {
98     if (p->fp != NULL) {
99         fwrite(s, 1, n, p->fp);
100     }
101     else {
102         while (--n >= 0) {
103             w_byte(*s, p);
104             s++;
105         }
106     }
107 }
108 
109 static void
w_short(int x,WFILE * p)110 w_short(int x, WFILE *p)
111 {
112     w_byte((char)( x      & 0xff), p);
113     w_byte((char)((x>> 8) & 0xff), p);
114 }
115 
116 static void
w_long(long x,WFILE * p)117 w_long(long x, WFILE *p)
118 {
119     w_byte((char)( x      & 0xff), p);
120     w_byte((char)((x>> 8) & 0xff), p);
121     w_byte((char)((x>>16) & 0xff), p);
122     w_byte((char)((x>>24) & 0xff), p);
123 }
124 
#if SIZEOF_LONG > 4
/* Write a 64-bit `x` to `p` as two 32-bit halves, low half first.  Only
   compiled where a C long is wider than 4 bytes. */
static void
w_long64(long x, WFILE *p)
{
    long low_half = x;
    long high_half = x >> 32;

    w_long(low_half, p);
    w_long(high_half, p);
}
#endif
133 
/* Largest size that can be stored in the 32-bit length field of the format. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p) writes the length `n` to `p` as a 32-bit value.
 *
 * Where size_t is at most 4 bytes it is simply w_long.  Otherwise a length
 * above SIZE32_MAX makes the macro undo one level of p->depth, set p->error
 * to WFERR_UNMARSHALLABLE and *return from the function that used it*, so it
 * may only be used in functions returning void.
 */
#if SIZEOF_SIZE_T <= 4
# define W_SIZE  w_long
#else
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            (p)->depth--;                       \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while (0)
#endif
148 
149 static void
w_pstring(const char * s,Py_ssize_t n,WFILE * p)150 w_pstring(const char *s, Py_ssize_t n, WFILE *p)
151 {
152         W_SIZE(n, p);
153         w_string(s, n, p);
154 }
155 
/* Python longs are assumed to be stored internally in a base that is a power
   of 2**15.  For portability the marshal format always uses digits in base
   exactly 2**15, independent of the internal digit size. */

#define PyLong_MARSHAL_SHIFT 15
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)

#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#  error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif

/* Number of marshal digits that make up one internal digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
167 
168 static void
w_PyLong(const PyLongObject * ob,WFILE * p)169 w_PyLong(const PyLongObject *ob, WFILE *p)
170 {
171     Py_ssize_t i, j, n, l;
172     digit d;
173 
174     w_byte(TYPE_LONG, p);
175     if (Py_SIZE(ob) == 0) {
176         w_long((long)0, p);
177         return;
178     }
179 
180     /* set l to number of base PyLong_MARSHAL_BASE digits */
181     n = ABS(Py_SIZE(ob));
182     l = (n-1) * PyLong_MARSHAL_RATIO;
183     d = ob->ob_digit[n-1];
184     assert(d != 0); /* a PyLong is always normalized */
185     do {
186         d >>= PyLong_MARSHAL_SHIFT;
187         l++;
188     } while (d != 0);
189     if (l > SIZE32_MAX) {
190         p->depth--;
191         p->error = WFERR_UNMARSHALLABLE;
192         return;
193     }
194     w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
195 
196     for (i=0; i < n-1; i++) {
197         d = ob->ob_digit[i];
198         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
199             w_short(d & PyLong_MARSHAL_MASK, p);
200             d >>= PyLong_MARSHAL_SHIFT;
201         }
202         assert (d == 0);
203     }
204     d = ob->ob_digit[n-1];
205     do {
206         w_short(d & PyLong_MARSHAL_MASK, p);
207         d >>= PyLong_MARSHAL_SHIFT;
208     } while (d != 0);
209 }
210 
211 static void
w_object(PyObject * v,WFILE * p)212 w_object(PyObject *v, WFILE *p)
213 {
214     Py_ssize_t i, n;
215 
216     p->depth++;
217 
218     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
219         p->error = WFERR_NESTEDTOODEEP;
220     }
221     else if (v == NULL) {
222         w_byte(TYPE_NULL, p);
223     }
224     else if (v == Py_None) {
225         w_byte(TYPE_NONE, p);
226     }
227     else if (v == PyExc_StopIteration) {
228         w_byte(TYPE_STOPITER, p);
229     }
230     else if (v == Py_Ellipsis) {
231         w_byte(TYPE_ELLIPSIS, p);
232     }
233     else if (v == Py_False) {
234         w_byte(TYPE_FALSE, p);
235     }
236     else if (v == Py_True) {
237         w_byte(TYPE_TRUE, p);
238     }
239     else if (PyInt_CheckExact(v)) {
240         long x = PyInt_AS_LONG((PyIntObject *)v);
241 #if SIZEOF_LONG > 4
242         long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
243         if (y && y != -1) {
244             w_byte(TYPE_INT64, p);
245             w_long64(x, p);
246         }
247         else
248 #endif
249             {
250             w_byte(TYPE_INT, p);
251             w_long(x, p);
252         }
253     }
254     else if (PyLong_CheckExact(v)) {
255         PyLongObject *ob = (PyLongObject *)v;
256         w_PyLong(ob, p);
257     }
258     else if (PyFloat_CheckExact(v)) {
259         if (p->version > 1) {
260             unsigned char buf[8];
261             if (_PyFloat_Pack8(PyFloat_AsDouble(v),
262                                buf, 1) < 0) {
263                 p->error = WFERR_UNMARSHALLABLE;
264                 return;
265             }
266             w_byte(TYPE_BINARY_FLOAT, p);
267             w_string((char*)buf, 8, p);
268         }
269         else {
270             char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
271                                               'g', 17, 0, NULL);
272             if (!buf) {
273                 p->error = WFERR_NOMEMORY;
274                 return;
275             }
276             n = strlen(buf);
277             w_byte(TYPE_FLOAT, p);
278             w_byte((int)n, p);
279             w_string(buf, n, p);
280             PyMem_Free(buf);
281         }
282     }
283 #ifndef WITHOUT_COMPLEX
284     else if (PyComplex_CheckExact(v)) {
285         if (p->version > 1) {
286             unsigned char buf[8];
287             if (_PyFloat_Pack8(PyComplex_RealAsDouble(v),
288                                buf, 1) < 0) {
289                 p->error = WFERR_UNMARSHALLABLE;
290                 return;
291             }
292             w_byte(TYPE_BINARY_COMPLEX, p);
293             w_string((char*)buf, 8, p);
294             if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v),
295                                buf, 1) < 0) {
296                 p->error = WFERR_UNMARSHALLABLE;
297                 return;
298             }
299             w_string((char*)buf, 8, p);
300         }
301         else {
302             char *buf;
303             w_byte(TYPE_COMPLEX, p);
304             buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
305                                         'g', 17, 0, NULL);
306             if (!buf) {
307                 p->error = WFERR_NOMEMORY;
308                 return;
309             }
310             n = strlen(buf);
311             w_byte((int)n, p);
312             w_string(buf, n, p);
313             PyMem_Free(buf);
314             buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
315                                         'g', 17, 0, NULL);
316             if (!buf) {
317                 p->error = WFERR_NOMEMORY;
318                 return;
319             }
320             n = strlen(buf);
321             w_byte((int)n, p);
322             w_string(buf, n, p);
323             PyMem_Free(buf);
324         }
325     }
326 #endif
327     else if (PyString_CheckExact(v)) {
328         if (p->strings && PyString_CHECK_INTERNED(v)) {
329             PyObject *o = PyDict_GetItem(p->strings, v);
330             if (o) {
331                 long w = PyInt_AsLong(o);
332                 w_byte(TYPE_STRINGREF, p);
333                 w_long(w, p);
334                 goto exit;
335             }
336             else {
337                 int ok;
338                 o = PyInt_FromSsize_t(PyDict_Size(p->strings));
339                 ok = o &&
340                      PyDict_SetItem(p->strings, v, o) >= 0;
341                 Py_XDECREF(o);
342                 if (!ok) {
343                     p->depth--;
344                     p->error = WFERR_UNMARSHALLABLE;
345                     return;
346                 }
347                 w_byte(TYPE_INTERNED, p);
348             }
349         }
350         else {
351             w_byte(TYPE_STRING, p);
352         }
353         w_pstring(PyBytes_AS_STRING(v), PyString_GET_SIZE(v), p);
354     }
355 #ifdef Py_USING_UNICODE
356     else if (PyUnicode_CheckExact(v)) {
357         PyObject *utf8;
358         utf8 = PyUnicode_AsUTF8String(v);
359         if (utf8 == NULL) {
360             p->depth--;
361             p->error = WFERR_UNMARSHALLABLE;
362             return;
363         }
364         w_byte(TYPE_UNICODE, p);
365         w_pstring(PyString_AS_STRING(utf8), PyString_GET_SIZE(utf8), p);
366         Py_DECREF(utf8);
367     }
368 #endif
369     else if (PyTuple_CheckExact(v)) {
370         w_byte(TYPE_TUPLE, p);
371         n = PyTuple_Size(v);
372         W_SIZE(n, p);
373         for (i = 0; i < n; i++) {
374             w_object(PyTuple_GET_ITEM(v, i), p);
375         }
376     }
377     else if (PyList_CheckExact(v)) {
378         w_byte(TYPE_LIST, p);
379         n = PyList_GET_SIZE(v);
380         W_SIZE(n, p);
381         for (i = 0; i < n; i++) {
382             w_object(PyList_GET_ITEM(v, i), p);
383         }
384     }
385     else if (PyDict_CheckExact(v)) {
386         Py_ssize_t pos;
387         PyObject *key, *value;
388         w_byte(TYPE_DICT, p);
389         /* This one is NULL object terminated! */
390         pos = 0;
391         while (PyDict_Next(v, &pos, &key, &value)) {
392             w_object(key, p);
393             w_object(value, p);
394         }
395         w_object((PyObject *)NULL, p);
396     }
397     else if (PyAnySet_CheckExact(v)) {
398         PyObject *value, *it;
399 
400         if (PyObject_TypeCheck(v, &PySet_Type))
401             w_byte(TYPE_SET, p);
402         else
403             w_byte(TYPE_FROZENSET, p);
404         n = PyObject_Size(v);
405         if (n == -1) {
406             p->depth--;
407             p->error = WFERR_UNMARSHALLABLE;
408             return;
409         }
410         W_SIZE(n, p);
411         it = PyObject_GetIter(v);
412         if (it == NULL) {
413             p->depth--;
414             p->error = WFERR_UNMARSHALLABLE;
415             return;
416         }
417         while ((value = PyIter_Next(it)) != NULL) {
418             w_object(value, p);
419             Py_DECREF(value);
420         }
421         Py_DECREF(it);
422         if (PyErr_Occurred()) {
423             p->depth--;
424             p->error = WFERR_UNMARSHALLABLE;
425             return;
426         }
427     }
428     else if (PyCode_Check(v)) {
429         PyCodeObject *co = (PyCodeObject *)v;
430         w_byte(TYPE_CODE, p);
431         w_long(co->co_argcount, p);
432         w_long(co->co_nlocals, p);
433         w_long(co->co_stacksize, p);
434         w_long(co->co_flags, p);
435         w_object(co->co_code, p);
436         w_object(co->co_consts, p);
437         w_object(co->co_names, p);
438         w_object(co->co_varnames, p);
439         w_object(co->co_freevars, p);
440         w_object(co->co_cellvars, p);
441         w_object(co->co_filename, p);
442         w_object(co->co_name, p);
443         w_long(co->co_firstlineno, p);
444         w_object(co->co_lnotab, p);
445     }
446     else if (PyObject_CheckReadBuffer(v)) {
447         /* Write unknown buffer-style objects as a string */
448         char *s;
449         PyBufferProcs *pb = v->ob_type->tp_as_buffer;
450         w_byte(TYPE_STRING, p);
451         n = (*pb->bf_getreadbuffer)(v, 0, (void **)&s);
452         w_pstring(s, n, p);
453     }
454     else {
455         w_byte(TYPE_UNKNOWN, p);
456         p->error = WFERR_UNMARSHALLABLE;
457     }
458    exit:
459     p->depth--;
460 }
461 
462 /* version currently has no effect for writing longs. */
463 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)464 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
465 {
466     WFILE wf;
467     wf.fp = fp;
468     wf.error = WFERR_OK;
469     wf.depth = 0;
470     wf.strings = NULL;
471     wf.version = version;
472     w_long(x, &wf);
473 }
474 
475 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)476 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
477 {
478     WFILE wf;
479     wf.fp = fp;
480     wf.error = WFERR_OK;
481     wf.depth = 0;
482     wf.strings = (version > 0) ? PyDict_New() : NULL;
483     wf.version = version;
484     w_object(x, &wf);
485     Py_XDECREF(wf.strings);
486 }
487 
488 typedef WFILE RFILE; /* Same struct with different invariants */
489 
490 #define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF)
491 
492 #define r_byte(p) ((p)->fp ? getc((p)->fp) : rs_byte(p))
493 
494 static Py_ssize_t
r_string(char * s,Py_ssize_t n,RFILE * p)495 r_string(char *s, Py_ssize_t n, RFILE *p)
496 {
497     if (p->fp != NULL)
498         /* The result fits into int because it must be <=n. */
499         return fread(s, 1, n, p->fp);
500     if (p->end - p->ptr < n)
501         n = p->end - p->ptr;
502     memcpy(s, p->ptr, n);
503     p->ptr += n;
504     return n;
505 }
506 
507 static int
r_short(RFILE * p)508 r_short(RFILE *p)
509 {
510     register short x;
511     x = r_byte(p);
512     x |= r_byte(p) << 8;
513     /* Sign-extension, in case short greater than 16 bits */
514     x |= -(x & 0x8000);
515     return x;
516 }
517 
518 static long
r_long(RFILE * p)519 r_long(RFILE *p)
520 {
521     register long x;
522     register FILE *fp = p->fp;
523     if (fp) {
524         x = getc(fp);
525         x |= (long)getc(fp) << 8;
526         x |= (long)getc(fp) << 16;
527         x |= (long)getc(fp) << 24;
528     }
529     else {
530         x = rs_byte(p);
531         x |= (long)rs_byte(p) << 8;
532         x |= (long)rs_byte(p) << 16;
533         x |= (long)rs_byte(p) << 24;
534     }
535 #if SIZEOF_LONG > 4
536     /* Sign extension for 64-bit machines */
537     x |= -(x & 0x80000000L);
538 #endif
539     return x;
540 }
541 
542 /* r_long64 deals with the TYPE_INT64 code.  On a machine with
543    sizeof(long) > 4, it returns a Python int object, else a Python long
544    object.  Note that w_long64 writes out TYPE_INT if 32 bits is enough,
545    so there's no inefficiency here in returning a PyLong on 32-bit boxes
546    for everything written via TYPE_INT64 (i.e., if an int is written via
547    TYPE_INT64, it *needs* more than 32 bits).
548 */
549 static PyObject *
r_long64(RFILE * p)550 r_long64(RFILE *p)
551 {
552     long lo4 = r_long(p);
553     long hi4 = r_long(p);
554 #if SIZEOF_LONG > 4
555     long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL);
556     return PyInt_FromLong(x);
557 #else
558     unsigned char buf[8];
559     int one = 1;
560     int is_little_endian = (int)*(char*)&one;
561     if (is_little_endian) {
562         memcpy(buf, &lo4, 4);
563         memcpy(buf+4, &hi4, 4);
564     }
565     else {
566         memcpy(buf, &hi4, 4);
567         memcpy(buf+4, &lo4, 4);
568     }
569     return _PyLong_FromByteArray(buf, 8, is_little_endian, 1);
570 #endif
571 }
572 
573 static PyObject *
r_PyLong(RFILE * p)574 r_PyLong(RFILE *p)
575 {
576     PyLongObject *ob;
577     long n, size, i;
578     int j, md, shorts_in_top_digit;
579     digit d;
580 
581     n = r_long(p);
582     if (n == 0)
583         return (PyObject *)_PyLong_New(0);
584     if (n < -SIZE32_MAX || n > SIZE32_MAX) {
585         PyErr_SetString(PyExc_ValueError,
586                        "bad marshal data (long size out of range)");
587         return NULL;
588     }
589 
590     size = 1 + (ABS(n) - 1) / PyLong_MARSHAL_RATIO;
591     shorts_in_top_digit = 1 + (ABS(n) - 1) % PyLong_MARSHAL_RATIO;
592     ob = _PyLong_New(size);
593     if (ob == NULL)
594         return NULL;
595     Py_SIZE(ob) = n > 0 ? size : -size;
596 
597     for (i = 0; i < size-1; i++) {
598         d = 0;
599         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
600             md = r_short(p);
601             if (md < 0 || md > PyLong_MARSHAL_BASE)
602                 goto bad_digit;
603             d += (digit)md << j*PyLong_MARSHAL_SHIFT;
604         }
605         ob->ob_digit[i] = d;
606     }
607     d = 0;
608     for (j=0; j < shorts_in_top_digit; j++) {
609         md = r_short(p);
610         if (md < 0 || md > PyLong_MARSHAL_BASE)
611             goto bad_digit;
612         /* topmost marshal digit should be nonzero */
613         if (md == 0 && j == shorts_in_top_digit - 1) {
614             Py_DECREF(ob);
615             PyErr_SetString(PyExc_ValueError,
616                 "bad marshal data (unnormalized long data)");
617             return NULL;
618         }
619         d += (digit)md << j*PyLong_MARSHAL_SHIFT;
620     }
621     /* top digit should be nonzero, else the resulting PyLong won't be
622        normalized */
623     ob->ob_digit[size-1] = d;
624     return (PyObject *)ob;
625   bad_digit:
626     Py_DECREF(ob);
627     PyErr_SetString(PyExc_ValueError,
628                     "bad marshal data (digit out of range in long)");
629     return NULL;
630 }
631 
632 
633 static PyObject *
r_object(RFILE * p)634 r_object(RFILE *p)
635 {
636     /* NULL is a valid return value, it does not necessarily means that
637        an exception is set. */
638     PyObject *v, *v2;
639     long i, n;
640     int type = r_byte(p);
641     PyObject *retval;
642 
643     p->depth++;
644 
645     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
646         p->depth--;
647         PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
648         return NULL;
649     }
650 
651     switch (type) {
652 
653     case EOF:
654         PyErr_SetString(PyExc_EOFError,
655                         "EOF read where object expected");
656         retval = NULL;
657         break;
658 
659     case TYPE_NULL:
660         retval = NULL;
661         break;
662 
663     case TYPE_NONE:
664         Py_INCREF(Py_None);
665         retval = Py_None;
666         break;
667 
668     case TYPE_STOPITER:
669         Py_INCREF(PyExc_StopIteration);
670         retval = PyExc_StopIteration;
671         break;
672 
673     case TYPE_ELLIPSIS:
674         Py_INCREF(Py_Ellipsis);
675         retval = Py_Ellipsis;
676         break;
677 
678     case TYPE_FALSE:
679         Py_INCREF(Py_False);
680         retval = Py_False;
681         break;
682 
683     case TYPE_TRUE:
684         Py_INCREF(Py_True);
685         retval = Py_True;
686         break;
687 
688     case TYPE_INT:
689         retval = PyInt_FromLong(r_long(p));
690         break;
691 
692     case TYPE_INT64:
693         retval = r_long64(p);
694         break;
695 
696     case TYPE_LONG:
697         retval = r_PyLong(p);
698         break;
699 
700     case TYPE_FLOAT:
701         {
702             char buf[256];
703             double dx;
704             n = r_byte(p);
705             if (n == EOF || r_string(buf, n, p) != n) {
706                 PyErr_SetString(PyExc_EOFError,
707                     "EOF read where object expected");
708                 retval = NULL;
709                 break;
710             }
711             buf[n] = '\0';
712             dx = PyOS_string_to_double(buf, NULL, NULL);
713             if (dx == -1.0 && PyErr_Occurred()) {
714                 retval = NULL;
715                 break;
716             }
717             retval = PyFloat_FromDouble(dx);
718             break;
719         }
720 
721     case TYPE_BINARY_FLOAT:
722         {
723             unsigned char buf[8];
724             double x;
725             if (r_string((char*)buf, 8, p) != 8) {
726                 PyErr_SetString(PyExc_EOFError,
727                     "EOF read where object expected");
728                 retval = NULL;
729                 break;
730             }
731             x = _PyFloat_Unpack8(buf, 1);
732             if (x == -1.0 && PyErr_Occurred()) {
733                 retval = NULL;
734                 break;
735             }
736             retval = PyFloat_FromDouble(x);
737             break;
738         }
739 
740 #ifndef WITHOUT_COMPLEX
741     case TYPE_COMPLEX:
742         {
743             char buf[256];
744             Py_complex c;
745             n = r_byte(p);
746             if (n == EOF || r_string(buf, n, p) != n) {
747                 PyErr_SetString(PyExc_EOFError,
748                     "EOF read where object expected");
749                 retval = NULL;
750                 break;
751             }
752             buf[n] = '\0';
753             c.real = PyOS_string_to_double(buf, NULL, NULL);
754             if (c.real == -1.0 && PyErr_Occurred()) {
755                 retval = NULL;
756                 break;
757             }
758             n = r_byte(p);
759             if (n == EOF || r_string(buf, n, p) != n) {
760                 PyErr_SetString(PyExc_EOFError,
761                     "EOF read where object expected");
762                 retval = NULL;
763                 break;
764             }
765             buf[n] = '\0';
766             c.imag = PyOS_string_to_double(buf, NULL, NULL);
767             if (c.imag == -1.0 && PyErr_Occurred()) {
768                 retval = NULL;
769                 break;
770             }
771             retval = PyComplex_FromCComplex(c);
772             break;
773         }
774 
775     case TYPE_BINARY_COMPLEX:
776         {
777             unsigned char buf[8];
778             Py_complex c;
779             if (r_string((char*)buf, 8, p) != 8) {
780                 PyErr_SetString(PyExc_EOFError,
781                     "EOF read where object expected");
782                 retval = NULL;
783                 break;
784             }
785             c.real = _PyFloat_Unpack8(buf, 1);
786             if (c.real == -1.0 && PyErr_Occurred()) {
787                 retval = NULL;
788                 break;
789             }
790             if (r_string((char*)buf, 8, p) != 8) {
791                 PyErr_SetString(PyExc_EOFError,
792                     "EOF read where object expected");
793                 retval = NULL;
794                 break;
795             }
796             c.imag = _PyFloat_Unpack8(buf, 1);
797             if (c.imag == -1.0 && PyErr_Occurred()) {
798                 retval = NULL;
799                 break;
800             }
801             retval = PyComplex_FromCComplex(c);
802             break;
803         }
804 #endif
805 
806     case TYPE_INTERNED:
807     case TYPE_STRING:
808         n = r_long(p);
809         if (n < 0 || n > SIZE32_MAX) {
810             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
811             retval = NULL;
812             break;
813         }
814         v = PyString_FromStringAndSize((char *)NULL, n);
815         if (v == NULL) {
816             retval = NULL;
817             break;
818         }
819         if (r_string(PyString_AS_STRING(v), n, p) != n) {
820             Py_DECREF(v);
821             PyErr_SetString(PyExc_EOFError,
822                             "EOF read where object expected");
823             retval = NULL;
824             break;
825         }
826         if (type == TYPE_INTERNED) {
827             PyString_InternInPlace(&v);
828             if (PyList_Append(p->strings, v) < 0) {
829                 retval = NULL;
830                 break;
831             }
832         }
833         retval = v;
834         break;
835 
836     case TYPE_STRINGREF:
837         n = r_long(p);
838         if (n < 0 || n >= PyList_GET_SIZE(p->strings)) {
839             PyErr_SetString(PyExc_ValueError, "bad marshal data (string ref out of range)");
840             retval = NULL;
841             break;
842         }
843         v = PyList_GET_ITEM(p->strings, n);
844         Py_INCREF(v);
845         retval = v;
846         break;
847 
848 #ifdef Py_USING_UNICODE
849     case TYPE_UNICODE:
850         {
851         char *buffer;
852 
853         n = r_long(p);
854         if (n < 0 || n > SIZE32_MAX) {
855             PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");
856             retval = NULL;
857             break;
858         }
859         buffer = PyMem_NEW(char, n);
860         if (buffer == NULL) {
861             retval = PyErr_NoMemory();
862             break;
863         }
864         if (r_string(buffer, n, p) != n) {
865             PyMem_DEL(buffer);
866             PyErr_SetString(PyExc_EOFError,
867                 "EOF read where object expected");
868             retval = NULL;
869             break;
870         }
871         v = PyUnicode_DecodeUTF8(buffer, n, NULL);
872         PyMem_DEL(buffer);
873         retval = v;
874         break;
875         }
876 #endif
877 
878     case TYPE_TUPLE:
879         n = r_long(p);
880         if (n < 0 || n > SIZE32_MAX) {
881             PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
882             retval = NULL;
883             break;
884         }
885         v = PyTuple_New(n);
886         if (v == NULL) {
887             retval = NULL;
888             break;
889         }
890         for (i = 0; i < n; i++) {
891             v2 = r_object(p);
892             if ( v2 == NULL ) {
893                 if (!PyErr_Occurred())
894                     PyErr_SetString(PyExc_TypeError,
895                         "NULL object in marshal data for tuple");
896                 Py_DECREF(v);
897                 v = NULL;
898                 break;
899             }
900             PyTuple_SET_ITEM(v, i, v2);
901         }
902         retval = v;
903         break;
904 
905     case TYPE_LIST:
906         n = r_long(p);
907         if (n < 0 || n > SIZE32_MAX) {
908             PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
909             retval = NULL;
910             break;
911         }
912         v = PyList_New(n);
913         if (v == NULL) {
914             retval = NULL;
915             break;
916         }
917         for (i = 0; i < n; i++) {
918             v2 = r_object(p);
919             if ( v2 == NULL ) {
920                 if (!PyErr_Occurred())
921                     PyErr_SetString(PyExc_TypeError,
922                         "NULL object in marshal data for list");
923                 Py_DECREF(v);
924                 v = NULL;
925                 break;
926             }
927             PyList_SET_ITEM(v, i, v2);
928         }
929         retval = v;
930         break;
931 
932     case TYPE_DICT:
933         v = PyDict_New();
934         if (v == NULL) {
935             retval = NULL;
936             break;
937         }
938         for (;;) {
939             PyObject *key, *val;
940             key = r_object(p);
941             if (key == NULL)
942                 break;
943             val = r_object(p);
944             if (val != NULL)
945                 PyDict_SetItem(v, key, val);
946             Py_DECREF(key);
947             Py_XDECREF(val);
948         }
949         if (PyErr_Occurred()) {
950             Py_DECREF(v);
951             v = NULL;
952         }
953         retval = v;
954         break;
955 
956     case TYPE_SET:
957     case TYPE_FROZENSET:
958         n = r_long(p);
959         if (n < 0 || n > SIZE32_MAX) {
960             PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
961             retval = NULL;
962             break;
963         }
964         v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
965         if (v == NULL) {
966             retval = NULL;
967             break;
968         }
969         for (i = 0; i < n; i++) {
970             v2 = r_object(p);
971             if ( v2 == NULL ) {
972                 if (!PyErr_Occurred())
973                     PyErr_SetString(PyExc_TypeError,
974                         "NULL object in marshal data for set");
975                 Py_DECREF(v);
976                 v = NULL;
977                 break;
978             }
979             if (PySet_Add(v, v2) == -1) {
980                 Py_DECREF(v);
981                 Py_DECREF(v2);
982                 v = NULL;
983                 break;
984             }
985             Py_DECREF(v2);
986         }
987         retval = v;
988         break;
989 
990     case TYPE_CODE:
991         if (PyEval_GetRestricted()) {
992             PyErr_SetString(PyExc_RuntimeError,
993                 "cannot unmarshal code objects in "
994                 "restricted execution mode");
995             retval = NULL;
996             break;
997         }
998         else {
999             int argcount;
1000             int nlocals;
1001             int stacksize;
1002             int flags;
1003             PyObject *code = NULL;
1004             PyObject *consts = NULL;
1005             PyObject *names = NULL;
1006             PyObject *varnames = NULL;
1007             PyObject *freevars = NULL;
1008             PyObject *cellvars = NULL;
1009             PyObject *filename = NULL;
1010             PyObject *name = NULL;
1011             int firstlineno;
1012             PyObject *lnotab = NULL;
1013 
1014             v = NULL;
1015 
1016             /* XXX ignore long->int overflows for now */
1017             argcount = (int)r_long(p);
1018             nlocals = (int)r_long(p);
1019             stacksize = (int)r_long(p);
1020             flags = (int)r_long(p);
1021             code = r_object(p);
1022             if (code == NULL)
1023                 goto code_error;
1024             consts = r_object(p);
1025             if (consts == NULL)
1026                 goto code_error;
1027             names = r_object(p);
1028             if (names == NULL)
1029                 goto code_error;
1030             varnames = r_object(p);
1031             if (varnames == NULL)
1032                 goto code_error;
1033             freevars = r_object(p);
1034             if (freevars == NULL)
1035                 goto code_error;
1036             cellvars = r_object(p);
1037             if (cellvars == NULL)
1038                 goto code_error;
1039             filename = r_object(p);
1040             if (filename == NULL)
1041                 goto code_error;
1042             name = r_object(p);
1043             if (name == NULL)
1044                 goto code_error;
1045             firstlineno = (int)r_long(p);
1046             lnotab = r_object(p);
1047             if (lnotab == NULL)
1048                 goto code_error;
1049 
1050             v = (PyObject *) PyCode_New(
1051                             argcount, nlocals, stacksize, flags,
1052                             code, consts, names, varnames,
1053                             freevars, cellvars, filename, name,
1054                             firstlineno, lnotab);
1055 
1056           code_error:
1057             Py_XDECREF(code);
1058             Py_XDECREF(consts);
1059             Py_XDECREF(names);
1060             Py_XDECREF(varnames);
1061             Py_XDECREF(freevars);
1062             Py_XDECREF(cellvars);
1063             Py_XDECREF(filename);
1064             Py_XDECREF(name);
1065             Py_XDECREF(lnotab);
1066 
1067         }
1068         retval = v;
1069         break;
1070 
1071     default:
1072         /* Bogus data got written, which isn't ideal.
1073            This will let you keep working and recover. */
1074         PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1075         retval = NULL;
1076         break;
1077 
1078     }
1079     p->depth--;
1080     return retval;
1081 }
1082 
1083 static PyObject *
read_object(RFILE * p)1084 read_object(RFILE *p)
1085 {
1086     PyObject *v;
1087     if (PyErr_Occurred()) {
1088         fprintf(stderr, "XXX readobject called with exception set\n");
1089         return NULL;
1090     }
1091     v = r_object(p);
1092     if (v == NULL && !PyErr_Occurred())
1093         PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1094     return v;
1095 }
1096 
1097 int
PyMarshal_ReadShortFromFile(FILE * fp)1098 PyMarshal_ReadShortFromFile(FILE *fp)
1099 {
1100     RFILE rf;
1101     assert(fp);
1102     rf.fp = fp;
1103     rf.strings = NULL;
1104     rf.end = rf.ptr = NULL;
1105     return r_short(&rf);
1106 }
1107 
1108 long
PyMarshal_ReadLongFromFile(FILE * fp)1109 PyMarshal_ReadLongFromFile(FILE *fp)
1110 {
1111     RFILE rf;
1112     rf.fp = fp;
1113     rf.strings = NULL;
1114     rf.ptr = rf.end = NULL;
1115     return r_long(&rf);
1116 }
1117 
1118 #ifdef HAVE_FSTAT
/* Return the size in bytes that fstat() reports for the file behind fp,
 * or -1 if fstat() fails (size unknown). */
static off_t
getfilesize(FILE *fp)
{
    struct stat info;

    return (fstat(fileno(fp), &info) == 0) ? info.st_size : -1;
}
1129 #endif
1130 
1131 /* If we can get the size of the file up-front, and it's reasonably small,
1132  * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1133  * than reading a byte at a time from file; speeds .pyc imports.
1134  * CAUTION:  since this may read the entire remainder of the file, don't
1135  * call it unless you know you're done with the file.
1136  */
1137 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1138 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1139 {
1140 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1141 #define REASONABLE_FILE_LIMIT (1L << 18)
1142 #ifdef HAVE_FSTAT
1143     off_t filesize;
1144     filesize = getfilesize(fp);
1145     if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1146         char* pBuf = (char *)PyMem_MALLOC(filesize);
1147         if (pBuf != NULL) {
1148             size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1149             PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1150             PyMem_FREE(pBuf);
1151             return v;
1152         }
1153 
1154     }
1155 #endif
1156     /* We don't have fstat, or we do but the file is larger than
1157      * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1158      */
1159     return PyMarshal_ReadObjectFromFile(fp);
1160 
1161 #undef REASONABLE_FILE_LIMIT
1162 }
1163 
1164 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1165 PyMarshal_ReadObjectFromFile(FILE *fp)
1166 {
1167     RFILE rf;
1168     PyObject *result;
1169     rf.fp = fp;
1170     rf.strings = PyList_New(0);
1171     rf.depth = 0;
1172     rf.ptr = rf.end = NULL;
1173     result = r_object(&rf);
1174     Py_DECREF(rf.strings);
1175     return result;
1176 }
1177 
1178 PyObject *
PyMarshal_ReadObjectFromString(char * str,Py_ssize_t len)1179 PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len)
1180 {
1181     RFILE rf;
1182     PyObject *result;
1183     rf.fp = NULL;
1184     rf.ptr = str;
1185     rf.end = str + len;
1186     rf.strings = PyList_New(0);
1187     rf.depth = 0;
1188     result = r_object(&rf);
1189     Py_DECREF(rf.strings);
1190     return result;
1191 }
1192 
1193 static void
set_error(int error)1194 set_error(int error)
1195 {
1196     switch (error) {
1197     case WFERR_NOMEMORY:
1198         PyErr_NoMemory();
1199         break;
1200     case WFERR_UNMARSHALLABLE:
1201         PyErr_SetString(PyExc_ValueError, "unmarshallable object");
1202         break;
1203     case WFERR_NESTEDTOODEEP:
1204     default:
1205         PyErr_SetString(PyExc_ValueError,
1206             "object too deeply nested to marshal");
1207         break;
1208     }
1209 }
1210 
1211 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1212 PyMarshal_WriteObjectToString(PyObject *x, int version)
1213 {
1214     WFILE wf;
1215     wf.fp = NULL;
1216     wf.str = PyString_FromStringAndSize((char *)NULL, 50);
1217     if (wf.str == NULL)
1218         return NULL;
1219     wf.ptr = PyString_AS_STRING((PyStringObject *)wf.str);
1220     wf.end = wf.ptr + PyString_Size(wf.str);
1221     wf.error = WFERR_OK;
1222     wf.depth = 0;
1223     wf.version = version;
1224     wf.strings = (version > 0) ? PyDict_New() : NULL;
1225     w_object(x, &wf);
1226     Py_XDECREF(wf.strings);
1227     if (wf.str != NULL) {
1228         char *base = PyString_AS_STRING((PyStringObject *)wf.str);
1229         if (wf.ptr - base > PY_SSIZE_T_MAX) {
1230             Py_DECREF(wf.str);
1231             PyErr_SetString(PyExc_OverflowError,
1232                             "too much marshall data for a string");
1233             return NULL;
1234         }
1235         if (_PyString_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)))
1236             return NULL;
1237     }
1238     if (wf.error != WFERR_OK) {
1239         Py_XDECREF(wf.str);
1240         set_error(wf.error);
1241         return NULL;
1242     }
1243     return wf.str;
1244 }
1245 
1246 /* And an interface for Python programs... */
1247 
1248 static PyObject *
marshal_dump(PyObject * self,PyObject * args)1249 marshal_dump(PyObject *self, PyObject *args)
1250 {
1251     WFILE wf;
1252     PyObject *x;
1253     PyObject *f;
1254     int version = Py_MARSHAL_VERSION;
1255     if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version))
1256         return NULL;
1257     if (!PyFile_Check(f)) {
1258         PyErr_SetString(PyExc_TypeError,
1259                         "marshal.dump() 2nd arg must be file");
1260         return NULL;
1261     }
1262     wf.fp = PyFile_AsFile(f);
1263     wf.str = NULL;
1264     wf.ptr = wf.end = NULL;
1265     wf.error = WFERR_OK;
1266     wf.depth = 0;
1267     wf.strings = (version > 0) ? PyDict_New() : 0;
1268     wf.version = version;
1269     w_object(x, &wf);
1270     Py_XDECREF(wf.strings);
1271     if (wf.error != WFERR_OK) {
1272         set_error(wf.error);
1273         return NULL;
1274     }
1275     Py_INCREF(Py_None);
1276     return Py_None;
1277 }
1278 
1279 PyDoc_STRVAR(dump_doc,
1280 "dump(value, file[, version])\n\
1281 \n\
1282 Write the value on the open file. The value must be a supported type.\n\
1283 The file must be an open file object such as sys.stdout or returned by\n\
1284 open() or os.popen(). It must be opened in binary mode ('wb' or 'w+b').\n\
1285 \n\
1286 If the value has (or contains an object that has) an unsupported type, a\n\
1287 ValueError exception is raised — but garbage data will also be written\n\
1288 to the file. The object will not be properly read back by load()\n\
1289 \n\
1290 New in version 2.4: The version argument indicates the data format that\n\
1291 dump should use.");
1292 
1293 static PyObject *
marshal_load(PyObject * self,PyObject * f)1294 marshal_load(PyObject *self, PyObject *f)
1295 {
1296     RFILE rf;
1297     PyObject *result;
1298     if (!PyFile_Check(f)) {
1299         PyErr_SetString(PyExc_TypeError,
1300                         "marshal.load() arg must be file");
1301         return NULL;
1302     }
1303     rf.fp = PyFile_AsFile(f);
1304     rf.strings = PyList_New(0);
1305     rf.depth = 0;
1306     result = read_object(&rf);
1307     Py_DECREF(rf.strings);
1308     return result;
1309 }
1310 
1311 PyDoc_STRVAR(load_doc,
1312 "load(file)\n\
1313 \n\
1314 Read one value from the open file and return it. If no valid value is\n\
1315 read (e.g. because the data has a different Python version’s\n\
1316 incompatible marshal format), raise EOFError, ValueError or TypeError.\n\
1317 The file must be an open file object opened in binary mode ('rb' or\n\
1318 'r+b').\n\
1319 \n\
1320 Note: If an object containing an unsupported type was marshalled with\n\
1321 dump(), load() will substitute None for the unmarshallable type.");
1322 
1323 
1324 static PyObject *
marshal_dumps(PyObject * self,PyObject * args)1325 marshal_dumps(PyObject *self, PyObject *args)
1326 {
1327     PyObject *x;
1328     int version = Py_MARSHAL_VERSION;
1329     if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version))
1330         return NULL;
1331     return PyMarshal_WriteObjectToString(x, version);
1332 }
1333 
1334 PyDoc_STRVAR(dumps_doc,
1335 "dumps(value[, version])\n\
1336 \n\
1337 Return the string that would be written to a file by dump(value, file).\n\
1338 The value must be a supported type. Raise a ValueError exception if\n\
1339 value has (or contains an object that has) an unsupported type.\n\
1340 \n\
1341 New in version 2.4: The version argument indicates the data format that\n\
1342 dumps should use.");
1343 
1344 
1345 static PyObject *
marshal_loads(PyObject * self,PyObject * args)1346 marshal_loads(PyObject *self, PyObject *args)
1347 {
1348     RFILE rf;
1349     char *s;
1350     Py_ssize_t n;
1351     PyObject* result;
1352     if (!PyArg_ParseTuple(args, "s#:loads", &s, &n))
1353         return NULL;
1354     rf.fp = NULL;
1355     rf.ptr = s;
1356     rf.end = s + n;
1357     rf.strings = PyList_New(0);
1358     rf.depth = 0;
1359     result = read_object(&rf);
1360     Py_DECREF(rf.strings);
1361     return result;
1362 }
1363 
1364 PyDoc_STRVAR(loads_doc,
1365 "loads(string)\n\
1366 \n\
1367 Convert the string to a value. If no valid value is found, raise\n\
1368 EOFError, ValueError or TypeError. Extra characters in the string are\n\
1369 ignored.");
1370 
1371 static PyMethodDef marshal_methods[] = {
1372     {"dump",            marshal_dump,   METH_VARARGS,   dump_doc},
1373     {"load",            marshal_load,   METH_O,         load_doc},
1374     {"dumps",           marshal_dumps,  METH_VARARGS,   dumps_doc},
1375     {"loads",           marshal_loads,  METH_VARARGS,   loads_doc},
1376     {NULL,              NULL}           /* sentinel */
1377 };
1378 
1379 PyDoc_STRVAR(marshal_doc,
1380 "This module contains functions that can read and write Python values in\n\
1381 a binary format. The format is specific to Python, but independent of\n\
1382 machine architecture issues.\n\
1383 \n\
1384 Not all Python object types are supported; in general, only objects\n\
1385 whose value is independent from a particular invocation of Python can be\n\
1386 written and read by this module. The following types are supported:\n\
1387 None, integers, long integers, floating point numbers, strings, Unicode\n\
1388 objects, tuples, lists, sets, dictionaries, and code objects, where it\n\
1389 should be understood that tuples, lists and dictionaries are only\n\
1390 supported as long as the values contained therein are themselves\n\
1391 supported; and recursive lists and dictionaries should not be written\n\
1392 (they will cause infinite loops).\n\
1393 \n\
1394 Variables:\n\
1395 \n\
1396 version -- indicates the format that the module uses. Version 0 is the\n\
1397     historical format, version 1 (added in Python 2.4) shares interned\n\
1398     strings and version 2 (added in Python 2.5) uses a binary format for\n\
1399     floating point numbers. (New in version 2.4)\n\
1400 \n\
1401 Functions:\n\
1402 \n\
1403 dump() -- write value to a file\n\
1404 load() -- read value from a file\n\
1405 dumps() -- write value to a string\n\
1406 loads() -- read value from a string");
1407 
1408 
1409 PyMODINIT_FUNC
PyMarshal_Init(void)1410 PyMarshal_Init(void)
1411 {
1412     PyObject *mod = Py_InitModule3("marshal", marshal_methods,
1413         marshal_doc);
1414     if (mod == NULL)
1415         return;
1416     PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION);
1417 }
1418