• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 #include "structmember.h"
3 #include "osdefs.h"
4 #include "marshal.h"
5 #include <time.h>
6 
7 
/* Bit flags stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One candidate file-name suffix tried while looking up a module,
   together with the IS_* flags describing what a match means. */
struct st_zip_searchorder {
    char suffix[14];    /* suffix appended to the module path */
    int type;           /* bitwise OR of IS_* flags */
};
16 
17 /* zip_searchorder defines how we search for a module in the Zip
18    archive: we first search for a package __init__, then for
19    non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
20    are swapped by initzipimport() if we run in optimized mode. Also,
21    '/' is replaced by SEP there. */
22 static struct st_zip_searchorder zip_searchorder[] = {
23     {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
24     {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
25     {"/__init__.py", IS_PACKAGE | IS_SOURCE},
26     {".pyc", IS_BYTECODE},
27     {".pyo", IS_BYTECODE},
28     {".py", IS_SOURCE},
29     {"", 0}
30 };
31 
32 /* zipimporter object definition and support */
33 
34 typedef struct _zipimporter ZipImporter;
35 
36 struct _zipimporter {
37     PyObject_HEAD
38     PyObject *archive;  /* pathname of the Zip archive */
39     PyObject *prefix;   /* file prefix: "a/sub/directory/" */
40     PyObject *files;    /* dict with file info {path: toc_entry} */
41 };
42 
43 static PyObject *ZipImportError;
44 static PyObject *zip_directory_cache = NULL;
45 
46 // GOOGLE(nanzhang): Changed two functions below to be visible to launcher so
47 // that launcher can access the zip metadata section.
48 /* forward decls */
49 PyObject *read_directory(const char *archive);
50 PyObject *get_data(const char *archive, PyObject *toc_entry);
51 static PyObject *get_module_code(ZipImporter *self, char *fullname,
52                                  int *p_ispackage, char **p_modpath);
53 
54 
/* True if 'op' is a zipimporter instance (or an instance of a subtype).
   The argument is parenthesized so that any expression can be passed. */
#define ZipImporter_Check(op) PyObject_TypeCheck((op), &ZipImporter_Type)
56 
57 
58 /* zipimporter.__init__
59    Split the "subdirectory" from the Zip archive path, lookup a matching
60    entry in sys.path_importer_cache, fetch the file directory from there
61    if found, or else read it from the archive. */
62 static int
zipimporter_init(ZipImporter * self,PyObject * args,PyObject * kwds)63 zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
64 {
65     char *path, *p, *prefix, buf[MAXPATHLEN+2];
66     size_t len;
67 
68     if (!_PyArg_NoKeywords("zipimporter()", kwds))
69         return -1;
70 
71     if (!PyArg_ParseTuple(args, "s:zipimporter",
72                           &path))
73         return -1;
74 
75     len = strlen(path);
76     if (len == 0) {
77         PyErr_SetString(ZipImportError, "archive path is empty");
78         return -1;
79     }
80     if (len >= MAXPATHLEN) {
81         PyErr_SetString(ZipImportError,
82                         "archive path too long");
83         return -1;
84     }
85     strcpy(buf, path);
86 
87 #ifdef ALTSEP
88     for (p = buf; *p; p++) {
89         if (*p == ALTSEP)
90             *p = SEP;
91     }
92 #endif
93 
94     path = NULL;
95     prefix = NULL;
96     for (;;) {
97 #ifndef RISCOS
98         struct stat statbuf;
99         int rv;
100 
101         rv = stat(buf, &statbuf);
102         if (rv == 0) {
103             /* it exists */
104             if (S_ISREG(statbuf.st_mode))
105                 /* it's a file */
106                 path = buf;
107             break;
108         }
109 #else
110         if (object_exists(buf)) {
111             /* it exists */
112             if (isfile(buf))
113                 /* it's a file */
114                 path = buf;
115             break;
116         }
117 #endif
118         /* back up one path element */
119         p = strrchr(buf, SEP);
120         if (prefix != NULL)
121             *prefix = SEP;
122         if (p == NULL)
123             break;
124         *p = '\0';
125         prefix = p;
126     }
127     if (path != NULL) {
128         PyObject *files;
129         files = PyDict_GetItemString(zip_directory_cache, path);
130         if (files == NULL) {
131             files = read_directory(buf);
132             if (files == NULL)
133                 return -1;
134             if (PyDict_SetItemString(zip_directory_cache, path,
135                                      files) != 0)
136                 return -1;
137         }
138         else
139             Py_INCREF(files);
140         self->files = files;
141     }
142     else {
143         PyErr_SetString(ZipImportError, "not a Zip file");
144         return -1;
145     }
146 
147     if (prefix == NULL)
148         prefix = "";
149     else {
150         prefix++;
151         len = strlen(prefix);
152         if (prefix[len-1] != SEP) {
153             /* add trailing SEP */
154             prefix[len] = SEP;
155             prefix[len + 1] = '\0';
156         }
157     }
158 
159     self->archive = PyString_FromString(buf);
160     if (self->archive == NULL)
161         return -1;
162 
163     self->prefix = PyString_FromString(prefix);
164     if (self->prefix == NULL)
165         return -1;
166 
167     return 0;
168 }
169 
170 /* GC support. */
171 static int
zipimporter_traverse(PyObject * obj,visitproc visit,void * arg)172 zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
173 {
174     ZipImporter *self = (ZipImporter *)obj;
175     Py_VISIT(self->files);
176     return 0;
177 }
178 
179 static void
zipimporter_dealloc(ZipImporter * self)180 zipimporter_dealloc(ZipImporter *self)
181 {
182     PyObject_GC_UnTrack(self);
183     Py_XDECREF(self->archive);
184     Py_XDECREF(self->prefix);
185     Py_XDECREF(self->files);
186     Py_TYPE(self)->tp_free((PyObject *)self);
187 }
188 
189 static PyObject *
zipimporter_repr(ZipImporter * self)190 zipimporter_repr(ZipImporter *self)
191 {
192     char buf[500];
193     char *archive = "???";
194     char *prefix = "";
195 
196     if (self->archive != NULL && PyString_Check(self->archive))
197         archive = PyString_AsString(self->archive);
198     if (self->prefix != NULL && PyString_Check(self->prefix))
199         prefix = PyString_AsString(self->prefix);
200     if (prefix != NULL && *prefix)
201         PyOS_snprintf(buf, sizeof(buf),
202                       "<zipimporter object \"%.300s%c%.150s\">",
203                       archive, SEP, prefix);
204     else
205         PyOS_snprintf(buf, sizeof(buf),
206                       "<zipimporter object \"%.300s\">",
207                       archive);
208     return PyString_FromString(buf);
209 }
210 
/* Return the part of 'fullname' after its last '.', i.e.
   fullname.split(".")[-1].  The result points into 'fullname'; when
   there is no dot, 'fullname' itself is returned. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return last_dot != NULL ? last_dot + 1 : fullname;
}
222 
223 /* Given a (sub)modulename, write the potential file path in the
224    archive (without extension) to the path buffer. Return the
225    length of the resulting string. */
226 static int
make_filename(char * prefix,char * name,char * path)227 make_filename(char *prefix, char *name, char *path)
228 {
229     size_t len;
230     char *p;
231 
232     len = strlen(prefix);
233 
234     /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
235     if (len + strlen(name) + 13 >= MAXPATHLEN) {
236         PyErr_SetString(ZipImportError, "path too long");
237         return -1;
238     }
239 
240     strcpy(path, prefix);
241     strcpy(path + len, name);
242     for (p = path + len; *p; p++) {
243         if (*p == '.')
244             *p = SEP;
245     }
246     len += strlen(name);
247     assert(len < INT_MAX);
248     return (int)len;
249 }
250 
/* Result of looking a module up in the archive. */
enum zi_module_info {
    MI_ERROR = 0,       /* lookup failed, exception set */
    MI_NOT_FOUND,       /* no matching entry in the archive */
    MI_MODULE,          /* plain module */
    MI_PACKAGE          /* package (an __init__ entry matched) */
};
257 
258 /* Return some information about a module. */
259 static enum zi_module_info
get_module_info(ZipImporter * self,char * fullname)260 get_module_info(ZipImporter *self, char *fullname)
261 {
262     char *subname, path[MAXPATHLEN + 1];
263     int len;
264     struct st_zip_searchorder *zso;
265 
266     subname = get_subname(fullname);
267 
268     len = make_filename(PyString_AsString(self->prefix), subname, path);
269     if (len < 0)
270         return MI_ERROR;
271 
272     for (zso = zip_searchorder; *zso->suffix; zso++) {
273         strcpy(path + len, zso->suffix);
274         if (PyDict_GetItemString(self->files, path) != NULL) {
275             if (zso->type & IS_PACKAGE)
276                 return MI_PACKAGE;
277             else
278                 return MI_MODULE;
279         }
280     }
281     return MI_NOT_FOUND;
282 }
283 
284 /* Check whether we can satisfy the import of the module named by
285    'fullname'. Return self if we can, None if we can't. */
286 static PyObject *
zipimporter_find_module(PyObject * obj,PyObject * args)287 zipimporter_find_module(PyObject *obj, PyObject *args)
288 {
289     ZipImporter *self = (ZipImporter *)obj;
290     PyObject *path = NULL;
291     char *fullname;
292     enum zi_module_info mi;
293 
294     if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
295                           &fullname, &path))
296         return NULL;
297 
298     mi = get_module_info(self, fullname);
299     if (mi == MI_ERROR)
300         return NULL;
301     if (mi == MI_NOT_FOUND) {
302         Py_INCREF(Py_None);
303         return Py_None;
304     }
305     Py_INCREF(self);
306     return (PyObject *)self;
307 }
308 
309 /* Load and return the module named by 'fullname'. */
310 static PyObject *
zipimporter_load_module(PyObject * obj,PyObject * args)311 zipimporter_load_module(PyObject *obj, PyObject *args)
312 {
313     ZipImporter *self = (ZipImporter *)obj;
314     PyObject *code, *mod, *dict;
315     char *fullname, *modpath;
316     int ispackage;
317 
318     if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
319                           &fullname))
320         return NULL;
321 
322     code = get_module_code(self, fullname, &ispackage, &modpath);
323     if (code == NULL)
324         return NULL;
325 
326     mod = PyImport_AddModule(fullname);
327     if (mod == NULL) {
328         Py_DECREF(code);
329         return NULL;
330     }
331     dict = PyModule_GetDict(mod);
332 
333     /* mod.__loader__ = self */
334     if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
335         goto error;
336 
337     if (ispackage) {
338         /* add __path__ to the module *before* the code gets
339            executed */
340         PyObject *pkgpath, *fullpath;
341         char *prefix = PyString_AsString(self->prefix);
342         char *subname = get_subname(fullname);
343         int err;
344 
345         fullpath = PyString_FromFormat("%s%c%s%s",
346                                 PyString_AsString(self->archive),
347                                 SEP,
348                                 *prefix ? prefix : "",
349                                 subname);
350         if (fullpath == NULL)
351             goto error;
352 
353         pkgpath = Py_BuildValue("[O]", fullpath);
354         Py_DECREF(fullpath);
355         if (pkgpath == NULL)
356             goto error;
357         err = PyDict_SetItemString(dict, "__path__", pkgpath);
358         Py_DECREF(pkgpath);
359         if (err != 0)
360             goto error;
361     }
362     mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
363     Py_DECREF(code);
364     if (Py_VerboseFlag)
365         PySys_WriteStderr("import %s # loaded from Zip %s\n",
366                           fullname, modpath);
367     return mod;
368 error:
369     Py_DECREF(code);
370     Py_DECREF(mod);
371     return NULL;
372 }
373 
374 /* Return a string matching __file__ for the named module */
375 static PyObject *
zipimporter_get_filename(PyObject * obj,PyObject * args)376 zipimporter_get_filename(PyObject *obj, PyObject *args)
377 {
378     ZipImporter *self = (ZipImporter *)obj;
379     PyObject *code;
380     char *fullname, *modpath;
381     int ispackage;
382 
383     if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
384                          &fullname))
385         return NULL;
386 
387     /* Deciding the filename requires working out where the code
388        would come from if the module was actually loaded */
389     code = get_module_code(self, fullname, &ispackage, &modpath);
390     if (code == NULL)
391         return NULL;
392     Py_DECREF(code); /* Only need the path info */
393 
394     return PyString_FromString(modpath);
395 }
396 
397 /* Return a bool signifying whether the module is a package or not. */
398 static PyObject *
zipimporter_is_package(PyObject * obj,PyObject * args)399 zipimporter_is_package(PyObject *obj, PyObject *args)
400 {
401     ZipImporter *self = (ZipImporter *)obj;
402     char *fullname;
403     enum zi_module_info mi;
404 
405     if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
406                           &fullname))
407         return NULL;
408 
409     mi = get_module_info(self, fullname);
410     if (mi == MI_ERROR)
411         return NULL;
412     if (mi == MI_NOT_FOUND) {
413         PyErr_Format(ZipImportError, "can't find module '%.200s'",
414                      fullname);
415         return NULL;
416     }
417     return PyBool_FromLong(mi == MI_PACKAGE);
418 }
419 
420 static PyObject *
zipimporter_get_data(PyObject * obj,PyObject * args)421 zipimporter_get_data(PyObject *obj, PyObject *args)
422 {
423     ZipImporter *self = (ZipImporter *)obj;
424     char *path;
425 #ifdef ALTSEP
426     char *p, buf[MAXPATHLEN + 1];
427 #endif
428     PyObject *toc_entry;
429     Py_ssize_t len;
430 
431     if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
432         return NULL;
433 
434 #ifdef ALTSEP
435     if (strlen(path) >= MAXPATHLEN) {
436         PyErr_SetString(ZipImportError, "path too long");
437         return NULL;
438     }
439     strcpy(buf, path);
440     for (p = buf; *p; p++) {
441         if (*p == ALTSEP)
442             *p = SEP;
443     }
444     path = buf;
445 #endif
446     len = PyString_Size(self->archive);
447     if ((size_t)len < strlen(path) &&
448         strncmp(path, PyString_AsString(self->archive), len) == 0 &&
449         path[len] == SEP) {
450         path = path + len + 1;
451     }
452 
453     toc_entry = PyDict_GetItemString(self->files, path);
454     if (toc_entry == NULL) {
455         PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
456         return NULL;
457     }
458     return get_data(PyString_AsString(self->archive), toc_entry);
459 }
460 
461 static PyObject *
zipimporter_get_code(PyObject * obj,PyObject * args)462 zipimporter_get_code(PyObject *obj, PyObject *args)
463 {
464     ZipImporter *self = (ZipImporter *)obj;
465     char *fullname;
466 
467     if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
468         return NULL;
469 
470     return get_module_code(self, fullname, NULL, NULL);
471 }
472 
473 static PyObject *
zipimporter_get_source(PyObject * obj,PyObject * args)474 zipimporter_get_source(PyObject *obj, PyObject *args)
475 {
476     ZipImporter *self = (ZipImporter *)obj;
477     PyObject *toc_entry;
478     char *fullname, *subname, path[MAXPATHLEN+1];
479     int len;
480     enum zi_module_info mi;
481 
482     if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
483         return NULL;
484 
485     mi = get_module_info(self, fullname);
486     if (mi == MI_ERROR)
487         return NULL;
488     if (mi == MI_NOT_FOUND) {
489         PyErr_Format(ZipImportError, "can't find module '%.200s'",
490                      fullname);
491         return NULL;
492     }
493     subname = get_subname(fullname);
494 
495     len = make_filename(PyString_AsString(self->prefix), subname, path);
496     if (len < 0)
497         return NULL;
498 
499     if (mi == MI_PACKAGE) {
500         path[len] = SEP;
501         strcpy(path + len + 1, "__init__.py");
502     }
503     else
504         strcpy(path + len, ".py");
505 
506     toc_entry = PyDict_GetItemString(self->files, path);
507     if (toc_entry != NULL)
508         return get_data(PyString_AsString(self->archive), toc_entry);
509 
510     /* we have the module, but no source */
511     Py_INCREF(Py_None);
512     return Py_None;
513 }
514 
515 PyDoc_STRVAR(doc_find_module,
516 "find_module(fullname, path=None) -> self or None.\n\
517 \n\
518 Search for a module specified by 'fullname'. 'fullname' must be the\n\
519 fully qualified (dotted) module name. It returns the zipimporter\n\
520 instance itself if the module was found, or None if it wasn't.\n\
521 The optional 'path' argument is ignored -- it's there for compatibility\n\
522 with the importer protocol.");
523 
524 PyDoc_STRVAR(doc_load_module,
525 "load_module(fullname) -> module.\n\
526 \n\
527 Load the module specified by 'fullname'. 'fullname' must be the\n\
528 fully qualified (dotted) module name. It returns the imported\n\
529 module, or raises ZipImportError if it wasn't found.");
530 
531 PyDoc_STRVAR(doc_get_data,
532 "get_data(pathname) -> string with file data.\n\
533 \n\
534 Return the data associated with 'pathname'. Raise IOError if\n\
535 the file wasn't found.");
536 
537 PyDoc_STRVAR(doc_is_package,
538 "is_package(fullname) -> bool.\n\
539 \n\
540 Return True if the module specified by fullname is a package.\n\
541 Raise ZipImportError if the module couldn't be found.");
542 
543 PyDoc_STRVAR(doc_get_code,
544 "get_code(fullname) -> code object.\n\
545 \n\
546 Return the code object for the specified module. Raise ZipImportError\n\
547 if the module couldn't be found.");
548 
549 PyDoc_STRVAR(doc_get_source,
550 "get_source(fullname) -> source string.\n\
551 \n\
552 Return the source code for the specified module. Raise ZipImportError\n\
553 if the module couldn't be found, return None if the archive does\n\
554 contain the module, but has no source for it.");
555 
556 
557 PyDoc_STRVAR(doc_get_filename,
558 "get_filename(fullname) -> filename string.\n\
559 \n\
560 Return the filename for the specified module.");
561 
562 static PyMethodDef zipimporter_methods[] = {
563     {"find_module", zipimporter_find_module, METH_VARARGS,
564      doc_find_module},
565     {"load_module", zipimporter_load_module, METH_VARARGS,
566      doc_load_module},
567     {"get_data", zipimporter_get_data, METH_VARARGS,
568      doc_get_data},
569     {"get_code", zipimporter_get_code, METH_VARARGS,
570      doc_get_code},
571     {"get_source", zipimporter_get_source, METH_VARARGS,
572      doc_get_source},
573     {"get_filename", zipimporter_get_filename, METH_VARARGS,
574      doc_get_filename},
575     {"is_package", zipimporter_is_package, METH_VARARGS,
576      doc_is_package},
577     {NULL,              NULL}   /* sentinel */
578 };
579 
580 static PyMemberDef zipimporter_members[] = {
581     {"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
582     {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
583     {"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
584     {NULL}
585 };
586 
587 PyDoc_STRVAR(zipimporter_doc,
588 "zipimporter(archivepath) -> zipimporter object\n\
589 \n\
590 Create a new zipimporter instance. 'archivepath' must be a path to\n\
591 a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
592 '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
593 valid directory inside the archive.\n\
594 \n\
595 'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
596 archive.\n\
597 \n\
598 The 'archive' attribute of zipimporter objects contains the name of the\n\
599 zipfile targeted.");
600 
/* DEFERRED_ADDRESS(ADDR) discards its argument and expands to 0, so the
   type pointer slot in the static initializer below starts out as 0.
   NOTE(review): presumably the real type pointer is filled in during
   module initialization, which is outside the code shown here -- confirm. */
601 #define DEFERRED_ADDRESS(ADDR) 0
602 
/* Type object for zipimport.zipimporter.  The initializer is positional:
   every value must stay in the slot order given by the trailing
   comments.  The type is subclassable (Py_TPFLAGS_BASETYPE) and takes
   part in cyclic GC (Py_TPFLAGS_HAVE_GC) with a traverse function but
   no tp_clear. */
603 static PyTypeObject ZipImporter_Type = {
604     PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
605     "zipimport.zipimporter",                    /* tp_name */
606     sizeof(ZipImporter),                        /* tp_basicsize */
607     0,                                          /* tp_itemsize */
608     (destructor)zipimporter_dealloc,            /* tp_dealloc */
609     0,                                          /* tp_print */
610     0,                                          /* tp_getattr */
611     0,                                          /* tp_setattr */
612     0,                                          /* tp_compare */
613     (reprfunc)zipimporter_repr,                 /* tp_repr */
614     0,                                          /* tp_as_number */
615     0,                                          /* tp_as_sequence */
616     0,                                          /* tp_as_mapping */
617     0,                                          /* tp_hash */
618     0,                                          /* tp_call */
619     0,                                          /* tp_str */
620     PyObject_GenericGetAttr,                    /* tp_getattro */
621     0,                                          /* tp_setattro */
622     0,                                          /* tp_as_buffer */
623     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
624         Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
625     zipimporter_doc,                            /* tp_doc */
626     zipimporter_traverse,                       /* tp_traverse */
627     0,                                          /* tp_clear */
628     0,                                          /* tp_richcompare */
629     0,                                          /* tp_weaklistoffset */
630     0,                                          /* tp_iter */
631     0,                                          /* tp_iternext */
632     zipimporter_methods,                        /* tp_methods */
633     zipimporter_members,                        /* tp_members */
634     0,                                          /* tp_getset */
635     0,                                          /* tp_base */
636     0,                                          /* tp_dict */
637     0,                                          /* tp_descr_get */
638     0,                                          /* tp_descr_set */
639     0,                                          /* tp_dictoffset */
640     (initproc)zipimporter_init,                 /* tp_init */
641     PyType_GenericAlloc,                        /* tp_alloc */
642     PyType_GenericNew,                          /* tp_new */
643     PyObject_GC_Del,                            /* tp_free */
644 };
645 
646 
647 /* implementation */
648 
/* Decode the first 4 bytes of 'buf' as a little-endian unsigned int
   (least significant byte first).  This partially reimplements
   marshal.c:r_long(). */
static unsigned int
get_uint32(const unsigned char *buf)
{
    return  (unsigned int)buf[0]
         | ((unsigned int)buf[1] <<  8)
         | ((unsigned int)buf[2] << 16)
         | ((unsigned int)buf[3] << 24);
}
662 
/* Decode the first 2 bytes of 'buf' as a little-endian unsigned short
   (least significant byte first).  This partially reimplements
   marshal.c:r_short(). */
static unsigned short
get_uint16(const unsigned char *buf)
{
    unsigned short low = buf[0];
    unsigned short high = buf[1];

    return (unsigned short)(low | (high << 8));
}
674 
675 static void
set_file_error(const char * archive,int eof)676 set_file_error(const char *archive, int eof)
677 {
678     if (eof) {
679         PyErr_SetString(PyExc_EOFError, "EOF read where not expected");
680     }
681     else {
682         PyErr_SetFromErrnoWithFilename(PyExc_IOError, archive);
683     }
684 }
685 
686 /*
687    read_directory(archive) -> files dict (new reference)
688 
689    Given a path to a Zip archive, build a dict, mapping file names
690    (local to the archive, using SEP as a separator) to toc entries.
691 
692    A toc_entry is a tuple:
693 
694    (__file__,      # value to use for __file__, available for all files
695     compress,      # compression kind; 0 for uncompressed
696     data_size,     # size of compressed data on disk
697     file_size,     # size of decompressed data
698     file_offset,   # offset of file header from start of archive
699     time,          # mod time of file (in dos format)
700     date,          # mod data of file (in dos format)
701     crc,           # crc checksum of the data
702    )
703 
704    Directories can be recognized by the trailing SEP in the name,
705    data_size and file_offset are 0.
706 */
707 PyObject *
read_directory(const char * archive)708 read_directory(const char *archive)
709 {
710     PyObject *files = NULL;
711     FILE *fp;
712     unsigned short compress, time, date, name_size;
713     unsigned int crc, data_size, file_size, header_size, header_offset;
714     unsigned long file_offset, header_position;
715     unsigned long arc_offset;  /* Absolute offset to start of the zip-archive. */
716     unsigned int count, i;
717     unsigned char buffer[46];
718     size_t length;
719     char path[MAXPATHLEN + 5];
720     char name[MAXPATHLEN + 5];
721     const char *errmsg = NULL;
722 
723     if (strlen(archive) > MAXPATHLEN) {
724         PyErr_SetString(PyExc_OverflowError,
725                         "Zip path name is too long");
726         return NULL;
727     }
728     strcpy(path, archive);
729 
730     fp = fopen(archive, "rb");
731     if (fp == NULL) {
732         PyErr_Format(ZipImportError, "can't open Zip file: "
733                      "'%.200s'", archive);
734         return NULL;
735     }
736 
737     if (fseek(fp, -22, SEEK_END) == -1) {
738         goto file_error;
739     }
740     header_position = (unsigned long)ftell(fp);
741     if (header_position == (unsigned long)-1) {
742         goto file_error;
743     }
744     assert(header_position <= (unsigned long)LONG_MAX);
745     if (fread(buffer, 1, 22, fp) != 22) {
746         goto file_error;
747     }
748     if (get_uint32(buffer) != 0x06054B50u) {
749         /* Bad: End of Central Dir signature */
750         errmsg = "not a Zip file";
751         goto invalid_header;
752     }
753 
754     header_size = get_uint32(buffer + 12);
755     header_offset = get_uint32(buffer + 16);
756     if (header_position < header_size) {
757         errmsg = "bad central directory size";
758         goto invalid_header;
759     }
760     if (header_position < header_offset) {
761         errmsg = "bad central directory offset";
762         goto invalid_header;
763     }
764     if (header_position - header_size < header_offset) {
765         errmsg = "bad central directory size or offset";
766         goto invalid_header;
767     }
768     header_position -= header_size;
769     arc_offset = header_position - header_offset;
770 
771     files = PyDict_New();
772     if (files == NULL) {
773         goto error;
774     }
775 
776     length = (long)strlen(path);
777     path[length] = SEP;
778 
779     /* Start of Central Directory */
780     count = 0;
781     if (fseek(fp, (long)header_position, 0) == -1) {
782         goto file_error;
783     }
784     for (;;) {
785         PyObject *t;
786         size_t n;
787         int err;
788 
789         n = fread(buffer, 1, 46, fp);
790         if (n < 4) {
791             goto eof_error;
792         }
793         /* Start of file header */
794         if (get_uint32(buffer) != 0x02014B50u) {
795             break;              /* Bad: Central Dir File Header */
796         }
797         if (n != 46) {
798             goto eof_error;
799         }
800         compress = get_uint16(buffer + 10);
801         time = get_uint16(buffer + 12);
802         date = get_uint16(buffer + 14);
803         crc = get_uint32(buffer + 16);
804         data_size = get_uint32(buffer + 20);
805         file_size = get_uint32(buffer + 24);
806         name_size = get_uint16(buffer + 28);
807         header_size = (unsigned int)name_size +
808            get_uint16(buffer + 30) /* extra field */ +
809            get_uint16(buffer + 32) /* comment */;
810 
811         file_offset = get_uint32(buffer + 42);
812         if (file_offset > header_offset) {
813             errmsg = "bad local header offset";
814             goto invalid_header;
815         }
816         file_offset += arc_offset;
817 
818         if (name_size > MAXPATHLEN) {
819             name_size = MAXPATHLEN;
820         }
821         if (fread(name, 1, name_size, fp) != name_size) {
822             goto file_error;
823         }
824         name[name_size] = '\0';  /* Add terminating null byte */
825         if (SEP != '/') {
826             for (i = 0; i < name_size; i++) {
827                 if (name[i] == '/') {
828                     name[i] = SEP;
829                 }
830             }
831         }
832         /* Skip the rest of the header.
833          * On Windows, calling fseek to skip over the fields we don't use is
834          * slower than reading the data because fseek flushes stdio's
835          * internal buffers.  See issue #8745. */
836         assert(header_size <= 3*0xFFFFu);
837         for (i = name_size; i < header_size; i++) {
838             if (getc(fp) == EOF) {
839                 goto file_error;
840             }
841         }
842 
843         strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
844 
845         t = Py_BuildValue("sHIIkHHI", path, compress, data_size,
846                           file_size, file_offset, time, date, crc);
847         if (t == NULL) {
848             goto error;
849         }
850         err = PyDict_SetItemString(files, name, t);
851         Py_DECREF(t);
852         if (err != 0) {
853             goto error;
854         }
855         count++;
856     }
857     fclose(fp);
858     if (Py_VerboseFlag) {
859         PySys_WriteStderr("# zipimport: found %u names in %.200s\n",
860                            count, archive);
861     }
862     return files;
863 
864 eof_error:
865     set_file_error(archive, !ferror(fp));
866     goto error;
867 
868 file_error:
869     PyErr_Format(ZipImportError, "can't read Zip file: %.200s", archive);
870     goto error;
871 
872 invalid_header:
873     assert(errmsg != NULL);
874     PyErr_Format(ZipImportError, "%s: %.200s", errmsg, archive);
875     goto error;
876 
877 error:
878     fclose(fp);
879     Py_XDECREF(files);
880     return NULL;
881 }
882 
883 /* Return the zlib.decompress function object, or NULL if zlib couldn't
884    be imported. The function is cached when found, so subsequent calls
885    don't import zlib again. */
886 static PyObject *
get_decompress_func(void)887 get_decompress_func(void)
888 {
889     static int importing_zlib = 0;
890     PyObject *zlib;
891     PyObject *decompress;
892 
893     if (importing_zlib != 0)
894         /* Someone has a zlib.py[co] in their Zip file;
895            let's avoid a stack overflow. */
896         return NULL;
897     importing_zlib = 1;
898     zlib = PyImport_ImportModuleNoBlock("zlib");
899     importing_zlib = 0;
900     if (zlib != NULL) {
901         decompress = PyObject_GetAttrString(zlib,
902                                             "decompress");
903         Py_DECREF(zlib);
904     }
905     else {
906         PyErr_Clear();
907         decompress = NULL;
908     }
909     if (Py_VerboseFlag)
910         PySys_WriteStderr("# zipimport: zlib %s\n",
911             zlib != NULL ? "available": "UNAVAILABLE");
912     return decompress;
913 }
914 
915 /* Given a path to a Zip file and a toc_entry, return the (uncompressed)
916    data as a new reference. */
917 PyObject *
get_data(const char * archive,PyObject * toc_entry)918 get_data(const char *archive, PyObject *toc_entry)
919 {
920     PyObject *raw_data = NULL, *data, *decompress;
921     char *buf;
922     FILE *fp;
923     const char *datapath;
924     unsigned short compress, time, date;
925     unsigned int crc;
926     Py_ssize_t data_size, file_size;
927     long file_offset, header_size;
928     unsigned char buffer[30];
929     const char *errmsg = NULL;
930 
931     if (!PyArg_ParseTuple(toc_entry, "sHnnlHHI", &datapath, &compress,
932                           &data_size, &file_size, &file_offset, &time,
933                           &date, &crc)) {
934         return NULL;
935     }
936     if (data_size < 0) {
937         PyErr_Format(ZipImportError, "negative data size");
938         return NULL;
939     }
940 
941     fp = fopen(archive, "rb");
942     if (!fp) {
943         PyErr_Format(PyExc_IOError,
944            "zipimport: can not open file %s", archive);
945         return NULL;
946     }
947 
948     /* Check to make sure the local file header is correct */
949     if (fseek(fp, file_offset, 0) == -1) {
950         goto file_error;
951     }
952     if (fread(buffer, 1, 30, fp) != 30) {
953         goto eof_error;
954     }
955     if (get_uint32(buffer) != 0x04034B50u) {
956         /* Bad: Local File Header */
957         errmsg = "bad local file header";
958         goto invalid_header;
959     }
960 
961     header_size = (unsigned int)30 +
962         get_uint16(buffer + 26) /* file name */ +
963         get_uint16(buffer + 28) /* extra field */;
964     if (file_offset > LONG_MAX - header_size) {
965         errmsg = "bad local file header size";
966         goto invalid_header;
967     }
968     file_offset += header_size;  /* Start of file data */
969 
970     if (data_size > LONG_MAX - 1) {
971         fclose(fp);
972         PyErr_NoMemory();
973         return NULL;
974     }
975     raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
976                                           data_size : data_size + 1);
977 
978     if (raw_data == NULL) {
979         goto error;
980     }
981     buf = PyString_AsString(raw_data);
982 
983     if (fseek(fp, file_offset, 0) == -1) {
984         goto file_error;
985     }
986     if (fread(buf, 1, data_size, fp) != (size_t)data_size) {
987         PyErr_SetString(PyExc_IOError,
988                         "zipimport: can't read data");
989         goto error;
990     }
991 
992     fclose(fp);
993     fp = NULL;
994 
995     if (compress != 0) {
996         buf[data_size] = 'Z';  /* saw this in zipfile.py */
997         data_size++;
998     }
999     buf[data_size] = '\0';
1000 
1001     if (compress == 0)  /* data is not compressed */
1002         return raw_data;
1003 
1004     /* Decompress with zlib */
1005     decompress = get_decompress_func();
1006     if (decompress == NULL) {
1007         PyErr_SetString(ZipImportError,
1008                         "can't decompress data; "
1009                         "zlib not available");
1010         goto error;
1011     }
1012     data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
1013     Py_DECREF(decompress);
1014     Py_DECREF(raw_data);
1015     return data;
1016 
1017 eof_error:
1018     set_file_error(archive, !ferror(fp));
1019     goto error;
1020 
1021 file_error:
1022     PyErr_Format(ZipImportError, "can't read Zip file: %.200s", archive);
1023     goto error;
1024 
1025 invalid_header:
1026     assert(errmsg != NULL);
1027     PyErr_Format(ZipImportError, "%s: %.200s", errmsg, archive);
1028     goto error;
1029 
1030 error:
1031     if (fp != NULL) {
1032         fclose(fp);
1033     }
1034     Py_XDECREF(raw_data);
1035     return NULL;
1036 }
1037 
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc: we
   must allow a difference of at most one second.

   Returns 1 when t1 and t2 are equal or exactly one second apart,
   0 otherwise. */
static int
eq_mtime(time_t t1, time_t t2)
{
    /* dostime only stores even seconds, so be lenient.

       The difference t1 - t2 is deliberately never formed: with an
       unsigned time_t it wraps instead of going negative, and with a
       signed one it can overflow. Subtracting 1 from the strictly
       larger operand cannot wrap or overflow. */
    if (t1 > t2)
        return t1 - 1 <= t2;
    if (t2 > t1)
        return t2 - 1 <= t1;
    return 1;
}
1050 
1051 /* Given the contents of a .py[co] file in a buffer, unmarshal the data
1052    and return the code object. Return None if it the magic word doesn't
1053    match (we do this instead of raising an exception as we fall back
1054    to .py if available and we don't want to mask other errors).
1055    Returns a new reference. */
1056 static PyObject *
unmarshal_code(const char * pathname,PyObject * data,time_t mtime)1057 unmarshal_code(const char *pathname, PyObject *data, time_t mtime)
1058 {
1059     PyObject *code;
1060     unsigned char *buf = (unsigned char *)PyString_AsString(data);
1061     Py_ssize_t size = PyString_Size(data);
1062 
1063     if (size < 8) {
1064         PyErr_SetString(ZipImportError,
1065                         "bad pyc data");
1066         return NULL;
1067     }
1068 
1069     if (get_uint32(buf) != (unsigned int)PyImport_GetMagicNumber()) {
1070         if (Py_VerboseFlag) {
1071             PySys_WriteStderr("# %s has bad magic\n",
1072                               pathname);
1073         }
1074         Py_INCREF(Py_None);
1075         return Py_None;  /* signal caller to try alternative */
1076     }
1077 
1078     if (mtime != 0 && !eq_mtime(get_uint32(buf + 4), mtime)) {
1079         if (Py_VerboseFlag) {
1080             PySys_WriteStderr("# %s has bad mtime\n",
1081                               pathname);
1082         }
1083         Py_INCREF(Py_None);
1084         return Py_None;  /* signal caller to try alternative */
1085     }
1086 
1087     code = PyMarshal_ReadObjectFromString((char *)buf + 8, size - 8);
1088     if (code == NULL) {
1089         return NULL;
1090     }
1091     if (!PyCode_Check(code)) {
1092         Py_DECREF(code);
1093         PyErr_Format(PyExc_TypeError,
1094              "compiled module %.200s is not a code object",
1095              pathname);
1096         return NULL;
1097     }
1098     return code;
1099 }
1100 
1101 /* Replace any occurrences of "\r\n?" in the input string with "\n".
1102    This converts DOS and Mac line endings to Unix line endings.
1103    Also append a trailing "\n" to be compatible with
1104    PyParser_SimpleParseFile(). Returns a new reference. */
1105 static PyObject *
normalize_line_endings(PyObject * source)1106 normalize_line_endings(PyObject *source)
1107 {
1108     char *buf, *q, *p = PyString_AsString(source);
1109     PyObject *fixed_source;
1110 
1111     if (!p)
1112         return NULL;
1113 
1114     /* one char extra for trailing \n and one for terminating \0 */
1115     buf = (char *)PyMem_Malloc(PyString_Size(source) + 2);
1116     if (buf == NULL) {
1117         PyErr_SetString(PyExc_MemoryError,
1118                         "zipimport: no memory to allocate "
1119                         "source buffer");
1120         return NULL;
1121     }
1122     /* replace "\r\n?" by "\n" */
1123     for (q = buf; *p != '\0'; p++) {
1124         if (*p == '\r') {
1125             *q++ = '\n';
1126             if (*(p + 1) == '\n')
1127                 p++;
1128         }
1129         else
1130             *q++ = *p;
1131     }
1132     *q++ = '\n';  /* add trailing \n */
1133     *q = '\0';
1134     fixed_source = PyString_FromString(buf);
1135     PyMem_Free(buf);
1136     return fixed_source;
1137 }
1138 
1139 /* Given a string buffer containing Python source code, compile it
1140    return and return a code object as a new reference. */
1141 static PyObject *
compile_source(char * pathname,PyObject * source)1142 compile_source(char *pathname, PyObject *source)
1143 {
1144     PyObject *code, *fixed_source;
1145 
1146     fixed_source = normalize_line_endings(source);
1147     if (fixed_source == NULL)
1148         return NULL;
1149 
1150     code = Py_CompileString(PyString_AsString(fixed_source), pathname,
1151                             Py_file_input);
1152     Py_DECREF(fixed_source);
1153     return code;
1154 }
1155 
/* Convert the packed DOS time and date words found in the Zip archive
   to a time_t that's compatible with the time stamp stored in .pyc
   files.

   Bit layout decoded below:
     dostime: bits 0-4 seconds/2, bits 5-10 minutes, bits 11-15 hours
     dosdate: bits 0-4 day, bits 5-8 month (1-based), bits 9-15 years
              since 1980
   The broken-down time is handed to mktime(), i.e. it is interpreted
   as local time. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;
    int half_seconds = dostime & 0x1f;
    int minutes = (dostime >> 5) & 0x3f;
    int hours = (dostime >> 11) & 0x1f;
    int day = dosdate & 0x1f;
    int month = (dosdate >> 5) & 0x0f;
    int years_since_1980 = (dosdate >> 9) & 0x7f;

    memset(&fields, 0, sizeof(fields));

    fields.tm_sec = half_seconds * 2;
    fields.tm_min = minutes;
    fields.tm_hour = hours;
    fields.tm_mday = day;
    fields.tm_mon = month - 1;               /* struct tm months are 0-based */
    fields.tm_year = years_since_1980 + 80;  /* struct tm years count from 1900 */
    fields.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&fields);
}
1175 
1176 /* Given a path to a .pyc or .pyo file in the archive, return the
1177    modification time of the matching .py file, or 0 if no source
1178    is available. */
1179 static time_t
get_mtime_of_source(ZipImporter * self,char * path)1180 get_mtime_of_source(ZipImporter *self, char *path)
1181 {
1182     PyObject *toc_entry;
1183     time_t mtime = 0;
1184     Py_ssize_t lastchar = strlen(path) - 1;
1185     char savechar = path[lastchar];
1186     path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
1187     toc_entry = PyDict_GetItemString(self->files, path);
1188     if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1189         PyTuple_Size(toc_entry) == 8) {
1190         /* fetch the time stamp of the .py file for comparison
1191            with an embedded pyc time stamp */
1192         int time, date;
1193         time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1194         date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1195         mtime = parse_dostime(time, date);
1196     }
1197     path[lastchar] = savechar;
1198     return mtime;
1199 }
1200 
1201 /* Return the code object for the module named by 'fullname' from the
1202    Zip archive as a new reference. */
1203 static PyObject *
get_code_from_data(ZipImporter * self,int ispackage,int isbytecode,time_t mtime,PyObject * toc_entry)1204 get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1205                    time_t mtime, PyObject *toc_entry)
1206 {
1207     PyObject *data, *code;
1208     char *modpath;
1209     char *archive = PyString_AsString(self->archive);
1210 
1211     if (archive == NULL)
1212         return NULL;
1213 
1214     data = get_data(archive, toc_entry);
1215     if (data == NULL)
1216         return NULL;
1217 
1218     modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1219 
1220     if (isbytecode) {
1221         code = unmarshal_code(modpath, data, mtime);
1222     }
1223     else {
1224         code = compile_source(modpath, data);
1225     }
1226     Py_DECREF(data);
1227     return code;
1228 }
1229 
1230 /* Get the code object associated with the module specified by
1231    'fullname'. */
1232 static PyObject *
get_module_code(ZipImporter * self,char * fullname,int * p_ispackage,char ** p_modpath)1233 get_module_code(ZipImporter *self, char *fullname,
1234                 int *p_ispackage, char **p_modpath)
1235 {
1236     PyObject *toc_entry;
1237     char *subname, path[MAXPATHLEN + 1];
1238     int len;
1239     struct st_zip_searchorder *zso;
1240 
1241     subname = get_subname(fullname);
1242 
1243     len = make_filename(PyString_AsString(self->prefix), subname, path);
1244     if (len < 0)
1245         return NULL;
1246 
1247     for (zso = zip_searchorder; *zso->suffix; zso++) {
1248         PyObject *code = NULL;
1249 
1250         strcpy(path + len, zso->suffix);
1251         if (Py_VerboseFlag > 1)
1252             PySys_WriteStderr("# trying %s%c%s\n",
1253                               PyString_AsString(self->archive),
1254                               SEP, path);
1255         toc_entry = PyDict_GetItemString(self->files, path);
1256         if (toc_entry != NULL) {
1257             time_t mtime = 0;
1258             int ispackage = zso->type & IS_PACKAGE;
1259             int isbytecode = zso->type & IS_BYTECODE;
1260 
1261             if (isbytecode)
1262                 mtime = get_mtime_of_source(self, path);
1263             if (p_ispackage != NULL)
1264                 *p_ispackage = ispackage;
1265             code = get_code_from_data(self, ispackage,
1266                                       isbytecode, mtime,
1267                                       toc_entry);
1268             if (code == Py_None) {
1269                 /* bad magic number or non-matching mtime
1270                    in byte code, try next */
1271                 Py_DECREF(code);
1272                 continue;
1273             }
1274             if (code != NULL && p_modpath != NULL)
1275                 *p_modpath = PyString_AsString(
1276                     PyTuple_GetItem(toc_entry, 0));
1277             return code;
1278         }
1279     }
1280     PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1281     return NULL;
1282 }
1283 
1284 
1285 /* Module init */
1286 
1287 PyDoc_STRVAR(zipimport_doc,
1288 "zipimport provides support for importing Python modules from Zip archives.\n\
1289 \n\
1290 This module exports three objects:\n\
1291 - zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1292 - ZipImportError: exception raised by zipimporter objects. It's a\n\
1293   subclass of ImportError, so it can be caught as ImportError, too.\n\
1294 - _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1295   info dicts, as used in zipimporter._files.\n\
1296 \n\
1297 It is usually not needed to use the zipimport module explicitly; it is\n\
1298 used by the builtin import mechanism for sys.path items that are paths\n\
1299 to Zip archives.");
1300 
1301 PyMODINIT_FUNC
initzipimport(void)1302 initzipimport(void)
1303 {
1304     PyObject *mod;
1305 
1306     if (PyType_Ready(&ZipImporter_Type) < 0)
1307         return;
1308 
1309     /* Correct directory separator */
1310     zip_searchorder[0].suffix[0] = SEP;
1311     zip_searchorder[1].suffix[0] = SEP;
1312     zip_searchorder[2].suffix[0] = SEP;
1313     if (Py_OptimizeFlag) {
1314         /* Reverse *.pyc and *.pyo */
1315         struct st_zip_searchorder tmp;
1316         tmp = zip_searchorder[0];
1317         zip_searchorder[0] = zip_searchorder[1];
1318         zip_searchorder[1] = tmp;
1319         tmp = zip_searchorder[3];
1320         zip_searchorder[3] = zip_searchorder[4];
1321         zip_searchorder[4] = tmp;
1322     }
1323 
1324     mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1325                          NULL, PYTHON_API_VERSION);
1326     if (mod == NULL)
1327         return;
1328 
1329     ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1330                                         PyExc_ImportError, NULL);
1331     if (ZipImportError == NULL)
1332         return;
1333 
1334     Py_INCREF(ZipImportError);
1335     if (PyModule_AddObject(mod, "ZipImportError",
1336                            ZipImportError) < 0)
1337         return;
1338 
1339     Py_INCREF(&ZipImporter_Type);
1340     if (PyModule_AddObject(mod, "zipimporter",
1341                            (PyObject *)&ZipImporter_Type) < 0)
1342         return;
1343 
1344     zip_directory_cache = PyDict_New();
1345     if (zip_directory_cache == NULL)
1346         return;
1347     Py_INCREF(zip_directory_cache);
1348     if (PyModule_AddObject(mod, "_zip_directory_cache",
1349                            zip_directory_cache) < 0)
1350         return;
1351 }
1352