1 #include "Python.h"
2 #include "structmember.h"
3 #include "osdefs.h"
4 #include "marshal.h"
5 #include <time.h>
6
7
/* Bit flags describing the kind of archive entry a search suffix denotes.
   They are OR-ed together in st_zip_searchorder.type. */
#define IS_SOURCE 0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE 0x2

/* One entry of the module search order: a file-name suffix plus the
   IS_* flags that classify entries carrying that suffix. */
struct st_zip_searchorder {
    char suffix[14];    /* longest suffix, "/__init__.pyc", plus NUL */
    int type;           /* bitwise OR of the IS_* flags above */
};
16
17 /* zip_searchorder defines how we search for a module in the Zip
18 archive: we first search for a package __init__, then for
19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
20 are swapped by initzipimport() if we run in optimized mode. Also,
21 '/' is replaced by SEP there. */
22 static struct st_zip_searchorder zip_searchorder[] = {
23 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
26 {".pyc", IS_BYTECODE},
27 {".pyo", IS_BYTECODE},
28 {".py", IS_SOURCE},
29 {"", 0}
30 };
31
32 /* zipimporter object definition and support */
33
34 typedef struct _zipimporter ZipImporter;
35
36 struct _zipimporter {
37 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
39 PyObject *prefix; /* file prefix: "a/sub/directory/" */
40 PyObject *files; /* dict with file info {path: toc_entry} */
41 };
42
43 static PyObject *ZipImportError;
44 static PyObject *zip_directory_cache = NULL;
45
46 // GOOGLE(nanzhang): Changed two functions below to be visible to launcher so
47 // that launcher can access the zip metadata section.
48 /* forward decls */
49 PyObject *read_directory(const char *archive);
50 PyObject *get_data(const char *archive, PyObject *toc_entry);
51 static PyObject *get_module_code(ZipImporter *self, char *fullname,
52 int *p_ispackage, char **p_modpath);
53
54
55 #define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
56
57
58 /* zipimporter.__init__
59 Split the "subdirectory" from the Zip archive path, lookup a matching
60 entry in sys.path_importer_cache, fetch the file directory from there
61 if found, or else read it from the archive. */
62 static int
zipimporter_init(ZipImporter * self,PyObject * args,PyObject * kwds)63 zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
64 {
65 char *path, *p, *prefix, buf[MAXPATHLEN+2];
66 size_t len;
67
68 if (!_PyArg_NoKeywords("zipimporter()", kwds))
69 return -1;
70
71 if (!PyArg_ParseTuple(args, "s:zipimporter",
72 &path))
73 return -1;
74
75 len = strlen(path);
76 if (len == 0) {
77 PyErr_SetString(ZipImportError, "archive path is empty");
78 return -1;
79 }
80 if (len >= MAXPATHLEN) {
81 PyErr_SetString(ZipImportError,
82 "archive path too long");
83 return -1;
84 }
85 strcpy(buf, path);
86
87 #ifdef ALTSEP
88 for (p = buf; *p; p++) {
89 if (*p == ALTSEP)
90 *p = SEP;
91 }
92 #endif
93
94 path = NULL;
95 prefix = NULL;
96 for (;;) {
97 #ifndef RISCOS
98 struct stat statbuf;
99 int rv;
100
101 rv = stat(buf, &statbuf);
102 if (rv == 0) {
103 /* it exists */
104 if (S_ISREG(statbuf.st_mode))
105 /* it's a file */
106 path = buf;
107 break;
108 }
109 #else
110 if (object_exists(buf)) {
111 /* it exists */
112 if (isfile(buf))
113 /* it's a file */
114 path = buf;
115 break;
116 }
117 #endif
118 /* back up one path element */
119 p = strrchr(buf, SEP);
120 if (prefix != NULL)
121 *prefix = SEP;
122 if (p == NULL)
123 break;
124 *p = '\0';
125 prefix = p;
126 }
127 if (path != NULL) {
128 PyObject *files;
129 files = PyDict_GetItemString(zip_directory_cache, path);
130 if (files == NULL) {
131 files = read_directory(buf);
132 if (files == NULL)
133 return -1;
134 if (PyDict_SetItemString(zip_directory_cache, path,
135 files) != 0)
136 return -1;
137 }
138 else
139 Py_INCREF(files);
140 self->files = files;
141 }
142 else {
143 PyErr_SetString(ZipImportError, "not a Zip file");
144 return -1;
145 }
146
147 if (prefix == NULL)
148 prefix = "";
149 else {
150 prefix++;
151 len = strlen(prefix);
152 if (prefix[len-1] != SEP) {
153 /* add trailing SEP */
154 prefix[len] = SEP;
155 prefix[len + 1] = '\0';
156 }
157 }
158
159 self->archive = PyString_FromString(buf);
160 if (self->archive == NULL)
161 return -1;
162
163 self->prefix = PyString_FromString(prefix);
164 if (self->prefix == NULL)
165 return -1;
166
167 return 0;
168 }
169
170 /* GC support. */
171 static int
zipimporter_traverse(PyObject * obj,visitproc visit,void * arg)172 zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
173 {
174 ZipImporter *self = (ZipImporter *)obj;
175 Py_VISIT(self->files);
176 return 0;
177 }
178
179 static void
zipimporter_dealloc(ZipImporter * self)180 zipimporter_dealloc(ZipImporter *self)
181 {
182 PyObject_GC_UnTrack(self);
183 Py_XDECREF(self->archive);
184 Py_XDECREF(self->prefix);
185 Py_XDECREF(self->files);
186 Py_TYPE(self)->tp_free((PyObject *)self);
187 }
188
189 static PyObject *
zipimporter_repr(ZipImporter * self)190 zipimporter_repr(ZipImporter *self)
191 {
192 char buf[500];
193 char *archive = "???";
194 char *prefix = "";
195
196 if (self->archive != NULL && PyString_Check(self->archive))
197 archive = PyString_AsString(self->archive);
198 if (self->prefix != NULL && PyString_Check(self->prefix))
199 prefix = PyString_AsString(self->prefix);
200 if (prefix != NULL && *prefix)
201 PyOS_snprintf(buf, sizeof(buf),
202 "<zipimporter object \"%.300s%c%.150s\">",
203 archive, SEP, prefix);
204 else
205 PyOS_snprintf(buf, sizeof(buf),
206 "<zipimporter object \"%.300s\">",
207 archive);
208 return PyString_FromString(buf);
209 }
210
/* return fullname.split(".")[-1]
   The result points into 'fullname'; nothing is allocated. */
static char *
get_subname(char *fullname)
{
    char *subname = strrchr(fullname, '.');
    if (subname == NULL)
        subname = fullname;
    else
        subname++;
    return subname;
}
222
223 /* Given a (sub)modulename, write the potential file path in the
224 archive (without extension) to the path buffer. Return the
225 length of the resulting string. */
226 static int
make_filename(char * prefix,char * name,char * path)227 make_filename(char *prefix, char *name, char *path)
228 {
229 size_t len;
230 char *p;
231
232 len = strlen(prefix);
233
234 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
235 if (len + strlen(name) + 13 >= MAXPATHLEN) {
236 PyErr_SetString(ZipImportError, "path too long");
237 return -1;
238 }
239
240 strcpy(path, prefix);
241 strcpy(path + len, name);
242 for (p = path + len; *p; p++) {
243 if (*p == '.')
244 *p = SEP;
245 }
246 len += strlen(name);
247 assert(len < INT_MAX);
248 return (int)len;
249 }
250
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed; an exception is set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found a plain module */
    MI_PACKAGE      /* found a package (__init__ entry) */
};
257
258 /* Return some information about a module. */
259 static enum zi_module_info
get_module_info(ZipImporter * self,char * fullname)260 get_module_info(ZipImporter *self, char *fullname)
261 {
262 char *subname, path[MAXPATHLEN + 1];
263 int len;
264 struct st_zip_searchorder *zso;
265
266 subname = get_subname(fullname);
267
268 len = make_filename(PyString_AsString(self->prefix), subname, path);
269 if (len < 0)
270 return MI_ERROR;
271
272 for (zso = zip_searchorder; *zso->suffix; zso++) {
273 strcpy(path + len, zso->suffix);
274 if (PyDict_GetItemString(self->files, path) != NULL) {
275 if (zso->type & IS_PACKAGE)
276 return MI_PACKAGE;
277 else
278 return MI_MODULE;
279 }
280 }
281 return MI_NOT_FOUND;
282 }
283
284 /* Check whether we can satisfy the import of the module named by
285 'fullname'. Return self if we can, None if we can't. */
286 static PyObject *
zipimporter_find_module(PyObject * obj,PyObject * args)287 zipimporter_find_module(PyObject *obj, PyObject *args)
288 {
289 ZipImporter *self = (ZipImporter *)obj;
290 PyObject *path = NULL;
291 char *fullname;
292 enum zi_module_info mi;
293
294 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
295 &fullname, &path))
296 return NULL;
297
298 mi = get_module_info(self, fullname);
299 if (mi == MI_ERROR)
300 return NULL;
301 if (mi == MI_NOT_FOUND) {
302 Py_INCREF(Py_None);
303 return Py_None;
304 }
305 Py_INCREF(self);
306 return (PyObject *)self;
307 }
308
309 /* Load and return the module named by 'fullname'. */
310 static PyObject *
zipimporter_load_module(PyObject * obj,PyObject * args)311 zipimporter_load_module(PyObject *obj, PyObject *args)
312 {
313 ZipImporter *self = (ZipImporter *)obj;
314 PyObject *code, *mod, *dict;
315 char *fullname, *modpath;
316 int ispackage;
317
318 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
319 &fullname))
320 return NULL;
321
322 code = get_module_code(self, fullname, &ispackage, &modpath);
323 if (code == NULL)
324 return NULL;
325
326 mod = PyImport_AddModule(fullname);
327 if (mod == NULL) {
328 Py_DECREF(code);
329 return NULL;
330 }
331 dict = PyModule_GetDict(mod);
332
333 /* mod.__loader__ = self */
334 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
335 goto error;
336
337 if (ispackage) {
338 /* add __path__ to the module *before* the code gets
339 executed */
340 PyObject *pkgpath, *fullpath;
341 char *prefix = PyString_AsString(self->prefix);
342 char *subname = get_subname(fullname);
343 int err;
344
345 fullpath = PyString_FromFormat("%s%c%s%s",
346 PyString_AsString(self->archive),
347 SEP,
348 *prefix ? prefix : "",
349 subname);
350 if (fullpath == NULL)
351 goto error;
352
353 pkgpath = Py_BuildValue("[O]", fullpath);
354 Py_DECREF(fullpath);
355 if (pkgpath == NULL)
356 goto error;
357 err = PyDict_SetItemString(dict, "__path__", pkgpath);
358 Py_DECREF(pkgpath);
359 if (err != 0)
360 goto error;
361 }
362 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
363 Py_DECREF(code);
364 if (Py_VerboseFlag)
365 PySys_WriteStderr("import %s # loaded from Zip %s\n",
366 fullname, modpath);
367 return mod;
368 error:
369 Py_DECREF(code);
370 Py_DECREF(mod);
371 return NULL;
372 }
373
374 /* Return a string matching __file__ for the named module */
375 static PyObject *
zipimporter_get_filename(PyObject * obj,PyObject * args)376 zipimporter_get_filename(PyObject *obj, PyObject *args)
377 {
378 ZipImporter *self = (ZipImporter *)obj;
379 PyObject *code;
380 char *fullname, *modpath;
381 int ispackage;
382
383 if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
384 &fullname))
385 return NULL;
386
387 /* Deciding the filename requires working out where the code
388 would come from if the module was actually loaded */
389 code = get_module_code(self, fullname, &ispackage, &modpath);
390 if (code == NULL)
391 return NULL;
392 Py_DECREF(code); /* Only need the path info */
393
394 return PyString_FromString(modpath);
395 }
396
397 /* Return a bool signifying whether the module is a package or not. */
398 static PyObject *
zipimporter_is_package(PyObject * obj,PyObject * args)399 zipimporter_is_package(PyObject *obj, PyObject *args)
400 {
401 ZipImporter *self = (ZipImporter *)obj;
402 char *fullname;
403 enum zi_module_info mi;
404
405 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
406 &fullname))
407 return NULL;
408
409 mi = get_module_info(self, fullname);
410 if (mi == MI_ERROR)
411 return NULL;
412 if (mi == MI_NOT_FOUND) {
413 PyErr_Format(ZipImportError, "can't find module '%.200s'",
414 fullname);
415 return NULL;
416 }
417 return PyBool_FromLong(mi == MI_PACKAGE);
418 }
419
420 static PyObject *
zipimporter_get_data(PyObject * obj,PyObject * args)421 zipimporter_get_data(PyObject *obj, PyObject *args)
422 {
423 ZipImporter *self = (ZipImporter *)obj;
424 char *path;
425 #ifdef ALTSEP
426 char *p, buf[MAXPATHLEN + 1];
427 #endif
428 PyObject *toc_entry;
429 Py_ssize_t len;
430
431 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
432 return NULL;
433
434 #ifdef ALTSEP
435 if (strlen(path) >= MAXPATHLEN) {
436 PyErr_SetString(ZipImportError, "path too long");
437 return NULL;
438 }
439 strcpy(buf, path);
440 for (p = buf; *p; p++) {
441 if (*p == ALTSEP)
442 *p = SEP;
443 }
444 path = buf;
445 #endif
446 len = PyString_Size(self->archive);
447 if ((size_t)len < strlen(path) &&
448 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
449 path[len] == SEP) {
450 path = path + len + 1;
451 }
452
453 toc_entry = PyDict_GetItemString(self->files, path);
454 if (toc_entry == NULL) {
455 PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
456 return NULL;
457 }
458 return get_data(PyString_AsString(self->archive), toc_entry);
459 }
460
461 static PyObject *
zipimporter_get_code(PyObject * obj,PyObject * args)462 zipimporter_get_code(PyObject *obj, PyObject *args)
463 {
464 ZipImporter *self = (ZipImporter *)obj;
465 char *fullname;
466
467 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
468 return NULL;
469
470 return get_module_code(self, fullname, NULL, NULL);
471 }
472
473 static PyObject *
zipimporter_get_source(PyObject * obj,PyObject * args)474 zipimporter_get_source(PyObject *obj, PyObject *args)
475 {
476 ZipImporter *self = (ZipImporter *)obj;
477 PyObject *toc_entry;
478 char *fullname, *subname, path[MAXPATHLEN+1];
479 int len;
480 enum zi_module_info mi;
481
482 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
483 return NULL;
484
485 mi = get_module_info(self, fullname);
486 if (mi == MI_ERROR)
487 return NULL;
488 if (mi == MI_NOT_FOUND) {
489 PyErr_Format(ZipImportError, "can't find module '%.200s'",
490 fullname);
491 return NULL;
492 }
493 subname = get_subname(fullname);
494
495 len = make_filename(PyString_AsString(self->prefix), subname, path);
496 if (len < 0)
497 return NULL;
498
499 if (mi == MI_PACKAGE) {
500 path[len] = SEP;
501 strcpy(path + len + 1, "__init__.py");
502 }
503 else
504 strcpy(path + len, ".py");
505
506 toc_entry = PyDict_GetItemString(self->files, path);
507 if (toc_entry != NULL)
508 return get_data(PyString_AsString(self->archive), toc_entry);
509
510 /* we have the module, but no source */
511 Py_INCREF(Py_None);
512 return Py_None;
513 }
514
515 PyDoc_STRVAR(doc_find_module,
516 "find_module(fullname, path=None) -> self or None.\n\
517 \n\
518 Search for a module specified by 'fullname'. 'fullname' must be the\n\
519 fully qualified (dotted) module name. It returns the zipimporter\n\
520 instance itself if the module was found, or None if it wasn't.\n\
521 The optional 'path' argument is ignored -- it's there for compatibility\n\
522 with the importer protocol.");
523
524 PyDoc_STRVAR(doc_load_module,
525 "load_module(fullname) -> module.\n\
526 \n\
527 Load the module specified by 'fullname'. 'fullname' must be the\n\
528 fully qualified (dotted) module name. It returns the imported\n\
529 module, or raises ZipImportError if it wasn't found.");
530
531 PyDoc_STRVAR(doc_get_data,
532 "get_data(pathname) -> string with file data.\n\
533 \n\
534 Return the data associated with 'pathname'. Raise IOError if\n\
535 the file wasn't found.");
536
537 PyDoc_STRVAR(doc_is_package,
538 "is_package(fullname) -> bool.\n\
539 \n\
540 Return True if the module specified by fullname is a package.\n\
541 Raise ZipImportError if the module couldn't be found.");
542
543 PyDoc_STRVAR(doc_get_code,
544 "get_code(fullname) -> code object.\n\
545 \n\
546 Return the code object for the specified module. Raise ZipImportError\n\
547 if the module couldn't be found.");
548
549 PyDoc_STRVAR(doc_get_source,
550 "get_source(fullname) -> source string.\n\
551 \n\
552 Return the source code for the specified module. Raise ZipImportError\n\
553 if the module couldn't be found, return None if the archive does\n\
554 contain the module, but has no source for it.");
555
556
557 PyDoc_STRVAR(doc_get_filename,
558 "get_filename(fullname) -> filename string.\n\
559 \n\
560 Return the filename for the specified module.");
561
562 static PyMethodDef zipimporter_methods[] = {
563 {"find_module", zipimporter_find_module, METH_VARARGS,
564 doc_find_module},
565 {"load_module", zipimporter_load_module, METH_VARARGS,
566 doc_load_module},
567 {"get_data", zipimporter_get_data, METH_VARARGS,
568 doc_get_data},
569 {"get_code", zipimporter_get_code, METH_VARARGS,
570 doc_get_code},
571 {"get_source", zipimporter_get_source, METH_VARARGS,
572 doc_get_source},
573 {"get_filename", zipimporter_get_filename, METH_VARARGS,
574 doc_get_filename},
575 {"is_package", zipimporter_is_package, METH_VARARGS,
576 doc_is_package},
577 {NULL, NULL} /* sentinel */
578 };
579
580 static PyMemberDef zipimporter_members[] = {
581 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
582 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
583 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
584 {NULL}
585 };
586
587 PyDoc_STRVAR(zipimporter_doc,
588 "zipimporter(archivepath) -> zipimporter object\n\
589 \n\
590 Create a new zipimporter instance. 'archivepath' must be a path to\n\
591 a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
592 '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
593 valid directory inside the archive.\n\
594 \n\
595 'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
596 archive.\n\
597 \n\
598 The 'archive' attribute of zipimporter objects contains the name of the\n\
599 zipfile targeted.");
600
601 #define DEFERRED_ADDRESS(ADDR) 0
602
603 static PyTypeObject ZipImporter_Type = {
604 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
605 "zipimport.zipimporter",
606 sizeof(ZipImporter),
607 0, /* tp_itemsize */
608 (destructor)zipimporter_dealloc, /* tp_dealloc */
609 0, /* tp_print */
610 0, /* tp_getattr */
611 0, /* tp_setattr */
612 0, /* tp_compare */
613 (reprfunc)zipimporter_repr, /* tp_repr */
614 0, /* tp_as_number */
615 0, /* tp_as_sequence */
616 0, /* tp_as_mapping */
617 0, /* tp_hash */
618 0, /* tp_call */
619 0, /* tp_str */
620 PyObject_GenericGetAttr, /* tp_getattro */
621 0, /* tp_setattro */
622 0, /* tp_as_buffer */
623 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
624 Py_TPFLAGS_HAVE_GC, /* tp_flags */
625 zipimporter_doc, /* tp_doc */
626 zipimporter_traverse, /* tp_traverse */
627 0, /* tp_clear */
628 0, /* tp_richcompare */
629 0, /* tp_weaklistoffset */
630 0, /* tp_iter */
631 0, /* tp_iternext */
632 zipimporter_methods, /* tp_methods */
633 zipimporter_members, /* tp_members */
634 0, /* tp_getset */
635 0, /* tp_base */
636 0, /* tp_dict */
637 0, /* tp_descr_get */
638 0, /* tp_descr_set */
639 0, /* tp_dictoffset */
640 (initproc)zipimporter_init, /* tp_init */
641 PyType_GenericAlloc, /* tp_alloc */
642 PyType_GenericNew, /* tp_new */
643 PyObject_GC_Del, /* tp_free */
644 };
645
646
647 /* implementation */
648
/* Given a buffer, return the unsigned int that is represented by the first
   4 bytes, encoded as little endian. This partially reimplements
   marshal.c:r_long().  The caller must provide at least 4 readable bytes. */
static unsigned int
get_uint32(const unsigned char *buf)
{
    unsigned int x;
    x =  buf[0];
    x |= (unsigned int)buf[1] <<  8;
    x |= (unsigned int)buf[2] << 16;
    x |= (unsigned int)buf[3] << 24;
    return x;
}
662
/* Given a buffer, return the unsigned int that is represented by the first
   2 bytes, encoded as little endian. This partially reimplements
   marshal.c:r_short().  The caller must provide at least 2 readable bytes. */
static unsigned short
get_uint16(const unsigned char *buf)
{
    unsigned short x;
    x =  buf[0];
    x |= (unsigned short)buf[1] <<  8;
    return x;
}
674
675 static void
set_file_error(const char * archive,int eof)676 set_file_error(const char *archive, int eof)
677 {
678 if (eof) {
679 PyErr_SetString(PyExc_EOFError, "EOF read where not expected");
680 }
681 else {
682 PyErr_SetFromErrnoWithFilename(PyExc_IOError, archive);
683 }
684 }
685
686 /*
687 read_directory(archive) -> files dict (new reference)
688
689 Given a path to a Zip archive, build a dict, mapping file names
690 (local to the archive, using SEP as a separator) to toc entries.
691
692 A toc_entry is a tuple:
693
694 (__file__, # value to use for __file__, available for all files
695 compress, # compression kind; 0 for uncompressed
696 data_size, # size of compressed data on disk
697 file_size, # size of decompressed data
698 file_offset, # offset of file header from start of archive
699 time, # mod time of file (in dos format)
700 date, # mod data of file (in dos format)
701 crc, # crc checksum of the data
702 )
703
704 Directories can be recognized by the trailing SEP in the name,
705 data_size and file_offset are 0.
706 */
707 PyObject *
read_directory(const char * archive)708 read_directory(const char *archive)
709 {
710 PyObject *files = NULL;
711 FILE *fp;
712 unsigned short compress, time, date, name_size;
713 unsigned int crc, data_size, file_size, header_size, header_offset;
714 unsigned long file_offset, header_position;
715 unsigned long arc_offset; /* Absolute offset to start of the zip-archive. */
716 unsigned int count, i;
717 unsigned char buffer[46];
718 size_t length;
719 char path[MAXPATHLEN + 5];
720 char name[MAXPATHLEN + 5];
721 const char *errmsg = NULL;
722
723 if (strlen(archive) > MAXPATHLEN) {
724 PyErr_SetString(PyExc_OverflowError,
725 "Zip path name is too long");
726 return NULL;
727 }
728 strcpy(path, archive);
729
730 fp = fopen(archive, "rb");
731 if (fp == NULL) {
732 PyErr_Format(ZipImportError, "can't open Zip file: "
733 "'%.200s'", archive);
734 return NULL;
735 }
736
737 if (fseek(fp, -22, SEEK_END) == -1) {
738 goto file_error;
739 }
740 header_position = (unsigned long)ftell(fp);
741 if (header_position == (unsigned long)-1) {
742 goto file_error;
743 }
744 assert(header_position <= (unsigned long)LONG_MAX);
745 if (fread(buffer, 1, 22, fp) != 22) {
746 goto file_error;
747 }
748 if (get_uint32(buffer) != 0x06054B50u) {
749 /* Bad: End of Central Dir signature */
750 errmsg = "not a Zip file";
751 goto invalid_header;
752 }
753
754 header_size = get_uint32(buffer + 12);
755 header_offset = get_uint32(buffer + 16);
756 if (header_position < header_size) {
757 errmsg = "bad central directory size";
758 goto invalid_header;
759 }
760 if (header_position < header_offset) {
761 errmsg = "bad central directory offset";
762 goto invalid_header;
763 }
764 if (header_position - header_size < header_offset) {
765 errmsg = "bad central directory size or offset";
766 goto invalid_header;
767 }
768 header_position -= header_size;
769 arc_offset = header_position - header_offset;
770
771 files = PyDict_New();
772 if (files == NULL) {
773 goto error;
774 }
775
776 length = (long)strlen(path);
777 path[length] = SEP;
778
779 /* Start of Central Directory */
780 count = 0;
781 if (fseek(fp, (long)header_position, 0) == -1) {
782 goto file_error;
783 }
784 for (;;) {
785 PyObject *t;
786 size_t n;
787 int err;
788
789 n = fread(buffer, 1, 46, fp);
790 if (n < 4) {
791 goto eof_error;
792 }
793 /* Start of file header */
794 if (get_uint32(buffer) != 0x02014B50u) {
795 break; /* Bad: Central Dir File Header */
796 }
797 if (n != 46) {
798 goto eof_error;
799 }
800 compress = get_uint16(buffer + 10);
801 time = get_uint16(buffer + 12);
802 date = get_uint16(buffer + 14);
803 crc = get_uint32(buffer + 16);
804 data_size = get_uint32(buffer + 20);
805 file_size = get_uint32(buffer + 24);
806 name_size = get_uint16(buffer + 28);
807 header_size = (unsigned int)name_size +
808 get_uint16(buffer + 30) /* extra field */ +
809 get_uint16(buffer + 32) /* comment */;
810
811 file_offset = get_uint32(buffer + 42);
812 if (file_offset > header_offset) {
813 errmsg = "bad local header offset";
814 goto invalid_header;
815 }
816 file_offset += arc_offset;
817
818 if (name_size > MAXPATHLEN) {
819 name_size = MAXPATHLEN;
820 }
821 if (fread(name, 1, name_size, fp) != name_size) {
822 goto file_error;
823 }
824 name[name_size] = '\0'; /* Add terminating null byte */
825 if (SEP != '/') {
826 for (i = 0; i < name_size; i++) {
827 if (name[i] == '/') {
828 name[i] = SEP;
829 }
830 }
831 }
832 /* Skip the rest of the header.
833 * On Windows, calling fseek to skip over the fields we don't use is
834 * slower than reading the data because fseek flushes stdio's
835 * internal buffers. See issue #8745. */
836 assert(header_size <= 3*0xFFFFu);
837 for (i = name_size; i < header_size; i++) {
838 if (getc(fp) == EOF) {
839 goto file_error;
840 }
841 }
842
843 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
844
845 t = Py_BuildValue("sHIIkHHI", path, compress, data_size,
846 file_size, file_offset, time, date, crc);
847 if (t == NULL) {
848 goto error;
849 }
850 err = PyDict_SetItemString(files, name, t);
851 Py_DECREF(t);
852 if (err != 0) {
853 goto error;
854 }
855 count++;
856 }
857 fclose(fp);
858 if (Py_VerboseFlag) {
859 PySys_WriteStderr("# zipimport: found %u names in %.200s\n",
860 count, archive);
861 }
862 return files;
863
864 eof_error:
865 set_file_error(archive, !ferror(fp));
866 goto error;
867
868 file_error:
869 PyErr_Format(ZipImportError, "can't read Zip file: %.200s", archive);
870 goto error;
871
872 invalid_header:
873 assert(errmsg != NULL);
874 PyErr_Format(ZipImportError, "%s: %.200s", errmsg, archive);
875 goto error;
876
877 error:
878 fclose(fp);
879 Py_XDECREF(files);
880 return NULL;
881 }
882
883 /* Return the zlib.decompress function object, or NULL if zlib couldn't
884 be imported. The function is cached when found, so subsequent calls
885 don't import zlib again. */
886 static PyObject *
get_decompress_func(void)887 get_decompress_func(void)
888 {
889 static int importing_zlib = 0;
890 PyObject *zlib;
891 PyObject *decompress;
892
893 if (importing_zlib != 0)
894 /* Someone has a zlib.py[co] in their Zip file;
895 let's avoid a stack overflow. */
896 return NULL;
897 importing_zlib = 1;
898 zlib = PyImport_ImportModuleNoBlock("zlib");
899 importing_zlib = 0;
900 if (zlib != NULL) {
901 decompress = PyObject_GetAttrString(zlib,
902 "decompress");
903 Py_DECREF(zlib);
904 }
905 else {
906 PyErr_Clear();
907 decompress = NULL;
908 }
909 if (Py_VerboseFlag)
910 PySys_WriteStderr("# zipimport: zlib %s\n",
911 zlib != NULL ? "available": "UNAVAILABLE");
912 return decompress;
913 }
914
915 /* Given a path to a Zip file and a toc_entry, return the (uncompressed)
916 data as a new reference. */
917 PyObject *
get_data(const char * archive,PyObject * toc_entry)918 get_data(const char *archive, PyObject *toc_entry)
919 {
920 PyObject *raw_data = NULL, *data, *decompress;
921 char *buf;
922 FILE *fp;
923 const char *datapath;
924 unsigned short compress, time, date;
925 unsigned int crc;
926 Py_ssize_t data_size, file_size;
927 long file_offset, header_size;
928 unsigned char buffer[30];
929 const char *errmsg = NULL;
930
931 if (!PyArg_ParseTuple(toc_entry, "sHnnlHHI", &datapath, &compress,
932 &data_size, &file_size, &file_offset, &time,
933 &date, &crc)) {
934 return NULL;
935 }
936 if (data_size < 0) {
937 PyErr_Format(ZipImportError, "negative data size");
938 return NULL;
939 }
940
941 fp = fopen(archive, "rb");
942 if (!fp) {
943 PyErr_Format(PyExc_IOError,
944 "zipimport: can not open file %s", archive);
945 return NULL;
946 }
947
948 /* Check to make sure the local file header is correct */
949 if (fseek(fp, file_offset, 0) == -1) {
950 goto file_error;
951 }
952 if (fread(buffer, 1, 30, fp) != 30) {
953 goto eof_error;
954 }
955 if (get_uint32(buffer) != 0x04034B50u) {
956 /* Bad: Local File Header */
957 errmsg = "bad local file header";
958 goto invalid_header;
959 }
960
961 header_size = (unsigned int)30 +
962 get_uint16(buffer + 26) /* file name */ +
963 get_uint16(buffer + 28) /* extra field */;
964 if (file_offset > LONG_MAX - header_size) {
965 errmsg = "bad local file header size";
966 goto invalid_header;
967 }
968 file_offset += header_size; /* Start of file data */
969
970 if (data_size > LONG_MAX - 1) {
971 fclose(fp);
972 PyErr_NoMemory();
973 return NULL;
974 }
975 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
976 data_size : data_size + 1);
977
978 if (raw_data == NULL) {
979 goto error;
980 }
981 buf = PyString_AsString(raw_data);
982
983 if (fseek(fp, file_offset, 0) == -1) {
984 goto file_error;
985 }
986 if (fread(buf, 1, data_size, fp) != (size_t)data_size) {
987 PyErr_SetString(PyExc_IOError,
988 "zipimport: can't read data");
989 goto error;
990 }
991
992 fclose(fp);
993 fp = NULL;
994
995 if (compress != 0) {
996 buf[data_size] = 'Z'; /* saw this in zipfile.py */
997 data_size++;
998 }
999 buf[data_size] = '\0';
1000
1001 if (compress == 0) /* data is not compressed */
1002 return raw_data;
1003
1004 /* Decompress with zlib */
1005 decompress = get_decompress_func();
1006 if (decompress == NULL) {
1007 PyErr_SetString(ZipImportError,
1008 "can't decompress data; "
1009 "zlib not available");
1010 goto error;
1011 }
1012 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
1013 Py_DECREF(decompress);
1014 Py_DECREF(raw_data);
1015 return data;
1016
1017 eof_error:
1018 set_file_error(archive, !ferror(fp));
1019 goto error;
1020
1021 file_error:
1022 PyErr_Format(ZipImportError, "can't read Zip file: %.200s", archive);
1023 goto error;
1024
1025 invalid_header:
1026 assert(errmsg != NULL);
1027 PyErr_Format(ZipImportError, "%s: %.200s", errmsg, archive);
1028 goto error;
1029
1030 error:
1031 if (fp != NULL) {
1032 fclose(fp);
1033 }
1034 Py_XDECREF(raw_data);
1035 return NULL;
1036 }
1037
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc: we
   must allow a difference of at most one second.
   Returns non-zero if t1 and t2 differ by no more than one second.
   The difference is always taken as larger minus smaller, so the
   result is also right where time_t is an unsigned type. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t d = (t1 > t2) ? t1 - t2 : t2 - t1;
    /* dostime only stores even seconds, so be lenient */
    return d <= 1;
}
1050
1051 /* Given the contents of a .py[co] file in a buffer, unmarshal the data
1052 and return the code object. Return None if it the magic word doesn't
1053 match (we do this instead of raising an exception as we fall back
1054 to .py if available and we don't want to mask other errors).
1055 Returns a new reference. */
1056 static PyObject *
unmarshal_code(const char * pathname,PyObject * data,time_t mtime)1057 unmarshal_code(const char *pathname, PyObject *data, time_t mtime)
1058 {
1059 PyObject *code;
1060 unsigned char *buf = (unsigned char *)PyString_AsString(data);
1061 Py_ssize_t size = PyString_Size(data);
1062
1063 if (size < 8) {
1064 PyErr_SetString(ZipImportError,
1065 "bad pyc data");
1066 return NULL;
1067 }
1068
1069 if (get_uint32(buf) != (unsigned int)PyImport_GetMagicNumber()) {
1070 if (Py_VerboseFlag) {
1071 PySys_WriteStderr("# %s has bad magic\n",
1072 pathname);
1073 }
1074 Py_INCREF(Py_None);
1075 return Py_None; /* signal caller to try alternative */
1076 }
1077
1078 if (mtime != 0 && !eq_mtime(get_uint32(buf + 4), mtime)) {
1079 if (Py_VerboseFlag) {
1080 PySys_WriteStderr("# %s has bad mtime\n",
1081 pathname);
1082 }
1083 Py_INCREF(Py_None);
1084 return Py_None; /* signal caller to try alternative */
1085 }
1086
1087 code = PyMarshal_ReadObjectFromString((char *)buf + 8, size - 8);
1088 if (code == NULL) {
1089 return NULL;
1090 }
1091 if (!PyCode_Check(code)) {
1092 Py_DECREF(code);
1093 PyErr_Format(PyExc_TypeError,
1094 "compiled module %.200s is not a code object",
1095 pathname);
1096 return NULL;
1097 }
1098 return code;
1099 }
1100
1101 /* Replace any occurrences of "\r\n?" in the input string with "\n".
1102 This converts DOS and Mac line endings to Unix line endings.
1103 Also append a trailing "\n" to be compatible with
1104 PyParser_SimpleParseFile(). Returns a new reference. */
1105 static PyObject *
normalize_line_endings(PyObject * source)1106 normalize_line_endings(PyObject *source)
1107 {
1108 char *buf, *q, *p = PyString_AsString(source);
1109 PyObject *fixed_source;
1110
1111 if (!p)
1112 return NULL;
1113
1114 /* one char extra for trailing \n and one for terminating \0 */
1115 buf = (char *)PyMem_Malloc(PyString_Size(source) + 2);
1116 if (buf == NULL) {
1117 PyErr_SetString(PyExc_MemoryError,
1118 "zipimport: no memory to allocate "
1119 "source buffer");
1120 return NULL;
1121 }
1122 /* replace "\r\n?" by "\n" */
1123 for (q = buf; *p != '\0'; p++) {
1124 if (*p == '\r') {
1125 *q++ = '\n';
1126 if (*(p + 1) == '\n')
1127 p++;
1128 }
1129 else
1130 *q++ = *p;
1131 }
1132 *q++ = '\n'; /* add trailing \n */
1133 *q = '\0';
1134 fixed_source = PyString_FromString(buf);
1135 PyMem_Free(buf);
1136 return fixed_source;
1137 }
1138
1139 /* Given a string buffer containing Python source code, compile it
1140 return and return a code object as a new reference. */
1141 static PyObject *
compile_source(char * pathname,PyObject * source)1142 compile_source(char *pathname, PyObject *source)
1143 {
1144 PyObject *code, *fixed_source;
1145
1146 fixed_source = normalize_line_endings(source);
1147 if (fixed_source == NULL)
1148 return NULL;
1149
1150 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
1151 Py_file_input);
1152 Py_DECREF(fixed_source);
1153 return code;
1154 }
1155
/* Turn the packed time and date words found in a Zip archive entry into
   a time_t that can be compared with the time stamp stored in .pyc
   files.  The unpacked fields are handed to mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;

    memset(&fields, 0, sizeof(fields));

    /* date word: bits 9-15 year offset, bits 5-8 month (1-based),
       bits 0-4 day of month */
    fields.tm_year = ((dosdate >> 9) & 0x7f) + 80;  /* tm_year is 1900-based */
    fields.tm_mon = ((dosdate >> 5) & 0x0f) - 1;    /* tm_mon is 0-based */
    fields.tm_mday = dosdate & 0x1f;

    /* time word: bits 11-15 hour, bits 5-10 minute,
       bits 0-4 seconds in two-second units */
    fields.tm_hour = (dostime >> 11) & 0x1f;
    fields.tm_min = (dostime >> 5) & 0x3f;
    fields.tm_sec = (dostime & 0x1f) * 2;

    /* tm_wday and tm_yday are left at zero; mktime() ignores them */
    fields.tm_isdst = -1;

    return mktime(&fields);
}
1175
1176 /* Given a path to a .pyc or .pyo file in the archive, return the
1177 modification time of the matching .py file, or 0 if no source
1178 is available. */
1179 static time_t
get_mtime_of_source(ZipImporter * self,char * path)1180 get_mtime_of_source(ZipImporter *self, char *path)
1181 {
1182 PyObject *toc_entry;
1183 time_t mtime = 0;
1184 Py_ssize_t lastchar = strlen(path) - 1;
1185 char savechar = path[lastchar];
1186 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1187 toc_entry = PyDict_GetItemString(self->files, path);
1188 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1189 PyTuple_Size(toc_entry) == 8) {
1190 /* fetch the time stamp of the .py file for comparison
1191 with an embedded pyc time stamp */
1192 int time, date;
1193 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1194 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1195 mtime = parse_dostime(time, date);
1196 }
1197 path[lastchar] = savechar;
1198 return mtime;
1199 }
1200
1201 /* Return the code object for the module named by 'fullname' from the
1202 Zip archive as a new reference. */
1203 static PyObject *
get_code_from_data(ZipImporter * self,int ispackage,int isbytecode,time_t mtime,PyObject * toc_entry)1204 get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1205 time_t mtime, PyObject *toc_entry)
1206 {
1207 PyObject *data, *code;
1208 char *modpath;
1209 char *archive = PyString_AsString(self->archive);
1210
1211 if (archive == NULL)
1212 return NULL;
1213
1214 data = get_data(archive, toc_entry);
1215 if (data == NULL)
1216 return NULL;
1217
1218 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1219
1220 if (isbytecode) {
1221 code = unmarshal_code(modpath, data, mtime);
1222 }
1223 else {
1224 code = compile_source(modpath, data);
1225 }
1226 Py_DECREF(data);
1227 return code;
1228 }
1229
1230 /* Get the code object associated with the module specified by
1231 'fullname'. */
1232 static PyObject *
get_module_code(ZipImporter * self,char * fullname,int * p_ispackage,char ** p_modpath)1233 get_module_code(ZipImporter *self, char *fullname,
1234 int *p_ispackage, char **p_modpath)
1235 {
1236 PyObject *toc_entry;
1237 char *subname, path[MAXPATHLEN + 1];
1238 int len;
1239 struct st_zip_searchorder *zso;
1240
1241 subname = get_subname(fullname);
1242
1243 len = make_filename(PyString_AsString(self->prefix), subname, path);
1244 if (len < 0)
1245 return NULL;
1246
1247 for (zso = zip_searchorder; *zso->suffix; zso++) {
1248 PyObject *code = NULL;
1249
1250 strcpy(path + len, zso->suffix);
1251 if (Py_VerboseFlag > 1)
1252 PySys_WriteStderr("# trying %s%c%s\n",
1253 PyString_AsString(self->archive),
1254 SEP, path);
1255 toc_entry = PyDict_GetItemString(self->files, path);
1256 if (toc_entry != NULL) {
1257 time_t mtime = 0;
1258 int ispackage = zso->type & IS_PACKAGE;
1259 int isbytecode = zso->type & IS_BYTECODE;
1260
1261 if (isbytecode)
1262 mtime = get_mtime_of_source(self, path);
1263 if (p_ispackage != NULL)
1264 *p_ispackage = ispackage;
1265 code = get_code_from_data(self, ispackage,
1266 isbytecode, mtime,
1267 toc_entry);
1268 if (code == Py_None) {
1269 /* bad magic number or non-matching mtime
1270 in byte code, try next */
1271 Py_DECREF(code);
1272 continue;
1273 }
1274 if (code != NULL && p_modpath != NULL)
1275 *p_modpath = PyString_AsString(
1276 PyTuple_GetItem(toc_entry, 0));
1277 return code;
1278 }
1279 }
1280 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1281 return NULL;
1282 }
1283
1284
1285 /* Module init */
1286
1287 PyDoc_STRVAR(zipimport_doc,
1288 "zipimport provides support for importing Python modules from Zip archives.\n\
1289 \n\
1290 This module exports three objects:\n\
1291 - zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1292 - ZipImportError: exception raised by zipimporter objects. It's a\n\
1293 subclass of ImportError, so it can be caught as ImportError, too.\n\
1294 - _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1295 info dicts, as used in zipimporter._files.\n\
1296 \n\
1297 It is usually not needed to use the zipimport module explicitly; it is\n\
1298 used by the builtin import mechanism for sys.path items that are paths\n\
1299 to Zip archives.");
1300
1301 PyMODINIT_FUNC
initzipimport(void)1302 initzipimport(void)
1303 {
1304 PyObject *mod;
1305
1306 if (PyType_Ready(&ZipImporter_Type) < 0)
1307 return;
1308
1309 /* Correct directory separator */
1310 zip_searchorder[0].suffix[0] = SEP;
1311 zip_searchorder[1].suffix[0] = SEP;
1312 zip_searchorder[2].suffix[0] = SEP;
1313 if (Py_OptimizeFlag) {
1314 /* Reverse *.pyc and *.pyo */
1315 struct st_zip_searchorder tmp;
1316 tmp = zip_searchorder[0];
1317 zip_searchorder[0] = zip_searchorder[1];
1318 zip_searchorder[1] = tmp;
1319 tmp = zip_searchorder[3];
1320 zip_searchorder[3] = zip_searchorder[4];
1321 zip_searchorder[4] = tmp;
1322 }
1323
1324 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1325 NULL, PYTHON_API_VERSION);
1326 if (mod == NULL)
1327 return;
1328
1329 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1330 PyExc_ImportError, NULL);
1331 if (ZipImportError == NULL)
1332 return;
1333
1334 Py_INCREF(ZipImportError);
1335 if (PyModule_AddObject(mod, "ZipImportError",
1336 ZipImportError) < 0)
1337 return;
1338
1339 Py_INCREF(&ZipImporter_Type);
1340 if (PyModule_AddObject(mod, "zipimporter",
1341 (PyObject *)&ZipImporter_Type) < 0)
1342 return;
1343
1344 zip_directory_cache = PyDict_New();
1345 if (zip_directory_cache == NULL)
1346 return;
1347 Py_INCREF(zip_directory_cache);
1348 if (PyModule_AddObject(mod, "_zip_directory_cache",
1349 zip_directory_cache) < 0)
1350 return;
1351 }
1352