1 #include "Python.h"
2 #include "structmember.h"
3 #include "osdefs.h"
4 #include "marshal.h"
5 #include <time.h>
6
7
/* Flags describing what a search-table suffix refers to.  IS_SOURCE is
   simply the absence of the IS_BYTECODE bit. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One row of the search table: a filename suffix plus its IS_* flags. */
struct st_zip_searchorder {
    char suffix[14];    /* the longest suffix, "/__init__.pyc", needs 14 */
    int type;           /* bitwise OR of IS_* flags */
};

/* zip_searchorder defines how we search for a module in the Zip
   archive: package __init__ entries come first, followed by the
   non-package .pyc, .pyo and .py entries.  The table is terminated by
   an entry with an empty suffix.  The table is deliberately writable:
   initzipimport() swaps the .pyc and .pyo entries when running in
   optimized mode and replaces '/' by SEP. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc",   IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo",   IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py",    IS_PACKAGE | IS_SOURCE},
    {".pyc",            IS_BYTECODE},
    {".pyo",            IS_BYTECODE},
    {".py",             IS_SOURCE},
    {"",                0}
};
31
32 /* zipimporter object definition and support */
33
34 typedef struct _zipimporter ZipImporter;
35
36 struct _zipimporter {
37 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
39 PyObject *prefix; /* file prefix: "a/sub/directory/" */
40 PyObject *files; /* dict with file info {path: toc_entry} */
41 };
42
43 static PyObject *ZipImportError;
44 static PyObject *zip_directory_cache = NULL;
45
46 /* forward decls */
47 static PyObject *read_directory(const char *archive);
48 static PyObject *get_data(const char *archive, PyObject *toc_entry);
49 static PyObject *get_module_code(ZipImporter *self, char *fullname,
50 int *p_ispackage, char **p_modpath);
51
52
53 #define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54
55
56 /* zipimporter.__init__
57 Split the "subdirectory" from the Zip archive path, lookup a matching
58 entry in sys.path_importer_cache, fetch the file directory from there
59 if found, or else read it from the archive. */
60 static int
zipimporter_init(ZipImporter * self,PyObject * args,PyObject * kwds)61 zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62 {
63 char *path, *p, *prefix, buf[MAXPATHLEN+2];
64 size_t len;
65
66 if (!_PyArg_NoKeywords("zipimporter()", kwds))
67 return -1;
68
69 if (!PyArg_ParseTuple(args, "s:zipimporter",
70 &path))
71 return -1;
72
73 len = strlen(path);
74 if (len == 0) {
75 PyErr_SetString(ZipImportError, "archive path is empty");
76 return -1;
77 }
78 if (len >= MAXPATHLEN) {
79 PyErr_SetString(ZipImportError,
80 "archive path too long");
81 return -1;
82 }
83 strcpy(buf, path);
84
85 #ifdef ALTSEP
86 for (p = buf; *p; p++) {
87 if (*p == ALTSEP)
88 *p = SEP;
89 }
90 #endif
91
92 path = NULL;
93 prefix = NULL;
94 for (;;) {
95 #ifndef RISCOS
96 struct stat statbuf;
97 int rv;
98
99 rv = stat(buf, &statbuf);
100 if (rv == 0) {
101 /* it exists */
102 if (S_ISREG(statbuf.st_mode))
103 /* it's a file */
104 path = buf;
105 break;
106 }
107 #else
108 if (object_exists(buf)) {
109 /* it exists */
110 if (isfile(buf))
111 /* it's a file */
112 path = buf;
113 break;
114 }
115 #endif
116 /* back up one path element */
117 p = strrchr(buf, SEP);
118 if (prefix != NULL)
119 *prefix = SEP;
120 if (p == NULL)
121 break;
122 *p = '\0';
123 prefix = p;
124 }
125 if (path != NULL) {
126 PyObject *files;
127 files = PyDict_GetItemString(zip_directory_cache, path);
128 if (files == NULL) {
129 files = read_directory(buf);
130 if (files == NULL)
131 return -1;
132 if (PyDict_SetItemString(zip_directory_cache, path,
133 files) != 0)
134 return -1;
135 }
136 else
137 Py_INCREF(files);
138 self->files = files;
139 }
140 else {
141 PyErr_SetString(ZipImportError, "not a Zip file");
142 return -1;
143 }
144
145 if (prefix == NULL)
146 prefix = "";
147 else {
148 prefix++;
149 len = strlen(prefix);
150 if (prefix[len-1] != SEP) {
151 /* add trailing SEP */
152 prefix[len] = SEP;
153 prefix[len + 1] = '\0';
154 }
155 }
156
157 self->archive = PyString_FromString(buf);
158 if (self->archive == NULL)
159 return -1;
160
161 self->prefix = PyString_FromString(prefix);
162 if (self->prefix == NULL)
163 return -1;
164
165 return 0;
166 }
167
168 /* GC support. */
169 static int
zipimporter_traverse(PyObject * obj,visitproc visit,void * arg)170 zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
171 {
172 ZipImporter *self = (ZipImporter *)obj;
173 Py_VISIT(self->files);
174 return 0;
175 }
176
177 static void
zipimporter_dealloc(ZipImporter * self)178 zipimporter_dealloc(ZipImporter *self)
179 {
180 PyObject_GC_UnTrack(self);
181 Py_XDECREF(self->archive);
182 Py_XDECREF(self->prefix);
183 Py_XDECREF(self->files);
184 Py_TYPE(self)->tp_free((PyObject *)self);
185 }
186
187 static PyObject *
zipimporter_repr(ZipImporter * self)188 zipimporter_repr(ZipImporter *self)
189 {
190 char buf[500];
191 char *archive = "???";
192 char *prefix = "";
193
194 if (self->archive != NULL && PyString_Check(self->archive))
195 archive = PyString_AsString(self->archive);
196 if (self->prefix != NULL && PyString_Check(self->prefix))
197 prefix = PyString_AsString(self->prefix);
198 if (prefix != NULL && *prefix)
199 PyOS_snprintf(buf, sizeof(buf),
200 "<zipimporter object \"%.300s%c%.150s\">",
201 archive, SEP, prefix);
202 else
203 PyOS_snprintf(buf, sizeof(buf),
204 "<zipimporter object \"%.300s\">",
205 archive);
206 return PyString_FromString(buf);
207 }
208
/* Return a pointer to the last dotted component of 'fullname', i.e.
   fullname.split(".")[-1].  The result points into 'fullname' itself;
   nothing is copied. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
220
221 /* Given a (sub)modulename, write the potential file path in the
222 archive (without extension) to the path buffer. Return the
223 length of the resulting string. */
224 static int
make_filename(char * prefix,char * name,char * path)225 make_filename(char *prefix, char *name, char *path)
226 {
227 size_t len;
228 char *p;
229
230 len = strlen(prefix);
231
232 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
233 if (len + strlen(name) + 13 >= MAXPATHLEN) {
234 PyErr_SetString(ZipImportError, "path too long");
235 return -1;
236 }
237
238 strcpy(path, prefix);
239 strcpy(path + len, name);
240 for (p = path + len; *p; p++) {
241 if (*p == '.')
242 *p = SEP;
243 }
244 len += strlen(name);
245 assert(len < INT_MAX);
246 return (int)len;
247 }
248
/* Result of looking up a module in an archive's file table. */
enum zi_module_info {
    MI_ERROR = 0,       /* lookup failed, exception set */
    MI_NOT_FOUND = 1,   /* no matching entry in the archive */
    MI_MODULE = 2,      /* found as a plain module */
    MI_PACKAGE = 3      /* found as a package (__init__ entry) */
};
255
256 /* Return some information about a module. */
257 static enum zi_module_info
get_module_info(ZipImporter * self,char * fullname)258 get_module_info(ZipImporter *self, char *fullname)
259 {
260 char *subname, path[MAXPATHLEN + 1];
261 int len;
262 struct st_zip_searchorder *zso;
263
264 subname = get_subname(fullname);
265
266 len = make_filename(PyString_AsString(self->prefix), subname, path);
267 if (len < 0)
268 return MI_ERROR;
269
270 for (zso = zip_searchorder; *zso->suffix; zso++) {
271 strcpy(path + len, zso->suffix);
272 if (PyDict_GetItemString(self->files, path) != NULL) {
273 if (zso->type & IS_PACKAGE)
274 return MI_PACKAGE;
275 else
276 return MI_MODULE;
277 }
278 }
279 return MI_NOT_FOUND;
280 }
281
282 /* Check whether we can satisfy the import of the module named by
283 'fullname'. Return self if we can, None if we can't. */
284 static PyObject *
zipimporter_find_module(PyObject * obj,PyObject * args)285 zipimporter_find_module(PyObject *obj, PyObject *args)
286 {
287 ZipImporter *self = (ZipImporter *)obj;
288 PyObject *path = NULL;
289 char *fullname;
290 enum zi_module_info mi;
291
292 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
293 &fullname, &path))
294 return NULL;
295
296 mi = get_module_info(self, fullname);
297 if (mi == MI_ERROR)
298 return NULL;
299 if (mi == MI_NOT_FOUND) {
300 Py_INCREF(Py_None);
301 return Py_None;
302 }
303 Py_INCREF(self);
304 return (PyObject *)self;
305 }
306
307 /* Load and return the module named by 'fullname'. */
308 static PyObject *
zipimporter_load_module(PyObject * obj,PyObject * args)309 zipimporter_load_module(PyObject *obj, PyObject *args)
310 {
311 ZipImporter *self = (ZipImporter *)obj;
312 PyObject *code, *mod, *dict;
313 char *fullname, *modpath;
314 int ispackage;
315
316 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
317 &fullname))
318 return NULL;
319
320 code = get_module_code(self, fullname, &ispackage, &modpath);
321 if (code == NULL)
322 return NULL;
323
324 mod = PyImport_AddModule(fullname);
325 if (mod == NULL) {
326 Py_DECREF(code);
327 return NULL;
328 }
329 dict = PyModule_GetDict(mod);
330
331 /* mod.__loader__ = self */
332 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
333 goto error;
334
335 if (ispackage) {
336 /* add __path__ to the module *before* the code gets
337 executed */
338 PyObject *pkgpath, *fullpath;
339 char *prefix = PyString_AsString(self->prefix);
340 char *subname = get_subname(fullname);
341 int err;
342
343 fullpath = PyString_FromFormat("%s%c%s%s",
344 PyString_AsString(self->archive),
345 SEP,
346 *prefix ? prefix : "",
347 subname);
348 if (fullpath == NULL)
349 goto error;
350
351 pkgpath = Py_BuildValue("[O]", fullpath);
352 Py_DECREF(fullpath);
353 if (pkgpath == NULL)
354 goto error;
355 err = PyDict_SetItemString(dict, "__path__", pkgpath);
356 Py_DECREF(pkgpath);
357 if (err != 0)
358 goto error;
359 }
360 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
361 Py_DECREF(code);
362 if (Py_VerboseFlag)
363 PySys_WriteStderr("import %s # loaded from Zip %s\n",
364 fullname, modpath);
365 return mod;
366 error:
367 Py_DECREF(code);
368 Py_DECREF(mod);
369 return NULL;
370 }
371
372 /* Return a string matching __file__ for the named module */
373 static PyObject *
zipimporter_get_filename(PyObject * obj,PyObject * args)374 zipimporter_get_filename(PyObject *obj, PyObject *args)
375 {
376 ZipImporter *self = (ZipImporter *)obj;
377 PyObject *code;
378 char *fullname, *modpath;
379 int ispackage;
380
381 if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
382 &fullname))
383 return NULL;
384
385 /* Deciding the filename requires working out where the code
386 would come from if the module was actually loaded */
387 code = get_module_code(self, fullname, &ispackage, &modpath);
388 if (code == NULL)
389 return NULL;
390 Py_DECREF(code); /* Only need the path info */
391
392 return PyString_FromString(modpath);
393 }
394
395 /* Return a bool signifying whether the module is a package or not. */
396 static PyObject *
zipimporter_is_package(PyObject * obj,PyObject * args)397 zipimporter_is_package(PyObject *obj, PyObject *args)
398 {
399 ZipImporter *self = (ZipImporter *)obj;
400 char *fullname;
401 enum zi_module_info mi;
402
403 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
404 &fullname))
405 return NULL;
406
407 mi = get_module_info(self, fullname);
408 if (mi == MI_ERROR)
409 return NULL;
410 if (mi == MI_NOT_FOUND) {
411 PyErr_Format(ZipImportError, "can't find module '%.200s'",
412 fullname);
413 return NULL;
414 }
415 return PyBool_FromLong(mi == MI_PACKAGE);
416 }
417
418 static PyObject *
zipimporter_get_data(PyObject * obj,PyObject * args)419 zipimporter_get_data(PyObject *obj, PyObject *args)
420 {
421 ZipImporter *self = (ZipImporter *)obj;
422 char *path;
423 #ifdef ALTSEP
424 char *p, buf[MAXPATHLEN + 1];
425 #endif
426 PyObject *toc_entry;
427 Py_ssize_t len;
428
429 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
430 return NULL;
431
432 #ifdef ALTSEP
433 if (strlen(path) >= MAXPATHLEN) {
434 PyErr_SetString(ZipImportError, "path too long");
435 return NULL;
436 }
437 strcpy(buf, path);
438 for (p = buf; *p; p++) {
439 if (*p == ALTSEP)
440 *p = SEP;
441 }
442 path = buf;
443 #endif
444 len = PyString_Size(self->archive);
445 if ((size_t)len < strlen(path) &&
446 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
447 path[len] == SEP) {
448 path = path + len + 1;
449 }
450
451 toc_entry = PyDict_GetItemString(self->files, path);
452 if (toc_entry == NULL) {
453 PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
454 return NULL;
455 }
456 return get_data(PyString_AsString(self->archive), toc_entry);
457 }
458
459 static PyObject *
zipimporter_get_code(PyObject * obj,PyObject * args)460 zipimporter_get_code(PyObject *obj, PyObject *args)
461 {
462 ZipImporter *self = (ZipImporter *)obj;
463 char *fullname;
464
465 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
466 return NULL;
467
468 return get_module_code(self, fullname, NULL, NULL);
469 }
470
471 static PyObject *
zipimporter_get_source(PyObject * obj,PyObject * args)472 zipimporter_get_source(PyObject *obj, PyObject *args)
473 {
474 ZipImporter *self = (ZipImporter *)obj;
475 PyObject *toc_entry;
476 char *fullname, *subname, path[MAXPATHLEN+1];
477 int len;
478 enum zi_module_info mi;
479
480 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
481 return NULL;
482
483 mi = get_module_info(self, fullname);
484 if (mi == MI_ERROR)
485 return NULL;
486 if (mi == MI_NOT_FOUND) {
487 PyErr_Format(ZipImportError, "can't find module '%.200s'",
488 fullname);
489 return NULL;
490 }
491 subname = get_subname(fullname);
492
493 len = make_filename(PyString_AsString(self->prefix), subname, path);
494 if (len < 0)
495 return NULL;
496
497 if (mi == MI_PACKAGE) {
498 path[len] = SEP;
499 strcpy(path + len + 1, "__init__.py");
500 }
501 else
502 strcpy(path + len, ".py");
503
504 toc_entry = PyDict_GetItemString(self->files, path);
505 if (toc_entry != NULL)
506 return get_data(PyString_AsString(self->archive), toc_entry);
507
508 /* we have the module, but no source */
509 Py_INCREF(Py_None);
510 return Py_None;
511 }
512
513 PyDoc_STRVAR(doc_find_module,
514 "find_module(fullname, path=None) -> self or None.\n\
515 \n\
516 Search for a module specified by 'fullname'. 'fullname' must be the\n\
517 fully qualified (dotted) module name. It returns the zipimporter\n\
518 instance itself if the module was found, or None if it wasn't.\n\
519 The optional 'path' argument is ignored -- it's there for compatibility\n\
520 with the importer protocol.");
521
522 PyDoc_STRVAR(doc_load_module,
523 "load_module(fullname) -> module.\n\
524 \n\
525 Load the module specified by 'fullname'. 'fullname' must be the\n\
526 fully qualified (dotted) module name. It returns the imported\n\
527 module, or raises ZipImportError if it wasn't found.");
528
529 PyDoc_STRVAR(doc_get_data,
530 "get_data(pathname) -> string with file data.\n\
531 \n\
532 Return the data associated with 'pathname'. Raise IOError if\n\
533 the file wasn't found.");
534
535 PyDoc_STRVAR(doc_is_package,
536 "is_package(fullname) -> bool.\n\
537 \n\
538 Return True if the module specified by fullname is a package.\n\
539 Raise ZipImportError if the module couldn't be found.");
540
541 PyDoc_STRVAR(doc_get_code,
542 "get_code(fullname) -> code object.\n\
543 \n\
544 Return the code object for the specified module. Raise ZipImportError\n\
545 if the module couldn't be found.");
546
547 PyDoc_STRVAR(doc_get_source,
548 "get_source(fullname) -> source string.\n\
549 \n\
550 Return the source code for the specified module. Raise ZipImportError\n\
551 if the module couldn't be found, return None if the archive does\n\
552 contain the module, but has no source for it.");
553
554
555 PyDoc_STRVAR(doc_get_filename,
556 "get_filename(fullname) -> filename string.\n\
557 \n\
558 Return the filename for the specified module.");
559
560 static PyMethodDef zipimporter_methods[] = {
561 {"find_module", zipimporter_find_module, METH_VARARGS,
562 doc_find_module},
563 {"load_module", zipimporter_load_module, METH_VARARGS,
564 doc_load_module},
565 {"get_data", zipimporter_get_data, METH_VARARGS,
566 doc_get_data},
567 {"get_code", zipimporter_get_code, METH_VARARGS,
568 doc_get_code},
569 {"get_source", zipimporter_get_source, METH_VARARGS,
570 doc_get_source},
571 {"get_filename", zipimporter_get_filename, METH_VARARGS,
572 doc_get_filename},
573 {"is_package", zipimporter_is_package, METH_VARARGS,
574 doc_is_package},
575 {NULL, NULL} /* sentinel */
576 };
577
578 static PyMemberDef zipimporter_members[] = {
579 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
580 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
581 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
582 {NULL}
583 };
584
585 PyDoc_STRVAR(zipimporter_doc,
586 "zipimporter(archivepath) -> zipimporter object\n\
587 \n\
588 Create a new zipimporter instance. 'archivepath' must be a path to\n\
589 a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
590 '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
591 valid directory inside the archive.\n\
592 \n\
593 'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
594 archive.\n\
595 \n\
596 The 'archive' attribute of zipimporter objects contains the name of the\n\
597 zipfile targeted.");
598
599 #define DEFERRED_ADDRESS(ADDR) 0
600
601 static PyTypeObject ZipImporter_Type = {
602 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
603 "zipimport.zipimporter",
604 sizeof(ZipImporter),
605 0, /* tp_itemsize */
606 (destructor)zipimporter_dealloc, /* tp_dealloc */
607 0, /* tp_print */
608 0, /* tp_getattr */
609 0, /* tp_setattr */
610 0, /* tp_compare */
611 (reprfunc)zipimporter_repr, /* tp_repr */
612 0, /* tp_as_number */
613 0, /* tp_as_sequence */
614 0, /* tp_as_mapping */
615 0, /* tp_hash */
616 0, /* tp_call */
617 0, /* tp_str */
618 PyObject_GenericGetAttr, /* tp_getattro */
619 0, /* tp_setattro */
620 0, /* tp_as_buffer */
621 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
622 Py_TPFLAGS_HAVE_GC, /* tp_flags */
623 zipimporter_doc, /* tp_doc */
624 zipimporter_traverse, /* tp_traverse */
625 0, /* tp_clear */
626 0, /* tp_richcompare */
627 0, /* tp_weaklistoffset */
628 0, /* tp_iter */
629 0, /* tp_iternext */
630 zipimporter_methods, /* tp_methods */
631 zipimporter_members, /* tp_members */
632 0, /* tp_getset */
633 0, /* tp_base */
634 0, /* tp_dict */
635 0, /* tp_descr_get */
636 0, /* tp_descr_set */
637 0, /* tp_dictoffset */
638 (initproc)zipimporter_init, /* tp_init */
639 PyType_GenericAlloc, /* tp_alloc */
640 PyType_GenericNew, /* tp_new */
641 PyObject_GC_Del, /* tp_free */
642 };
643
644
645 /* implementation */
646
/* Decode the first 4 bytes of 'buf' as a little-endian unsigned 32-bit
   value (buf[0] is the least significant byte).  This partially
   reimplements marshal.c:r_long(). */
static unsigned int
get_uint32(const unsigned char *buf)
{
    return  (unsigned int)buf[0]
         | ((unsigned int)buf[1] << 8)
         | ((unsigned int)buf[2] << 16)
         | ((unsigned int)buf[3] << 24);
}
660
/* Decode the first 2 bytes of 'buf' as a little-endian unsigned 16-bit
   value (buf[0] is the low byte).  This partially reimplements
   marshal.c:r_short(). */
static unsigned short
get_uint16(const unsigned char *buf)
{
    unsigned short low = buf[0];
    unsigned short high = buf[1];

    return (unsigned short)(low | (high << 8));
}
672
673 static void
set_file_error(const char * archive,int eof)674 set_file_error(const char *archive, int eof)
675 {
676 if (eof) {
677 PyErr_SetString(PyExc_EOFError, "EOF read where not expected");
678 }
679 else {
680 PyErr_SetFromErrnoWithFilename(PyExc_IOError, archive);
681 }
682 }
683
684 /*
685 read_directory(archive) -> files dict (new reference)
686
687 Given a path to a Zip archive, build a dict, mapping file names
688 (local to the archive, using SEP as a separator) to toc entries.
689
690 A toc_entry is a tuple:
691
692 (__file__, # value to use for __file__, available for all files
693 compress, # compression kind; 0 for uncompressed
694 data_size, # size of compressed data on disk
695 file_size, # size of decompressed data
696 file_offset, # offset of file header from start of archive
697 time, # mod time of file (in dos format)
698 date, # mod data of file (in dos format)
699 crc, # crc checksum of the data
700 )
701
702 Directories can be recognized by the trailing SEP in the name,
703 data_size and file_offset are 0.
704 */
705 static PyObject *
read_directory(const char * archive)706 read_directory(const char *archive)
707 {
708 PyObject *files = NULL;
709 FILE *fp;
710 unsigned short compress, time, date, name_size;
711 unsigned int crc, data_size, file_size, header_size, header_offset;
712 unsigned long file_offset, header_position;
713 unsigned long arc_offset; /* Absolute offset to start of the zip-archive. */
714 unsigned int count, i;
715 unsigned char buffer[46];
716 size_t length;
717 char path[MAXPATHLEN + 5];
718 char name[MAXPATHLEN + 5];
719 const char *errmsg = NULL;
720
721 if (strlen(archive) > MAXPATHLEN) {
722 PyErr_SetString(PyExc_OverflowError,
723 "Zip path name is too long");
724 return NULL;
725 }
726 strcpy(path, archive);
727
728 fp = fopen(archive, "rb");
729 if (fp == NULL) {
730 PyErr_Format(ZipImportError, "can't open Zip file: "
731 "'%.200s'", archive);
732 return NULL;
733 }
734
735 if (fseek(fp, -22, SEEK_END) == -1) {
736 goto file_error;
737 }
738 header_position = (unsigned long)ftell(fp);
739 if (header_position == (unsigned long)-1) {
740 goto file_error;
741 }
742 assert(header_position <= (unsigned long)LONG_MAX);
743 if (fread(buffer, 1, 22, fp) != 22) {
744 goto file_error;
745 }
746 if (get_uint32(buffer) != 0x06054B50u) {
747 /* Bad: End of Central Dir signature */
748 errmsg = "not a Zip file";
749 goto invalid_header;
750 }
751
752 header_size = get_uint32(buffer + 12);
753 header_offset = get_uint32(buffer + 16);
754 if (header_position < header_size) {
755 errmsg = "bad central directory size";
756 goto invalid_header;
757 }
758 if (header_position < header_offset) {
759 errmsg = "bad central directory offset";
760 goto invalid_header;
761 }
762 if (header_position - header_size < header_offset) {
763 errmsg = "bad central directory size or offset";
764 goto invalid_header;
765 }
766 header_position -= header_size;
767 arc_offset = header_position - header_offset;
768
769 files = PyDict_New();
770 if (files == NULL) {
771 goto error;
772 }
773
774 length = (long)strlen(path);
775 path[length] = SEP;
776
777 /* Start of Central Directory */
778 count = 0;
779 if (fseek(fp, (long)header_position, 0) == -1) {
780 goto file_error;
781 }
782 for (;;) {
783 PyObject *t;
784 size_t n;
785 int err;
786
787 n = fread(buffer, 1, 46, fp);
788 if (n < 4) {
789 goto eof_error;
790 }
791 /* Start of file header */
792 if (get_uint32(buffer) != 0x02014B50u) {
793 break; /* Bad: Central Dir File Header */
794 }
795 if (n != 46) {
796 goto eof_error;
797 }
798 compress = get_uint16(buffer + 10);
799 time = get_uint16(buffer + 12);
800 date = get_uint16(buffer + 14);
801 crc = get_uint32(buffer + 16);
802 data_size = get_uint32(buffer + 20);
803 file_size = get_uint32(buffer + 24);
804 name_size = get_uint16(buffer + 28);
805 header_size = (unsigned int)name_size +
806 get_uint16(buffer + 30) /* extra field */ +
807 get_uint16(buffer + 32) /* comment */;
808
809 file_offset = get_uint32(buffer + 42);
810 if (file_offset > header_offset) {
811 errmsg = "bad local header offset";
812 goto invalid_header;
813 }
814 file_offset += arc_offset;
815
816 if (name_size > MAXPATHLEN) {
817 name_size = MAXPATHLEN;
818 }
819 if (fread(name, 1, name_size, fp) != name_size) {
820 goto file_error;
821 }
822 name[name_size] = '\0'; /* Add terminating null byte */
823 if (SEP != '/') {
824 for (i = 0; i < name_size; i++) {
825 if (name[i] == '/') {
826 name[i] = SEP;
827 }
828 }
829 }
830 /* Skip the rest of the header.
831 * On Windows, calling fseek to skip over the fields we don't use is
832 * slower than reading the data because fseek flushes stdio's
833 * internal buffers. See issue #8745. */
834 assert(header_size <= 3*0xFFFFu);
835 for (i = name_size; i < header_size; i++) {
836 if (getc(fp) == EOF) {
837 goto file_error;
838 }
839 }
840
841 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
842
843 t = Py_BuildValue("sHIIkHHI", path, compress, data_size,
844 file_size, file_offset, time, date, crc);
845 if (t == NULL) {
846 goto error;
847 }
848 err = PyDict_SetItemString(files, name, t);
849 Py_DECREF(t);
850 if (err != 0) {
851 goto error;
852 }
853 count++;
854 }
855 fclose(fp);
856 if (Py_VerboseFlag) {
857 PySys_WriteStderr("# zipimport: found %u names in %.200s\n",
858 count, archive);
859 }
860 return files;
861
862 eof_error:
863 set_file_error(archive, !ferror(fp));
864 goto error;
865
866 file_error:
867 PyErr_Format(ZipImportError, "can't read Zip file: %.200s", archive);
868 goto error;
869
870 invalid_header:
871 assert(errmsg != NULL);
872 PyErr_Format(ZipImportError, "%s: %.200s", errmsg, archive);
873 goto error;
874
875 error:
876 fclose(fp);
877 Py_XDECREF(files);
878 return NULL;
879 }
880
881 /* Return the zlib.decompress function object, or NULL if zlib couldn't
882 be imported. The function is cached when found, so subsequent calls
883 don't import zlib again. */
884 static PyObject *
get_decompress_func(void)885 get_decompress_func(void)
886 {
887 static int importing_zlib = 0;
888 PyObject *zlib;
889 PyObject *decompress;
890
891 if (importing_zlib != 0)
892 /* Someone has a zlib.py[co] in their Zip file;
893 let's avoid a stack overflow. */
894 return NULL;
895 importing_zlib = 1;
896 zlib = PyImport_ImportModuleNoBlock("zlib");
897 importing_zlib = 0;
898 if (zlib != NULL) {
899 decompress = PyObject_GetAttrString(zlib,
900 "decompress");
901 Py_DECREF(zlib);
902 }
903 else {
904 PyErr_Clear();
905 decompress = NULL;
906 }
907 if (Py_VerboseFlag)
908 PySys_WriteStderr("# zipimport: zlib %s\n",
909 zlib != NULL ? "available": "UNAVAILABLE");
910 return decompress;
911 }
912
913 /* Given a path to a Zip file and a toc_entry, return the (uncompressed)
914 data as a new reference. */
915 static PyObject *
get_data(const char * archive,PyObject * toc_entry)916 get_data(const char *archive, PyObject *toc_entry)
917 {
918 PyObject *raw_data = NULL, *data, *decompress;
919 char *buf;
920 FILE *fp;
921 const char *datapath;
922 unsigned short compress, time, date;
923 unsigned int crc;
924 Py_ssize_t data_size, file_size;
925 long file_offset, header_size;
926 unsigned char buffer[30];
927 const char *errmsg = NULL;
928
929 if (!PyArg_ParseTuple(toc_entry, "sHnnlHHI", &datapath, &compress,
930 &data_size, &file_size, &file_offset, &time,
931 &date, &crc)) {
932 return NULL;
933 }
934 if (data_size < 0) {
935 PyErr_Format(ZipImportError, "negative data size");
936 return NULL;
937 }
938
939 fp = fopen(archive, "rb");
940 if (!fp) {
941 PyErr_Format(PyExc_IOError,
942 "zipimport: can not open file %s", archive);
943 return NULL;
944 }
945
946 /* Check to make sure the local file header is correct */
947 if (fseek(fp, file_offset, 0) == -1) {
948 goto file_error;
949 }
950 if (fread(buffer, 1, 30, fp) != 30) {
951 goto eof_error;
952 }
953 if (get_uint32(buffer) != 0x04034B50u) {
954 /* Bad: Local File Header */
955 errmsg = "bad local file header";
956 goto invalid_header;
957 }
958
959 header_size = (unsigned int)30 +
960 get_uint16(buffer + 26) /* file name */ +
961 get_uint16(buffer + 28) /* extra field */;
962 if (file_offset > LONG_MAX - header_size) {
963 errmsg = "bad local file header size";
964 goto invalid_header;
965 }
966 file_offset += header_size; /* Start of file data */
967
968 if (data_size > LONG_MAX - 1) {
969 fclose(fp);
970 PyErr_NoMemory();
971 return NULL;
972 }
973 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
974 data_size : data_size + 1);
975
976 if (raw_data == NULL) {
977 goto error;
978 }
979 buf = PyString_AsString(raw_data);
980
981 if (fseek(fp, file_offset, 0) == -1) {
982 goto file_error;
983 }
984 if (fread(buf, 1, data_size, fp) != (size_t)data_size) {
985 PyErr_SetString(PyExc_IOError,
986 "zipimport: can't read data");
987 goto error;
988 }
989
990 fclose(fp);
991 fp = NULL;
992
993 if (compress != 0) {
994 buf[data_size] = 'Z'; /* saw this in zipfile.py */
995 data_size++;
996 }
997 buf[data_size] = '\0';
998
999 if (compress == 0) /* data is not compressed */
1000 return raw_data;
1001
1002 /* Decompress with zlib */
1003 decompress = get_decompress_func();
1004 if (decompress == NULL) {
1005 PyErr_SetString(ZipImportError,
1006 "can't decompress data; "
1007 "zlib not available");
1008 goto error;
1009 }
1010 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
1011 Py_DECREF(decompress);
1012 Py_DECREF(raw_data);
1013 return data;
1014
1015 eof_error:
1016 set_file_error(archive, !ferror(fp));
1017 goto error;
1018
1019 file_error:
1020 PyErr_Format(ZipImportError, "can't read Zip file: %.200s", archive);
1021 goto error;
1022
1023 invalid_header:
1024 assert(errmsg != NULL);
1025 PyErr_Format(ZipImportError, "%s: %.200s", errmsg, archive);
1026 goto error;
1027
1028 error:
1029 if (fp != NULL) {
1030 fclose(fp);
1031 }
1032 Py_XDECREF(raw_data);
1033 return NULL;
1034 }
1035
/* Lenient modification-time comparison: returns true when the two
   time stamps differ by at most one second.  DOS time stamps in the
   archive only store even seconds, so an exact comparison against the
   mtime stored in a .pyc would be too strict. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    if (delta < 0)
        delta = -delta;
    return (delta <= 1) ? 1 : 0;
}
1048
1049 /* Given the contents of a .py[co] file in a buffer, unmarshal the data
1050 and return the code object. Return None if it the magic word doesn't
1051 match (we do this instead of raising an exception as we fall back
1052 to .py if available and we don't want to mask other errors).
1053 Returns a new reference. */
1054 static PyObject *
unmarshal_code(const char * pathname,PyObject * data,time_t mtime)1055 unmarshal_code(const char *pathname, PyObject *data, time_t mtime)
1056 {
1057 PyObject *code;
1058 unsigned char *buf = (unsigned char *)PyString_AsString(data);
1059 Py_ssize_t size = PyString_Size(data);
1060
1061 if (size < 8) {
1062 PyErr_SetString(ZipImportError,
1063 "bad pyc data");
1064 return NULL;
1065 }
1066
1067 if (get_uint32(buf) != (unsigned int)PyImport_GetMagicNumber()) {
1068 if (Py_VerboseFlag) {
1069 PySys_WriteStderr("# %s has bad magic\n",
1070 pathname);
1071 }
1072 Py_INCREF(Py_None);
1073 return Py_None; /* signal caller to try alternative */
1074 }
1075
1076 if (mtime != 0 && !eq_mtime(get_uint32(buf + 4), mtime)) {
1077 if (Py_VerboseFlag) {
1078 PySys_WriteStderr("# %s has bad mtime\n",
1079 pathname);
1080 }
1081 Py_INCREF(Py_None);
1082 return Py_None; /* signal caller to try alternative */
1083 }
1084
1085 code = PyMarshal_ReadObjectFromString((char *)buf + 8, size - 8);
1086 if (code == NULL) {
1087 return NULL;
1088 }
1089 if (!PyCode_Check(code)) {
1090 Py_DECREF(code);
1091 PyErr_Format(PyExc_TypeError,
1092 "compiled module %.200s is not a code object",
1093 pathname);
1094 return NULL;
1095 }
1096 return code;
1097 }
1098
1099 /* Replace any occurrences of "\r\n?" in the input string with "\n".
1100 This converts DOS and Mac line endings to Unix line endings.
1101 Also append a trailing "\n" to be compatible with
1102 PyParser_SimpleParseFile(). Returns a new reference. */
1103 static PyObject *
normalize_line_endings(PyObject * source)1104 normalize_line_endings(PyObject *source)
1105 {
1106 char *buf, *q, *p = PyString_AsString(source);
1107 PyObject *fixed_source;
1108
1109 if (!p)
1110 return NULL;
1111
1112 /* one char extra for trailing \n and one for terminating \0 */
1113 buf = (char *)PyMem_Malloc(PyString_Size(source) + 2);
1114 if (buf == NULL) {
1115 PyErr_SetString(PyExc_MemoryError,
1116 "zipimport: no memory to allocate "
1117 "source buffer");
1118 return NULL;
1119 }
1120 /* replace "\r\n?" by "\n" */
1121 for (q = buf; *p != '\0'; p++) {
1122 if (*p == '\r') {
1123 *q++ = '\n';
1124 if (*(p + 1) == '\n')
1125 p++;
1126 }
1127 else
1128 *q++ = *p;
1129 }
1130 *q++ = '\n'; /* add trailing \n */
1131 *q = '\0';
1132 fixed_source = PyString_FromString(buf);
1133 PyMem_Free(buf);
1134 return fixed_source;
1135 }
1136
1137 /* Given a string buffer containing Python source code, compile it
1138 return and return a code object as a new reference. */
1139 static PyObject *
compile_source(char * pathname,PyObject * source)1140 compile_source(char *pathname, PyObject *source)
1141 {
1142 PyObject *code, *fixed_source;
1143
1144 fixed_source = normalize_line_endings(source);
1145 if (fixed_source == NULL)
1146 return NULL;
1147
1148 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
1149 Py_file_input);
1150 Py_DECREF(fixed_source);
1151 return code;
1152 }
1153
/* Translate an MS-DOS time/date pair, as found in a Zip archive, into a
   time_t that can be compared with the time stamp stored in .pyc files.

   dostime: bits 0-4 hold seconds / 2, bits 5-10 the minutes and
            bits 11-15 the hours.
   dosdate: bits 0-4 hold the day of the month, bits 5-8 the 1-based
            month and bits 9-15 the number of years since 1980.

   Any higher bits are ignored.  The broken-down time is converted with
   mktime(), with tm_isdst set to -1. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields = {0};

    /* date part; struct tm counts months from 0 and years from 1900 */
    fields.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    fields.tm_mon = ((dosdate >> 5) & 0x0f) - 1;
    fields.tm_mday = dosdate & 0x1f;

    /* time part; DOS stores seconds with a two second resolution */
    fields.tm_hour = (dostime >> 11) & 0x1f;
    fields.tm_min = (dostime >> 5) & 0x3f;
    fields.tm_sec = (dostime & 0x1f) * 2;

    /* tm_wday and tm_yday are ignored by mktime() */
    fields.tm_isdst = -1;

    return mktime(&fields);
}
1173
1174 /* Given a path to a .pyc or .pyo file in the archive, return the
1175 modification time of the matching .py file, or 0 if no source
1176 is available. */
1177 static time_t
get_mtime_of_source(ZipImporter * self,char * path)1178 get_mtime_of_source(ZipImporter *self, char *path)
1179 {
1180 PyObject *toc_entry;
1181 time_t mtime = 0;
1182 Py_ssize_t lastchar = strlen(path) - 1;
1183 char savechar = path[lastchar];
1184 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1185 toc_entry = PyDict_GetItemString(self->files, path);
1186 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1187 PyTuple_Size(toc_entry) == 8) {
1188 /* fetch the time stamp of the .py file for comparison
1189 with an embedded pyc time stamp */
1190 int time, date;
1191 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1192 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1193 mtime = parse_dostime(time, date);
1194 }
1195 path[lastchar] = savechar;
1196 return mtime;
1197 }
1198
1199 /* Return the code object for the module named by 'fullname' from the
1200 Zip archive as a new reference. */
1201 static PyObject *
get_code_from_data(ZipImporter * self,int ispackage,int isbytecode,time_t mtime,PyObject * toc_entry)1202 get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1203 time_t mtime, PyObject *toc_entry)
1204 {
1205 PyObject *data, *code;
1206 char *modpath;
1207 char *archive = PyString_AsString(self->archive);
1208
1209 if (archive == NULL)
1210 return NULL;
1211
1212 data = get_data(archive, toc_entry);
1213 if (data == NULL)
1214 return NULL;
1215
1216 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1217
1218 if (isbytecode) {
1219 code = unmarshal_code(modpath, data, mtime);
1220 }
1221 else {
1222 code = compile_source(modpath, data);
1223 }
1224 Py_DECREF(data);
1225 return code;
1226 }
1227
1228 /* Get the code object associated with the module specified by
1229 'fullname'. */
1230 static PyObject *
get_module_code(ZipImporter * self,char * fullname,int * p_ispackage,char ** p_modpath)1231 get_module_code(ZipImporter *self, char *fullname,
1232 int *p_ispackage, char **p_modpath)
1233 {
1234 PyObject *toc_entry;
1235 char *subname, path[MAXPATHLEN + 1];
1236 int len;
1237 struct st_zip_searchorder *zso;
1238
1239 subname = get_subname(fullname);
1240
1241 len = make_filename(PyString_AsString(self->prefix), subname, path);
1242 if (len < 0)
1243 return NULL;
1244
1245 for (zso = zip_searchorder; *zso->suffix; zso++) {
1246 PyObject *code = NULL;
1247
1248 strcpy(path + len, zso->suffix);
1249 if (Py_VerboseFlag > 1)
1250 PySys_WriteStderr("# trying %s%c%s\n",
1251 PyString_AsString(self->archive),
1252 SEP, path);
1253 toc_entry = PyDict_GetItemString(self->files, path);
1254 if (toc_entry != NULL) {
1255 time_t mtime = 0;
1256 int ispackage = zso->type & IS_PACKAGE;
1257 int isbytecode = zso->type & IS_BYTECODE;
1258
1259 if (isbytecode)
1260 mtime = get_mtime_of_source(self, path);
1261 if (p_ispackage != NULL)
1262 *p_ispackage = ispackage;
1263 code = get_code_from_data(self, ispackage,
1264 isbytecode, mtime,
1265 toc_entry);
1266 if (code == Py_None) {
1267 /* bad magic number or non-matching mtime
1268 in byte code, try next */
1269 Py_DECREF(code);
1270 continue;
1271 }
1272 if (code != NULL && p_modpath != NULL)
1273 *p_modpath = PyString_AsString(
1274 PyTuple_GetItem(toc_entry, 0));
1275 return code;
1276 }
1277 }
1278 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1279 return NULL;
1280 }
1281
1282
/* Module init */

/* Docstring of the zipimport module; initzipimport() passes it to
   Py_InitModule4(). */
PyDoc_STRVAR(zipimport_doc,
"zipimport provides support for importing Python modules from Zip archives.\n\
\n\
This module exports three objects:\n\
- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
- ZipImportError: exception raised by zipimporter objects. It's a\n\
subclass of ImportError, so it can be caught as ImportError, too.\n\
- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
info dicts, as used in zipimporter._files.\n\
\n\
It is usually not needed to use the zipimport module explicitly; it is\n\
used by the builtin import mechanism for sys.path items that are paths\n\
to Zip archives.");
1298
1299 PyMODINIT_FUNC
initzipimport(void)1300 initzipimport(void)
1301 {
1302 PyObject *mod;
1303
1304 if (PyType_Ready(&ZipImporter_Type) < 0)
1305 return;
1306
1307 /* Correct directory separator */
1308 zip_searchorder[0].suffix[0] = SEP;
1309 zip_searchorder[1].suffix[0] = SEP;
1310 zip_searchorder[2].suffix[0] = SEP;
1311 if (Py_OptimizeFlag) {
1312 /* Reverse *.pyc and *.pyo */
1313 struct st_zip_searchorder tmp;
1314 tmp = zip_searchorder[0];
1315 zip_searchorder[0] = zip_searchorder[1];
1316 zip_searchorder[1] = tmp;
1317 tmp = zip_searchorder[3];
1318 zip_searchorder[3] = zip_searchorder[4];
1319 zip_searchorder[4] = tmp;
1320 }
1321
1322 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1323 NULL, PYTHON_API_VERSION);
1324 if (mod == NULL)
1325 return;
1326
1327 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1328 PyExc_ImportError, NULL);
1329 if (ZipImportError == NULL)
1330 return;
1331
1332 Py_INCREF(ZipImportError);
1333 if (PyModule_AddObject(mod, "ZipImportError",
1334 ZipImportError) < 0)
1335 return;
1336
1337 Py_INCREF(&ZipImporter_Type);
1338 if (PyModule_AddObject(mod, "zipimporter",
1339 (PyObject *)&ZipImporter_Type) < 0)
1340 return;
1341
1342 zip_directory_cache = PyDict_New();
1343 if (zip_directory_cache == NULL)
1344 return;
1345 Py_INCREF(zip_directory_cache);
1346 if (PyModule_AddObject(mod, "_zip_directory_cache",
1347 zip_directory_cache) < 0)
1348 return;
1349 }
1350