1 #include "Python.h"
2 #include "internal/import.h"
3 #include "internal/pystate.h"
4 #include "structmember.h"
5 #include "osdefs.h"
6 #include "marshal.h"
7 #include <time.h>
8
9
/* Bit flags combined into st_zip_searchorder.type.  IS_SOURCE is 0, so a
   source entry is recognised by the absence of IS_BYTECODE. */
#define IS_SOURCE 0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE 0x2

/* One entry of the module search table: a file-name suffix to try and the
   IS_* flags describing what a match with that suffix means. */
struct st_zip_searchorder {
    char suffix[14];    /* NUL-terminated suffix appended to the module path */
    int type;           /* bitwise OR of IS_* flags */
};
18
19 #ifdef ALTSEP
20 _Py_IDENTIFIER(replace);
21 #endif
22
/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, and .py entries. The .pyc entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.

   The table is terminated by an entry with an empty suffix; loops over
   it stop when the first character of the suffix is NUL. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py", IS_PACKAGE | IS_SOURCE},
    {".pyc", IS_BYTECODE},
    {".py", IS_SOURCE},
    {"", 0}     /* sentinel */
};
35
36 /* zipimporter object definition and support */
37
typedef struct _zipimporter ZipImporter;

/* Instance layout of zipimport.zipimporter.  All three members are set by
   __init__; methods treat a NULL archive/prefix as "__init__ wasn't
   called". */
struct _zipimporter {
    PyObject_HEAD
    PyObject *archive;  /* str: pathname of the Zip archive,
                           decoded from the filesystem encoding */
    PyObject *prefix;   /* str: path inside the archive, "a/sub/directory/"
                           (always ends with SEP), or the empty string
                           when the importer points at the archive root */
    PyObject *files;    /* dict with file info {path: toc_entry} */
};
48
/* Exception type raised for archive-related failures in this module. */
static PyObject *ZipImportError;
/* read_directory() cache: maps an archive path to its files dict, so each
   archive's directory is parsed once and shared between importers. */
static PyObject *zip_directory_cache = NULL;

/* forward decls */
static PyObject *read_directory(PyObject *archive);
static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, PyObject *fullname,
                                 int *p_ispackage, PyObject **p_modpath);

static PyTypeObject ZipImporter_Type;

/* True if 'op' is a zipimporter instance or an instance of a subtype. */
#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
62
63 /*[clinic input]
64 module zipimport
65 class zipimport.zipimporter "ZipImporter *" "&ZipImporter_Type"
66 [clinic start generated code]*/
67 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=9db8b61557d911e7]*/
68 #include "clinic/zipimport.c.h"
69
70
71 /* zipimporter.__init__
72 Split the "subdirectory" from the Zip archive path, lookup a matching
73 entry in sys.path_importer_cache, fetch the file directory from there
74 if found, or else read it from the archive. */
75
76 /*[clinic input]
77 zipimport.zipimporter.__init__
78
79 archivepath as path: object(converter="PyUnicode_FSDecoder")
80 A path-like object to a zipfile, or to a specific path inside
81 a zipfile.
82 /
83
84 Create a new zipimporter instance.
85
86 'archivepath' must be a path-like object to a zipfile, or to a specific path
87 inside a zipfile. For example, it can be '/tmp/myimport.zip', or
88 '/tmp/myimport.zip/mydirectory', if mydirectory is a valid directory inside
89 the archive.
90
91 'ZipImportError' is raised if 'archivepath' doesn't point to a valid Zip
92 archive.
93
94 The 'archive' attribute of the zipimporter object contains the name of the
95 zipfile targeted.
96
97 [clinic start generated code]*/
98
99 static int
zipimport_zipimporter___init___impl(ZipImporter * self,PyObject * path)100 zipimport_zipimporter___init___impl(ZipImporter *self, PyObject *path)
101 /*[clinic end generated code: output=141558fefdb46dc8 input=92b9ebeed1f6a704]*/
102 {
103 PyObject *files, *tmp;
104 PyObject *filename = NULL;
105 Py_ssize_t len, flen;
106
107 if (PyUnicode_READY(path) == -1)
108 return -1;
109
110 len = PyUnicode_GET_LENGTH(path);
111 if (len == 0) {
112 PyErr_SetString(ZipImportError, "archive path is empty");
113 goto error;
114 }
115
116 #ifdef ALTSEP
117 tmp = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
118 if (!tmp)
119 goto error;
120 Py_DECREF(path);
121 path = tmp;
122 #endif
123
124 filename = path;
125 Py_INCREF(filename);
126 flen = len;
127 for (;;) {
128 struct stat statbuf;
129 int rv;
130
131 rv = _Py_stat(filename, &statbuf);
132 if (rv == -2)
133 goto error;
134 if (rv == 0) {
135 /* it exists */
136 if (!S_ISREG(statbuf.st_mode))
137 /* it's a not file */
138 Py_CLEAR(filename);
139 break;
140 }
141 Py_CLEAR(filename);
142 /* back up one path element */
143 flen = PyUnicode_FindChar(path, SEP, 0, flen, -1);
144 if (flen == -1)
145 break;
146 filename = PyUnicode_Substring(path, 0, flen);
147 if (filename == NULL)
148 goto error;
149 }
150 if (filename == NULL) {
151 PyErr_SetString(ZipImportError, "not a Zip file");
152 goto error;
153 }
154
155 if (PyUnicode_READY(filename) < 0)
156 goto error;
157
158 files = PyDict_GetItem(zip_directory_cache, filename);
159 if (files == NULL) {
160 files = read_directory(filename);
161 if (files == NULL)
162 goto error;
163 if (PyDict_SetItem(zip_directory_cache, filename, files) != 0)
164 goto error;
165 }
166 else
167 Py_INCREF(files);
168 Py_XSETREF(self->files, files);
169
170 /* Transfer reference */
171 Py_XSETREF(self->archive, filename);
172 filename = NULL;
173
174 /* Check if there is a prefix directory following the filename. */
175 if (flen != len) {
176 tmp = PyUnicode_Substring(path, flen+1,
177 PyUnicode_GET_LENGTH(path));
178 if (tmp == NULL)
179 goto error;
180 Py_XSETREF(self->prefix, tmp);
181 if (PyUnicode_READ_CHAR(path, len-1) != SEP) {
182 /* add trailing SEP */
183 tmp = PyUnicode_FromFormat("%U%c", self->prefix, SEP);
184 if (tmp == NULL)
185 goto error;
186 Py_SETREF(self->prefix, tmp);
187 }
188 }
189 else {
190 Py_XSETREF(self->prefix, PyUnicode_New(0, 0));
191 }
192 Py_DECREF(path);
193 return 0;
194
195 error:
196 Py_DECREF(path);
197 Py_XDECREF(filename);
198 return -1;
199 }
200
201 /* GC support. */
202 static int
zipimporter_traverse(PyObject * obj,visitproc visit,void * arg)203 zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
204 {
205 ZipImporter *self = (ZipImporter *)obj;
206 Py_VISIT(self->files);
207 return 0;
208 }
209
210 static void
zipimporter_dealloc(ZipImporter * self)211 zipimporter_dealloc(ZipImporter *self)
212 {
213 PyObject_GC_UnTrack(self);
214 Py_XDECREF(self->archive);
215 Py_XDECREF(self->prefix);
216 Py_XDECREF(self->files);
217 Py_TYPE(self)->tp_free((PyObject *)self);
218 }
219
220 static PyObject *
zipimporter_repr(ZipImporter * self)221 zipimporter_repr(ZipImporter *self)
222 {
223 if (self->archive == NULL)
224 return PyUnicode_FromString("<zipimporter object \"???\">");
225 else if (self->prefix != NULL && PyUnicode_GET_LENGTH(self->prefix) != 0)
226 return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">",
227 self->archive, SEP, self->prefix);
228 else
229 return PyUnicode_FromFormat("<zipimporter object \"%U\">",
230 self->archive);
231 }
232
233 /* return fullname.split(".")[-1] */
234 static PyObject *
get_subname(PyObject * fullname)235 get_subname(PyObject *fullname)
236 {
237 Py_ssize_t len, dot;
238 if (PyUnicode_READY(fullname) < 0)
239 return NULL;
240 len = PyUnicode_GET_LENGTH(fullname);
241 dot = PyUnicode_FindChar(fullname, '.', 0, len, -1);
242 if (dot == -1) {
243 Py_INCREF(fullname);
244 return fullname;
245 } else
246 return PyUnicode_Substring(fullname, dot+1, len);
247 }
248
249 /* Given a (sub)modulename, write the potential file path in the
250 archive (without extension) to the path buffer. Return the
251 length of the resulting string.
252
253 return self.prefix + name.replace('.', os.sep) */
254 static PyObject*
make_filename(PyObject * prefix,PyObject * name)255 make_filename(PyObject *prefix, PyObject *name)
256 {
257 PyObject *pathobj;
258 Py_UCS4 *p, *buf;
259 Py_ssize_t len;
260
261 len = PyUnicode_GET_LENGTH(prefix) + PyUnicode_GET_LENGTH(name) + 1;
262 p = buf = PyMem_New(Py_UCS4, len);
263 if (buf == NULL) {
264 PyErr_NoMemory();
265 return NULL;
266 }
267
268 if (!PyUnicode_AsUCS4(prefix, p, len, 0)) {
269 PyMem_Free(buf);
270 return NULL;
271 }
272 p += PyUnicode_GET_LENGTH(prefix);
273 len -= PyUnicode_GET_LENGTH(prefix);
274 if (!PyUnicode_AsUCS4(name, p, len, 1)) {
275 PyMem_Free(buf);
276 return NULL;
277 }
278 for (; *p; p++) {
279 if (*p == '.')
280 *p = SEP;
281 }
282 pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
283 buf, p-buf);
284 PyMem_Free(buf);
285 return pathobj;
286 }
287
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* failure; an exception has been set */
    MI_NOT_FOUND,   /* no entry in the archive matches the module name */
    MI_MODULE,      /* matched a non-package suffix (.pyc / .py) */
    MI_PACKAGE      /* matched an __init__ suffix */
};
294
295 /* Does this path represent a directory?
296 on error, return < 0
297 if not a dir, return 0
298 if a dir, return 1
299 */
300 static int
check_is_directory(ZipImporter * self,PyObject * prefix,PyObject * path)301 check_is_directory(ZipImporter *self, PyObject* prefix, PyObject *path)
302 {
303 PyObject *dirpath;
304 int res;
305
306 /* See if this is a "directory". If so, it's eligible to be part
307 of a namespace package. We test by seeing if the name, with an
308 appended path separator, exists. */
309 dirpath = PyUnicode_FromFormat("%U%U%c", prefix, path, SEP);
310 if (dirpath == NULL)
311 return -1;
312 /* If dirpath is present in self->files, we have a directory. */
313 res = PyDict_Contains(self->files, dirpath);
314 Py_DECREF(dirpath);
315 return res;
316 }
317
318 /* Return some information about a module. */
319 static enum zi_module_info
get_module_info(ZipImporter * self,PyObject * fullname)320 get_module_info(ZipImporter *self, PyObject *fullname)
321 {
322 PyObject *subname;
323 PyObject *path, *fullpath, *item;
324 struct st_zip_searchorder *zso;
325
326 if (self->prefix == NULL) {
327 PyErr_SetString(PyExc_ValueError,
328 "zipimporter.__init__() wasn't called");
329 return MI_ERROR;
330 }
331
332 subname = get_subname(fullname);
333 if (subname == NULL)
334 return MI_ERROR;
335
336 path = make_filename(self->prefix, subname);
337 Py_DECREF(subname);
338 if (path == NULL)
339 return MI_ERROR;
340
341 for (zso = zip_searchorder; *zso->suffix; zso++) {
342 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
343 if (fullpath == NULL) {
344 Py_DECREF(path);
345 return MI_ERROR;
346 }
347 item = PyDict_GetItem(self->files, fullpath);
348 Py_DECREF(fullpath);
349 if (item != NULL) {
350 Py_DECREF(path);
351 if (zso->type & IS_PACKAGE)
352 return MI_PACKAGE;
353 else
354 return MI_MODULE;
355 }
356 }
357 Py_DECREF(path);
358 return MI_NOT_FOUND;
359 }
360
/* Result of the find_loader() helper. */
typedef enum {
    FL_ERROR = -1,       /* error */
    FL_NOT_FOUND,        /* no loader or namespace portions found */
    FL_MODULE_FOUND,     /* module/package found */
    FL_NS_FOUND          /* namespace portion found: */
                         /* *namespace_portion will point to the name */
} find_loader_result;
368
369 /* The guts of "find_loader" and "find_module".
370 */
371 static find_loader_result
find_loader(ZipImporter * self,PyObject * fullname,PyObject ** namespace_portion)372 find_loader(ZipImporter *self, PyObject *fullname, PyObject **namespace_portion)
373 {
374 enum zi_module_info mi;
375
376 *namespace_portion = NULL;
377
378 mi = get_module_info(self, fullname);
379 if (mi == MI_ERROR)
380 return FL_ERROR;
381 if (mi == MI_NOT_FOUND) {
382 /* Not a module or regular package. See if this is a directory, and
383 therefore possibly a portion of a namespace package. */
384 find_loader_result result = FL_NOT_FOUND;
385 PyObject *subname;
386 int is_dir;
387
388 /* We're only interested in the last path component of fullname;
389 earlier components are recorded in self->prefix. */
390 subname = get_subname(fullname);
391 if (subname == NULL) {
392 return FL_ERROR;
393 }
394
395 is_dir = check_is_directory(self, self->prefix, subname);
396 if (is_dir < 0)
397 result = FL_ERROR;
398 else if (is_dir) {
399 /* This is possibly a portion of a namespace
400 package. Return the string representing its path,
401 without a trailing separator. */
402 *namespace_portion = PyUnicode_FromFormat("%U%c%U%U",
403 self->archive, SEP,
404 self->prefix, subname);
405 if (*namespace_portion == NULL)
406 result = FL_ERROR;
407 else
408 result = FL_NS_FOUND;
409 }
410 Py_DECREF(subname);
411 return result;
412 }
413 /* This is a module or package. */
414 return FL_MODULE_FOUND;
415 }
416
417 /*[clinic input]
418 zipimport.zipimporter.find_module
419
420 fullname: unicode
421 path: object = None
422 /
423
424 Search for a module specified by 'fullname'.
425
426 'fullname' must be the fully qualified (dotted) module name. It returns the
427 zipimporter instance itself if the module was found, or None if it wasn't.
428 The optional 'path' argument is ignored -- it's there for compatibility
429 with the importer protocol.
430
431 [clinic start generated code]*/
432
433 static PyObject *
zipimport_zipimporter_find_module_impl(ZipImporter * self,PyObject * fullname,PyObject * path)434 zipimport_zipimporter_find_module_impl(ZipImporter *self, PyObject *fullname,
435 PyObject *path)
436 /*[clinic end generated code: output=506087f609466dc7 input=e3528520e075063f]*/
437 {
438 PyObject *namespace_portion = NULL;
439 PyObject *result = NULL;
440
441 switch (find_loader(self, fullname, &namespace_portion)) {
442 case FL_ERROR:
443 return NULL;
444 case FL_NS_FOUND:
445 /* A namespace portion is not allowed via find_module, so return None. */
446 Py_DECREF(namespace_portion);
447 /* FALL THROUGH */
448 case FL_NOT_FOUND:
449 result = Py_None;
450 break;
451 case FL_MODULE_FOUND:
452 result = (PyObject *)self;
453 break;
454 default:
455 PyErr_BadInternalCall();
456 return NULL;
457 }
458 Py_INCREF(result);
459 return result;
460 }
461
462
463 /*[clinic input]
464 zipimport.zipimporter.find_loader
465
466 fullname: unicode
467 path: object = None
468 /
469
470 Search for a module specified by 'fullname'.
471
472 'fullname' must be the fully qualified (dotted) module name. It returns the
473 zipimporter instance itself if the module was found, a string containing the
474 full path name if it's possibly a portion of a namespace package,
475 or None otherwise. The optional 'path' argument is ignored -- it's
476 there for compatibility with the importer protocol.
477
478 [clinic start generated code]*/
479
480 static PyObject *
zipimport_zipimporter_find_loader_impl(ZipImporter * self,PyObject * fullname,PyObject * path)481 zipimport_zipimporter_find_loader_impl(ZipImporter *self, PyObject *fullname,
482 PyObject *path)
483 /*[clinic end generated code: output=601599a43bc0f49a input=dc73f275b0d5be23]*/
484 {
485 PyObject *result = NULL;
486 PyObject *namespace_portion = NULL;
487
488 switch (find_loader(self, fullname, &namespace_portion)) {
489 case FL_ERROR:
490 return NULL;
491 case FL_NOT_FOUND: /* Not found, return (None, []) */
492 result = Py_BuildValue("O[]", Py_None);
493 break;
494 case FL_MODULE_FOUND: /* Return (self, []) */
495 result = Py_BuildValue("O[]", self);
496 break;
497 case FL_NS_FOUND: /* Return (None, [namespace_portion]) */
498 result = Py_BuildValue("O[O]", Py_None, namespace_portion);
499 Py_DECREF(namespace_portion);
500 return result;
501 default:
502 PyErr_BadInternalCall();
503 return NULL;
504 }
505 return result;
506 }
507
508 /*[clinic input]
509 zipimport.zipimporter.load_module
510
511 fullname: unicode
512 /
513
514 Load the module specified by 'fullname'.
515
516 'fullname' must be the fully qualified (dotted) module name. It returns the
517 imported module, or raises ZipImportError if it wasn't found.
518
519 [clinic start generated code]*/
520
521 static PyObject *
zipimport_zipimporter_load_module_impl(ZipImporter * self,PyObject * fullname)522 zipimport_zipimporter_load_module_impl(ZipImporter *self, PyObject *fullname)
523 /*[clinic end generated code: output=7303cebf88d47953 input=c236e2e8621f04ef]*/
524 {
525 PyObject *code = NULL, *mod, *dict;
526 PyObject *modpath = NULL;
527 int ispackage;
528
529 if (PyUnicode_READY(fullname) == -1)
530 return NULL;
531
532 code = get_module_code(self, fullname, &ispackage, &modpath);
533 if (code == NULL)
534 goto error;
535
536 mod = PyImport_AddModuleObject(fullname);
537 if (mod == NULL)
538 goto error;
539 dict = PyModule_GetDict(mod);
540
541 /* mod.__loader__ = self */
542 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
543 goto error;
544
545 if (ispackage) {
546 /* add __path__ to the module *before* the code gets
547 executed */
548 PyObject *pkgpath, *fullpath, *subname;
549 int err;
550
551 subname = get_subname(fullname);
552 if (subname == NULL)
553 goto error;
554
555 fullpath = PyUnicode_FromFormat("%U%c%U%U",
556 self->archive, SEP,
557 self->prefix, subname);
558 Py_DECREF(subname);
559 if (fullpath == NULL)
560 goto error;
561
562 pkgpath = Py_BuildValue("[N]", fullpath);
563 if (pkgpath == NULL)
564 goto error;
565 err = PyDict_SetItemString(dict, "__path__", pkgpath);
566 Py_DECREF(pkgpath);
567 if (err != 0)
568 goto error;
569 }
570 mod = PyImport_ExecCodeModuleObject(fullname, code, modpath, NULL);
571 Py_CLEAR(code);
572 if (mod == NULL)
573 goto error;
574
575 if (Py_VerboseFlag)
576 PySys_FormatStderr("import %U # loaded from Zip %U\n",
577 fullname, modpath);
578 Py_DECREF(modpath);
579 return mod;
580 error:
581 Py_XDECREF(code);
582 Py_XDECREF(modpath);
583 return NULL;
584 }
585
586 /*[clinic input]
587 zipimport.zipimporter.get_filename
588
589 fullname: unicode
590 /
591
592 Return the filename for the specified module.
593 [clinic start generated code]*/
594
595 static PyObject *
zipimport_zipimporter_get_filename_impl(ZipImporter * self,PyObject * fullname)596 zipimport_zipimporter_get_filename_impl(ZipImporter *self,
597 PyObject *fullname)
598 /*[clinic end generated code: output=c5b92b58bea86506 input=28d2eb57e4f25c8a]*/
599 {
600 PyObject *code, *modpath;
601 int ispackage;
602
603 /* Deciding the filename requires working out where the code
604 would come from if the module was actually loaded */
605 code = get_module_code(self, fullname, &ispackage, &modpath);
606 if (code == NULL)
607 return NULL;
608 Py_DECREF(code); /* Only need the path info */
609
610 return modpath;
611 }
612
613 /*[clinic input]
614 zipimport.zipimporter.is_package
615
616 fullname: unicode
617 /
618
619 Return True if the module specified by fullname is a package.
620
621 Raise ZipImportError if the module couldn't be found.
622
623 [clinic start generated code]*/
624
625 static PyObject *
zipimport_zipimporter_is_package_impl(ZipImporter * self,PyObject * fullname)626 zipimport_zipimporter_is_package_impl(ZipImporter *self, PyObject *fullname)
627 /*[clinic end generated code: output=c32958c2a5216ae6 input=a7ba752f64345062]*/
628 {
629 enum zi_module_info mi;
630
631 mi = get_module_info(self, fullname);
632 if (mi == MI_ERROR)
633 return NULL;
634 if (mi == MI_NOT_FOUND) {
635 PyErr_Format(ZipImportError, "can't find module %R", fullname);
636 return NULL;
637 }
638 return PyBool_FromLong(mi == MI_PACKAGE);
639 }
640
641
642 /*[clinic input]
643 zipimport.zipimporter.get_data
644
645 pathname as path: unicode
646 /
647
648 Return the data associated with 'pathname'.
649
650 Raise OSError if the file was not found.
651
652 [clinic start generated code]*/
653
654 static PyObject *
zipimport_zipimporter_get_data_impl(ZipImporter * self,PyObject * path)655 zipimport_zipimporter_get_data_impl(ZipImporter *self, PyObject *path)
656 /*[clinic end generated code: output=65dc506aaa268436 input=fa6428b74843c4ae]*/
657 {
658 PyObject *key;
659 PyObject *toc_entry;
660 Py_ssize_t path_start, path_len, len;
661
662 if (self->archive == NULL) {
663 PyErr_SetString(PyExc_ValueError,
664 "zipimporter.__init__() wasn't called");
665 return NULL;
666 }
667
668 #ifdef ALTSEP
669 path = _PyObject_CallMethodId((PyObject *)&PyUnicode_Type, &PyId_replace,
670 "OCC", path, ALTSEP, SEP);
671 if (!path)
672 return NULL;
673 #else
674 Py_INCREF(path);
675 #endif
676 if (PyUnicode_READY(path) == -1)
677 goto error;
678
679 path_len = PyUnicode_GET_LENGTH(path);
680
681 len = PyUnicode_GET_LENGTH(self->archive);
682 path_start = 0;
683 if (PyUnicode_Tailmatch(path, self->archive, 0, len, -1)
684 && PyUnicode_READ_CHAR(path, len) == SEP) {
685 path_start = len + 1;
686 }
687
688 key = PyUnicode_Substring(path, path_start, path_len);
689 if (key == NULL)
690 goto error;
691 toc_entry = PyDict_GetItem(self->files, key);
692 if (toc_entry == NULL) {
693 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, key);
694 Py_DECREF(key);
695 goto error;
696 }
697 Py_DECREF(key);
698 Py_DECREF(path);
699 return get_data(self->archive, toc_entry);
700 error:
701 Py_DECREF(path);
702 return NULL;
703 }
704
705 /*[clinic input]
706 zipimport.zipimporter.get_code
707
708 fullname: unicode
709 /
710
711 Return the code object for the specified module.
712
713 Raise ZipImportError if the module couldn't be found.
714
715 [clinic start generated code]*/
716
717 static PyObject *
zipimport_zipimporter_get_code_impl(ZipImporter * self,PyObject * fullname)718 zipimport_zipimporter_get_code_impl(ZipImporter *self, PyObject *fullname)
719 /*[clinic end generated code: output=b923c37fa99cbac4 input=2761412bc37f3549]*/
720 {
721 return get_module_code(self, fullname, NULL, NULL);
722 }
723
724 /*[clinic input]
725 zipimport.zipimporter.get_source
726
727 fullname: unicode
728 /
729
730 Return the source code for the specified module.
731
732 Raise ZipImportError if the module couldn't be found, return None if the
733 archive does contain the module, but has no source for it.
734
735 [clinic start generated code]*/
736
737 static PyObject *
zipimport_zipimporter_get_source_impl(ZipImporter * self,PyObject * fullname)738 zipimport_zipimporter_get_source_impl(ZipImporter *self, PyObject *fullname)
739 /*[clinic end generated code: output=bc059301b0c33729 input=4e4b186f2e690716]*/
740 {
741 PyObject *toc_entry;
742 PyObject *subname, *path, *fullpath;
743 enum zi_module_info mi;
744
745 mi = get_module_info(self, fullname);
746 if (mi == MI_ERROR)
747 return NULL;
748 if (mi == MI_NOT_FOUND) {
749 PyErr_Format(ZipImportError, "can't find module %R", fullname);
750 return NULL;
751 }
752
753 subname = get_subname(fullname);
754 if (subname == NULL)
755 return NULL;
756
757 path = make_filename(self->prefix, subname);
758 Py_DECREF(subname);
759 if (path == NULL)
760 return NULL;
761
762 if (mi == MI_PACKAGE)
763 fullpath = PyUnicode_FromFormat("%U%c__init__.py", path, SEP);
764 else
765 fullpath = PyUnicode_FromFormat("%U.py", path);
766 Py_DECREF(path);
767 if (fullpath == NULL)
768 return NULL;
769
770 toc_entry = PyDict_GetItem(self->files, fullpath);
771 Py_DECREF(fullpath);
772 if (toc_entry != NULL) {
773 PyObject *res, *bytes;
774 bytes = get_data(self->archive, toc_entry);
775 if (bytes == NULL)
776 return NULL;
777 res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
778 PyBytes_GET_SIZE(bytes));
779 Py_DECREF(bytes);
780 return res;
781 }
782
783 /* we have the module, but no source */
784 Py_RETURN_NONE;
785 }
786
787 /*[clinic input]
788 zipimport.zipimporter.get_resource_reader
789
790 fullname: unicode
791 /
792
793 Return the ResourceReader for a package in a zip file.
794
795 If 'fullname' is a package within the zip file, return the 'ResourceReader'
796 object for the package. Otherwise return None.
797
798 [clinic start generated code]*/
799
800 static PyObject *
zipimport_zipimporter_get_resource_reader_impl(ZipImporter * self,PyObject * fullname)801 zipimport_zipimporter_get_resource_reader_impl(ZipImporter *self,
802 PyObject *fullname)
803 /*[clinic end generated code: output=5e367d431f830726 input=bfab94d736e99151]*/
804 {
805 PyObject *module = PyImport_ImportModule("importlib.resources");
806 if (module == NULL) {
807 return NULL;
808 }
809 PyObject *retval = PyObject_CallMethod(
810 module, "_zipimport_get_resource_reader",
811 "OO", (PyObject *)self, fullname);
812 Py_DECREF(module);
813 return retval;
814 }
815
816
/* Method table of zipimporter.  Each *_METHODDEF macro comes from the
   generated clinic/zipimport.c.h header included above and expands to the
   table entry for the corresponding *_impl function in this file. */
static PyMethodDef zipimporter_methods[] = {
    ZIPIMPORT_ZIPIMPORTER_FIND_MODULE_METHODDEF
    ZIPIMPORT_ZIPIMPORTER_FIND_LOADER_METHODDEF
    ZIPIMPORT_ZIPIMPORTER_LOAD_MODULE_METHODDEF
    ZIPIMPORT_ZIPIMPORTER_GET_FILENAME_METHODDEF
    ZIPIMPORT_ZIPIMPORTER_IS_PACKAGE_METHODDEF
    ZIPIMPORT_ZIPIMPORTER_GET_DATA_METHODDEF
    ZIPIMPORT_ZIPIMPORTER_GET_CODE_METHODDEF
    ZIPIMPORT_ZIPIMPORTER_GET_SOURCE_METHODDEF
    ZIPIMPORT_ZIPIMPORTER_GET_RESOURCE_READER_METHODDEF
    {NULL,              NULL}   /* sentinel */
};
829
/* Read-only attributes exposing the three object members.  The files dict
   is published under the private-looking name "_files". */
static PyMemberDef zipimporter_members[] = {
    {"archive",  T_OBJECT, offsetof(ZipImporter, archive), READONLY},
    {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),  READONLY},
    {"_files",   T_OBJECT, offsetof(ZipImporter, files),   READONLY},
    {NULL}      /* sentinel */
};
836
/* Expands to 0 whatever the argument: the type pointer in the static
   initializer below is left NULL here.
   NOTE(review): presumably it is filled in when the module is initialised;
   that code is not visible in this part of the file. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for zipimport.zipimporter: a GC-aware, subclassable type
   with a traverse function but no tp_clear. */
static PyTypeObject ZipImporter_Type = {
    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
    "zipimport.zipimporter",
    sizeof(ZipImporter),
    0,                                          /* tp_itemsize */
    (destructor)zipimporter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)zipimporter_repr,                 /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
    zipimport_zipimporter___init____doc__,      /* tp_doc */
    zipimporter_traverse,                       /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    zipimporter_methods,                        /* tp_methods */
    zipimporter_members,                        /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    (initproc)zipimport_zipimporter___init__,   /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    PyType_GenericNew,                          /* tp_new */
    PyObject_GC_Del,                            /* tp_free */
};
881
882
883 /* implementation */
884
/* Decode the first 4 bytes of 'buf' as a little-endian unsigned integer
   (buf[0] is the least significant byte).  This partially reimplements
   marshal.c:r_long(). */
static unsigned int
get_uint32(const unsigned char *buf)
{
    unsigned int value = 0;
    int byte;

    /* Fold in the bytes from most to least significant. */
    for (byte = 3; byte >= 0; byte--) {
        value = (value << 8) | buf[byte];
    }
    return value;
}
898
/* Decode the first 2 bytes of 'buf' as a little-endian unsigned integer
   (buf[0] is the low byte).  This partially reimplements
   marshal.c:r_short(). */
static unsigned short
get_uint16(const unsigned char *buf)
{
    return (unsigned short)(buf[0] | ((unsigned int)buf[1] << 8));
}
910
911 static void
set_file_error(PyObject * archive,int eof)912 set_file_error(PyObject *archive, int eof)
913 {
914 if (eof) {
915 PyErr_SetString(PyExc_EOFError, "EOF read where not expected");
916 }
917 else {
918 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, archive);
919 }
920 }
921
/*
   read_directory(archive) -> files dict (new reference)

   Given a path to a Zip archive, build a dict, mapping file names
   (local to the archive, using SEP as a separator) to toc entries.

   A toc_entry is a tuple:

   (__file__,      # value to use for __file__, available for all files;
                   # a str: the archive path, SEP, and the entry name
    compress,      # compression kind; 0 for uncompressed
    data_size,     # size of compressed data on disk
    file_size,     # size of decompressed data
    file_offset,   # offset of file header from start of archive
    time,          # mod time of file (in dos format)
    date,          # mod date of file (in dos format)
    crc,           # crc checksum of the data
   )

   Directories can be recognized by the trailing SEP in the name,
   data_size and file_offset are 0.

   On failure NULL is returned with an exception set: ZipImportError when
   the file cannot be opened or read or when a header is inconsistent;
   EOFError or OSError (see set_file_error) when the central directory is
   cut short.
*/
static PyObject *
read_directory(PyObject *archive)
{
    PyObject *files = NULL;
    FILE *fp;
    unsigned short flags, compress, time, date, name_size;
    unsigned int crc, data_size, file_size, header_size, header_offset;
    unsigned long file_offset, header_position;
    unsigned long arc_offset;  /* Absolute offset to start of the zip-archive. */
    unsigned int count, i;
    unsigned char buffer[46];
    char name[MAXPATHLEN + 5];
    PyObject *nameobj = NULL;
    PyObject *path;
    const char *charset;
    int bootstrap;
    const char *errmsg = NULL;

    fp = _Py_fopen_obj(archive, "rb");
    if (fp == NULL) {
        /* Re-raise an OSError as ZipImportError, chained to the original;
           any other exception type is passed through unchanged. */
        if (PyErr_ExceptionMatches(PyExc_OSError)) {
            _PyErr_FormatFromCause(ZipImportError,
                                   "can't open Zip file: %R", archive);
        }
        return NULL;
    }

    /* The end-of-central-directory record is looked for only in the final
       22 bytes of the file. */
    if (fseek(fp, -22, SEEK_END) == -1) {
        goto file_error;
    }
    header_position = (unsigned long)ftell(fp);
    if (header_position == (unsigned long)-1) {
        goto file_error;
    }
    assert(header_position <= (unsigned long)LONG_MAX);
    if (fread(buffer, 1, 22, fp) != 22) {
        goto file_error;
    }
    if (get_uint32(buffer) != 0x06054B50u) {
        /* Bad: End of Central Dir signature */
        errmsg = "not a Zip file";
        goto invalid_header;
    }

    /* Size and offset of the central directory, as recorded in the
       archive; both are validated against the actual position of the
       end record before being used. */
    header_size = get_uint32(buffer + 12);
    header_offset = get_uint32(buffer + 16);
    if (header_position < header_size) {
        errmsg = "bad central directory size";
        goto invalid_header;
    }
    if (header_position < header_offset) {
        errmsg = "bad central directory offset";
        goto invalid_header;
    }
    if (header_position - header_size < header_offset) {
        errmsg = "bad central directory size or offset";
        goto invalid_header;
    }
    /* header_position now becomes the file position where the central
       directory starts; the difference to the recorded offset is the
       amount of data that precedes the archive in the file. */
    header_position -= header_size;
    arc_offset = header_position - header_offset;

    files = PyDict_New();
    if (files == NULL) {
        goto error;
    }
    /* Start of Central Directory */
    count = 0;
    if (fseek(fp, (long)header_position, 0) == -1) {
        goto file_error;
    }
    /* One iteration per central directory file header (46 fixed bytes
       followed by name, extra field and comment). */
    for (;;) {
        PyObject *t;
        size_t n;
        int err;

        n = fread(buffer, 1, 46, fp);
        if (n < 4) {
            goto eof_error;
        }
        /* Start of file header */
        if (get_uint32(buffer) != 0x02014B50u) {
            /* Anything without the file header signature ends the scan. */
            break;              /* Bad: Central Dir File Header */
        }
        if (n != 46) {
            goto eof_error;
        }
        flags = get_uint16(buffer + 8);
        compress = get_uint16(buffer + 10);
        time = get_uint16(buffer + 12);
        date = get_uint16(buffer + 14);
        crc = get_uint32(buffer + 16);
        data_size = get_uint32(buffer + 20);
        file_size = get_uint32(buffer + 24);
        name_size = get_uint16(buffer + 28);
        /* header_size is reused here for the length of the variable part
           that follows the 46 fixed bytes. */
        header_size = (unsigned int)name_size +
           get_uint16(buffer + 30) /* extra field */ +
           get_uint16(buffer + 32) /* comment */;

        file_offset = get_uint32(buffer + 42);
        if (file_offset > header_offset) {
            errmsg = "bad local header offset";
            goto invalid_header;
        }
        /* Convert the archive-relative offset to a file offset. */
        file_offset += arc_offset;

        /* Names longer than MAXPATHLEN are truncated; the bytes that are
           not read here are consumed by the skip loop below. */
        if (name_size > MAXPATHLEN) {
            name_size = MAXPATHLEN;
        }
        if (fread(name, 1, name_size, fp) != name_size) {
            goto file_error;
        }
        name[name_size] = '\0';  /* Add terminating null byte */
#if SEP != '/'
        for (i = 0; i < name_size; i++) {
            if (name[i] == '/') {
                name[i] = SEP;
            }
        }
#endif
        /* Skip the rest of the header.
         * On Windows, calling fseek to skip over the fields we don't use is
         * slower than reading the data because fseek flushes stdio's
         * internal buffers.  See issue #8745. */
        assert(header_size <= 3*0xFFFFu);
        for (i = name_size; i < header_size; i++) {
            if (getc(fp) == EOF) {
                goto file_error;
            }
        }

        /* Pick the codec for the entry name: UTF-8 when flag bit 0x0800 is
           set, otherwise cp437 (or ASCII while codecs are unavailable). */
        bootstrap = 0;
        if (flags & 0x0800) {
            charset = "utf-8";
        }
        else if (!PyThreadState_GET()->interp->codecs_initialized) {
            /* During bootstrap, we may need to load the encodings
               package from a ZIP file. But the cp437 encoding is implemented
               in Python in the encodings package.

               Break out of this dependency by assuming that the path to
               the encodings module is ASCII-only. */
            charset = "ascii";
            bootstrap = 1;
        }
        else {
            charset = "cp437";
        }
        nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
        if (nameobj == NULL) {
            if (bootstrap) {
                PyErr_Format(PyExc_NotImplementedError,
                    "bootstrap issue: python%i%i.zip contains non-ASCII "
                    "filenames without the unicode flag",
                    PY_MAJOR_VERSION, PY_MINOR_VERSION);
            }
            goto error;
        }
        if (PyUnicode_READY(nameobj) == -1) {
            goto error;
        }
        path = PyUnicode_FromFormat("%U%c%U", archive, SEP, nameobj);
        if (path == NULL) {
            goto error;
        }
        /* "N" passes the reference to 'path' on to Py_BuildValue, so it is
           not released separately below. */
        t = Py_BuildValue("NHIIkHHI", path, compress, data_size,
                          file_size, file_offset, time, date, crc);
        if (t == NULL) {
            goto error;
        }
        err = PyDict_SetItem(files, nameobj, t);
        /* nameobj is reset to NULL so the error path does not release it
           a second time. */
        Py_CLEAR(nameobj);
        Py_DECREF(t);
        if (err != 0) {
            goto error;
        }
        count++;
    }
    fclose(fp);
    if (Py_VerboseFlag) {
        PySys_FormatStderr("# zipimport: found %u names in %R\n",
                           count, archive);
    }
    return files;

    /* Each label below sets the exception for one failure class and then
       falls into the shared cleanup at 'error'. */
eof_error:
    set_file_error(archive, !ferror(fp));
    goto error;

file_error:
    PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
    goto error;

invalid_header:
    assert(errmsg != NULL);
    PyErr_Format(ZipImportError, "%s: %R", errmsg, archive);
    goto error;

error:
    fclose(fp);
    Py_XDECREF(files);
    Py_XDECREF(nameobj);
    return NULL;
}
1147
1148 /* Return the zlib.decompress function object, or NULL if zlib couldn't
1149 be imported. The function is cached when found, so subsequent calls
1150 don't import zlib again. */
1151 static PyObject *
get_decompress_func(void)1152 get_decompress_func(void)
1153 {
1154 static int importing_zlib = 0;
1155 PyObject *zlib;
1156 PyObject *decompress;
1157 _Py_IDENTIFIER(decompress);
1158
1159 if (importing_zlib != 0)
1160 /* Someone has a zlib.pyc in their Zip file;
1161 let's avoid a stack overflow. */
1162 return NULL;
1163 importing_zlib = 1;
1164 zlib = PyImport_ImportModuleNoBlock("zlib");
1165 importing_zlib = 0;
1166 if (zlib != NULL) {
1167 decompress = _PyObject_GetAttrId(zlib,
1168 &PyId_decompress);
1169 Py_DECREF(zlib);
1170 }
1171 else {
1172 PyErr_Clear();
1173 decompress = NULL;
1174 }
1175 if (Py_VerboseFlag)
1176 PySys_WriteStderr("# zipimport: zlib %s\n",
1177 zlib != NULL ? "available": "UNAVAILABLE");
1178 return decompress;
1179 }
1180
1181 /* Given a path to a Zip file and a toc_entry, return the (uncompressed)
1182 data as a new reference. */
1183 static PyObject *
get_data(PyObject * archive,PyObject * toc_entry)1184 get_data(PyObject *archive, PyObject *toc_entry)
1185 {
1186 PyObject *raw_data = NULL, *data, *decompress;
1187 char *buf;
1188 FILE *fp;
1189 PyObject *datapath;
1190 unsigned short compress, time, date;
1191 unsigned int crc;
1192 Py_ssize_t data_size, file_size, bytes_size;
1193 long file_offset, header_size;
1194 unsigned char buffer[30];
1195 const char *errmsg = NULL;
1196
1197 if (!PyArg_ParseTuple(toc_entry, "OHnnlHHI", &datapath, &compress,
1198 &data_size, &file_size, &file_offset, &time,
1199 &date, &crc)) {
1200 return NULL;
1201 }
1202 if (data_size < 0) {
1203 PyErr_Format(ZipImportError, "negative data size");
1204 return NULL;
1205 }
1206
1207 fp = _Py_fopen_obj(archive, "rb");
1208 if (!fp) {
1209 return NULL;
1210 }
1211 /* Check to make sure the local file header is correct */
1212 if (fseek(fp, file_offset, 0) == -1) {
1213 goto file_error;
1214 }
1215 if (fread(buffer, 1, 30, fp) != 30) {
1216 goto eof_error;
1217 }
1218 if (get_uint32(buffer) != 0x04034B50u) {
1219 /* Bad: Local File Header */
1220 errmsg = "bad local file header";
1221 goto invalid_header;
1222 }
1223
1224 header_size = (unsigned int)30 +
1225 get_uint16(buffer + 26) /* file name */ +
1226 get_uint16(buffer + 28) /* extra field */;
1227 if (file_offset > LONG_MAX - header_size) {
1228 errmsg = "bad local file header size";
1229 goto invalid_header;
1230 }
1231 file_offset += header_size; /* Start of file data */
1232
1233 if (data_size > LONG_MAX - 1) {
1234 fclose(fp);
1235 PyErr_NoMemory();
1236 return NULL;
1237 }
1238 bytes_size = compress == 0 ? data_size : data_size + 1;
1239 if (bytes_size == 0) {
1240 bytes_size++;
1241 }
1242 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
1243 if (raw_data == NULL) {
1244 goto error;
1245 }
1246 buf = PyBytes_AsString(raw_data);
1247
1248 if (fseek(fp, file_offset, 0) == -1) {
1249 goto file_error;
1250 }
1251 if (fread(buf, 1, data_size, fp) != (size_t)data_size) {
1252 PyErr_SetString(PyExc_OSError,
1253 "zipimport: can't read data");
1254 goto error;
1255 }
1256
1257 fclose(fp);
1258 fp = NULL;
1259
1260 if (compress != 0) {
1261 buf[data_size] = 'Z'; /* saw this in zipfile.py */
1262 data_size++;
1263 }
1264 buf[data_size] = '\0';
1265
1266 if (compress == 0) { /* data is not compressed */
1267 data = PyBytes_FromStringAndSize(buf, data_size);
1268 Py_DECREF(raw_data);
1269 return data;
1270 }
1271
1272 /* Decompress with zlib */
1273 decompress = get_decompress_func();
1274 if (decompress == NULL) {
1275 PyErr_SetString(ZipImportError,
1276 "can't decompress data; "
1277 "zlib not available");
1278 goto error;
1279 }
1280 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
1281 Py_DECREF(decompress);
1282 Py_DECREF(raw_data);
1283 if (data != NULL && !PyBytes_Check(data)) {
1284 PyErr_Format(PyExc_TypeError,
1285 "zlib.decompress() must return a bytes object, not "
1286 "%.200s",
1287 Py_TYPE(data)->tp_name);
1288 Py_DECREF(data);
1289 return NULL;
1290 }
1291 return data;
1292
1293 eof_error:
1294 set_file_error(archive, !ferror(fp));
1295 goto error;
1296
1297 file_error:
1298 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1299 goto error;
1300
1301 invalid_header:
1302 assert(errmsg != NULL);
1303 PyErr_Format(ZipImportError, "%s: %R", errmsg, archive);
1304 goto error;
1305
1306 error:
1307 if (fp != NULL) {
1308 fclose(fp);
1309 }
1310 Py_XDECREF(raw_data);
1311 return NULL;
1312 }
1313
/* Lenient time stamp comparison: return non-zero when t1 and t2 differ
   by at most one second.  The mtime kept in the archive is coarser than
   the one stored in a .pyc (dostime only stores even seconds), so an
   exact match cannot be required. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
1326
1327 /* Given the contents of a .pyc file in a buffer, unmarshal the data
1328 and return the code object. Return None if it the magic word doesn't
1329 match (we do this instead of raising an exception as we fall back
1330 to .py if available and we don't want to mask other errors).
1331 Returns a new reference. */
1332 static PyObject *
unmarshal_code(PyObject * pathname,PyObject * data,time_t mtime)1333 unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
1334 {
1335 PyObject *code;
1336 unsigned char *buf = (unsigned char *)PyBytes_AsString(data);
1337 Py_ssize_t size = PyBytes_Size(data);
1338
1339 if (size < 16) {
1340 PyErr_SetString(ZipImportError,
1341 "bad pyc data");
1342 return NULL;
1343 }
1344
1345 if (get_uint32(buf) != (unsigned int)PyImport_GetMagicNumber()) {
1346 if (Py_VerboseFlag) {
1347 PySys_FormatStderr("# %R has bad magic\n",
1348 pathname);
1349 }
1350 Py_RETURN_NONE; /* signal caller to try alternative */
1351 }
1352
1353 uint32_t flags = get_uint32(buf + 4);
1354 if (flags != 0) {
1355 // Hash-based pyc. We currently refuse to handle checked hash-based
1356 // pycs. We could validate hash-based pycs against the source, but it
1357 // seems likely that most people putting hash-based pycs in a zipfile
1358 // will use unchecked ones.
1359 if (strcmp(_Py_CheckHashBasedPycsMode, "never") &&
1360 (flags != 0x1 || !strcmp(_Py_CheckHashBasedPycsMode, "always")))
1361 Py_RETURN_NONE;
1362 } else if ((mtime != 0 && !eq_mtime(get_uint32(buf + 8), mtime))) {
1363 if (Py_VerboseFlag) {
1364 PySys_FormatStderr("# %R has bad mtime\n",
1365 pathname);
1366 }
1367 Py_RETURN_NONE; /* signal caller to try alternative */
1368 }
1369
1370 /* XXX the pyc's size field is ignored; timestamp collisions are probably
1371 unimportant with zip files. */
1372 code = PyMarshal_ReadObjectFromString((char *)buf + 16, size - 16);
1373 if (code == NULL) {
1374 return NULL;
1375 }
1376 if (!PyCode_Check(code)) {
1377 Py_DECREF(code);
1378 PyErr_Format(PyExc_TypeError,
1379 "compiled module %R is not a code object",
1380 pathname);
1381 return NULL;
1382 }
1383 return code;
1384 }
1385
1386 /* Replace any occurrences of "\r\n?" in the input string with "\n".
1387 This converts DOS and Mac line endings to Unix line endings.
1388 Also append a trailing "\n" to be compatible with
1389 PyParser_SimpleParseFile(). Returns a new reference. */
1390 static PyObject *
normalize_line_endings(PyObject * source)1391 normalize_line_endings(PyObject *source)
1392 {
1393 char *buf, *q, *p;
1394 PyObject *fixed_source;
1395 int len = 0;
1396
1397 p = PyBytes_AsString(source);
1398 if (p == NULL) {
1399 return PyBytes_FromStringAndSize("\n\0", 2);
1400 }
1401
1402 /* one char extra for trailing \n and one for terminating \0 */
1403 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1404 if (buf == NULL) {
1405 PyErr_SetString(PyExc_MemoryError,
1406 "zipimport: no memory to allocate "
1407 "source buffer");
1408 return NULL;
1409 }
1410 /* replace "\r\n?" by "\n" */
1411 for (q = buf; *p != '\0'; p++) {
1412 if (*p == '\r') {
1413 *q++ = '\n';
1414 if (*(p + 1) == '\n')
1415 p++;
1416 }
1417 else
1418 *q++ = *p;
1419 len++;
1420 }
1421 *q++ = '\n'; /* add trailing \n */
1422 *q = '\0';
1423 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1424 PyMem_Free(buf);
1425 return fixed_source;
1426 }
1427
1428 /* Given a string buffer containing Python source code, compile it
1429 and return a code object as a new reference. */
1430 static PyObject *
compile_source(PyObject * pathname,PyObject * source)1431 compile_source(PyObject *pathname, PyObject *source)
1432 {
1433 PyObject *code, *fixed_source;
1434
1435 fixed_source = normalize_line_endings(source);
1436 if (fixed_source == NULL) {
1437 return NULL;
1438 }
1439
1440 code = Py_CompileStringObject(PyBytes_AsString(fixed_source),
1441 pathname, Py_file_input, NULL, -1);
1442
1443 Py_DECREF(fixed_source);
1444 return code;
1445 }
1446
/* Convert the packed DOS date/time values found in the Zip archive to a
   time_t that can be compared with the time stamp stored in .pyc files.

   dostime: bits 0-4 seconds/2, bits 5-10 minutes, bits 11-15 hours.
   dosdate: bits 0-4 day of month, bits 5-8 month (1-based),
            bits 9-15 years since 1980.

   The fields are handed to mktime(), whose result is returned. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    const int half_seconds = dostime & 0x1f;
    const int minutes = (dostime >> 5) & 0x3f;
    const int hours = (dostime >> 11) & 0x1f;
    const int day = dosdate & 0x1f;
    const int month = (dosdate >> 5) & 0x0f;
    const int years_since_1980 = (dosdate >> 9) & 0x7f;
    struct tm stm;

    memset((void *) &stm, '\0', sizeof(stm));

    stm.tm_year = years_since_1980 + 80;  /* tm_year counts from 1900 */
    stm.tm_mon = month - 1;               /* tm_mon is 0-based */
    stm.tm_mday = day;
    stm.tm_hour = hours;
    stm.tm_min = minutes;
    stm.tm_sec = half_seconds * 2;
    stm.tm_isdst = -1;  /* wday/yday is ignored */

    return mktime(&stm);
}
1466
1467 /* Given a path to a .pyc file in the archive, return the
1468 modification time of the matching .py file, or 0 if no source
1469 is available. */
1470 static time_t
get_mtime_of_source(ZipImporter * self,PyObject * path)1471 get_mtime_of_source(ZipImporter *self, PyObject *path)
1472 {
1473 PyObject *toc_entry, *stripped;
1474 time_t mtime;
1475
1476 /* strip 'c' from *.pyc */
1477 if (PyUnicode_READY(path) == -1)
1478 return (time_t)-1;
1479 stripped = PyUnicode_FromKindAndData(PyUnicode_KIND(path),
1480 PyUnicode_DATA(path),
1481 PyUnicode_GET_LENGTH(path) - 1);
1482 if (stripped == NULL)
1483 return (time_t)-1;
1484
1485 toc_entry = PyDict_GetItem(self->files, stripped);
1486 Py_DECREF(stripped);
1487 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1488 PyTuple_Size(toc_entry) == 8) {
1489 /* fetch the time stamp of the .py file for comparison
1490 with an embedded pyc time stamp */
1491 int time, date;
1492 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1493 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1494 mtime = parse_dostime(time, date);
1495 } else
1496 mtime = 0;
1497 return mtime;
1498 }
1499
1500 /* Return the code object for the module named by 'fullname' from the
1501 Zip archive as a new reference. */
1502 static PyObject *
get_code_from_data(ZipImporter * self,int ispackage,int isbytecode,time_t mtime,PyObject * toc_entry)1503 get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1504 time_t mtime, PyObject *toc_entry)
1505 {
1506 PyObject *data, *modpath, *code;
1507
1508 data = get_data(self->archive, toc_entry);
1509 if (data == NULL)
1510 return NULL;
1511
1512 modpath = PyTuple_GetItem(toc_entry, 0);
1513 if (isbytecode)
1514 code = unmarshal_code(modpath, data, mtime);
1515 else
1516 code = compile_source(modpath, data);
1517 Py_DECREF(data);
1518 return code;
1519 }
1520
1521 /* Get the code object associated with the module specified by
1522 'fullname'. */
1523 static PyObject *
get_module_code(ZipImporter * self,PyObject * fullname,int * p_ispackage,PyObject ** p_modpath)1524 get_module_code(ZipImporter *self, PyObject *fullname,
1525 int *p_ispackage, PyObject **p_modpath)
1526 {
1527 PyObject *code = NULL, *toc_entry, *subname;
1528 PyObject *path, *fullpath = NULL;
1529 struct st_zip_searchorder *zso;
1530
1531 if (self->prefix == NULL) {
1532 PyErr_SetString(PyExc_ValueError,
1533 "zipimporter.__init__() wasn't called");
1534 return NULL;
1535 }
1536
1537 subname = get_subname(fullname);
1538 if (subname == NULL)
1539 return NULL;
1540
1541 path = make_filename(self->prefix, subname);
1542 Py_DECREF(subname);
1543 if (path == NULL)
1544 return NULL;
1545
1546 for (zso = zip_searchorder; *zso->suffix; zso++) {
1547 code = NULL;
1548
1549 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
1550 if (fullpath == NULL)
1551 goto exit;
1552
1553 if (Py_VerboseFlag > 1)
1554 PySys_FormatStderr("# trying %U%c%U\n",
1555 self->archive, (int)SEP, fullpath);
1556 toc_entry = PyDict_GetItem(self->files, fullpath);
1557 if (toc_entry != NULL) {
1558 time_t mtime = 0;
1559 int ispackage = zso->type & IS_PACKAGE;
1560 int isbytecode = zso->type & IS_BYTECODE;
1561
1562 if (isbytecode) {
1563 mtime = get_mtime_of_source(self, fullpath);
1564 if (mtime == (time_t)-1 && PyErr_Occurred()) {
1565 goto exit;
1566 }
1567 }
1568 Py_CLEAR(fullpath);
1569 if (p_ispackage != NULL)
1570 *p_ispackage = ispackage;
1571 code = get_code_from_data(self, ispackage,
1572 isbytecode, mtime,
1573 toc_entry);
1574 if (code == Py_None) {
1575 /* bad magic number or non-matching mtime
1576 in byte code, try next */
1577 Py_DECREF(code);
1578 continue;
1579 }
1580 if (code != NULL && p_modpath != NULL) {
1581 *p_modpath = PyTuple_GetItem(toc_entry, 0);
1582 Py_INCREF(*p_modpath);
1583 }
1584 goto exit;
1585 }
1586 else
1587 Py_CLEAR(fullpath);
1588 }
1589 PyErr_Format(ZipImportError, "can't find module %R", fullname);
1590 exit:
1591 Py_DECREF(path);
1592 Py_XDECREF(fullpath);
1593 return code;
1594 }
1595
1596
1597 /* Module init */
1598
1599 PyDoc_STRVAR(zipimport_doc,
1600 "zipimport provides support for importing Python modules from Zip archives.\n\
1601 \n\
1602 This module exports three objects:\n\
1603 - zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1604 - ZipImportError: exception raised by zipimporter objects. It's a\n\
1605 subclass of ImportError, so it can be caught as ImportError, too.\n\
1606 - _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1607 info dicts, as used in zipimporter._files.\n\
1608 \n\
1609 It is usually not needed to use the zipimport module explicitly; it is\n\
1610 used by the builtin import mechanism for sys.path items that are paths\n\
1611 to Zip archives.");
1612
1613 static struct PyModuleDef zipimportmodule = {
1614 PyModuleDef_HEAD_INIT,
1615 "zipimport",
1616 zipimport_doc,
1617 -1,
1618 NULL,
1619 NULL,
1620 NULL,
1621 NULL,
1622 NULL
1623 };
1624
1625 PyMODINIT_FUNC
PyInit_zipimport(void)1626 PyInit_zipimport(void)
1627 {
1628 PyObject *mod;
1629
1630 if (PyType_Ready(&ZipImporter_Type) < 0)
1631 return NULL;
1632
1633 /* Correct directory separator */
1634 zip_searchorder[0].suffix[0] = SEP;
1635 zip_searchorder[1].suffix[0] = SEP;
1636
1637 mod = PyModule_Create(&zipimportmodule);
1638 if (mod == NULL)
1639 return NULL;
1640
1641 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1642 PyExc_ImportError, NULL);
1643 if (ZipImportError == NULL)
1644 return NULL;
1645
1646 Py_INCREF(ZipImportError);
1647 if (PyModule_AddObject(mod, "ZipImportError",
1648 ZipImportError) < 0)
1649 return NULL;
1650
1651 Py_INCREF(&ZipImporter_Type);
1652 if (PyModule_AddObject(mod, "zipimporter",
1653 (PyObject *)&ZipImporter_Type) < 0)
1654 return NULL;
1655
1656 zip_directory_cache = PyDict_New();
1657 if (zip_directory_cache == NULL)
1658 return NULL;
1659 Py_INCREF(zip_directory_cache);
1660 if (PyModule_AddObject(mod, "_zip_directory_cache",
1661 zip_directory_cache) < 0)
1662 return NULL;
1663 return mod;
1664 }
1665