• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 #include "internal/import.h"
3 #include "internal/pystate.h"
4 #include "structmember.h"
5 #include "osdefs.h"
6 #include "marshal.h"
7 #include <time.h>
8 
9 
10 #define IS_SOURCE   0x0
11 #define IS_BYTECODE 0x1
12 #define IS_PACKAGE  0x2
13 
/* One row of the module search table: a filename suffix to append to a
   module's path inside the archive, plus flags describing what a match on
   that suffix means. */
14 struct st_zip_searchorder {
15     char suffix[14];    /* suffix text; the longest entry used in this file,
                              "/__init__.pyc", is 13 characters plus the NUL */
16     int type;           /* bitwise OR of IS_SOURCE / IS_BYTECODE / IS_PACKAGE */
17 };
18 
19 #ifdef ALTSEP
20 _Py_IDENTIFIER(replace);
21 #endif
22 
23 /* zip_searchorder defines how we search for a module in the Zip
24    archive: we first search for a package __init__, then for
25    non-package .pyc, and .py entries. The .pyc entries
26    are swapped by initzipimport() if we run in optimized mode. Also,
27    '/' is replaced by SEP there. */
/* Entries are tried in order; get_module_info() stops at the first suffix
   whose resulting path is present in the archive's files dict. */
28 static struct st_zip_searchorder zip_searchorder[] = {
29     {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},   /* package, bytecode */
30     {"/__init__.py", IS_PACKAGE | IS_SOURCE},      /* package, source */
31     {".pyc", IS_BYTECODE},                         /* plain module, bytecode */
32     {".py", IS_SOURCE},                            /* plain module, source */
33     {"", 0}     /* sentinel: the empty suffix ends iteration */
34 };
35 
36 /* zipimporter object definition and support */
37 
/* Instance layout of zipimport.zipimporter objects.  The object fields are
   filled in by zipimporter.__init__; several methods treat a NULL archive
   or prefix as "__init__() wasn't called". */
38 typedef struct _zipimporter ZipImporter;
39 
40 struct _zipimporter {
41     PyObject_HEAD
42     PyObject *archive;  /* pathname of the Zip archive,
43                            decoded from the filesystem encoding */
44     PyObject *prefix;   /* file prefix: "a/sub/directory/",
45                            encoded to the filesystem encoding
                           NOTE(review): __init__ stores a str here (built
                           with PyUnicode_Substring / PyUnicode_New), so
                           "encoded" looks stale -- confirm */
46     PyObject *files;    /* dict with file info {path: toc_entry} */
47 };
48 
/* Exception type used in this file for archive-related failures such as
   "archive path is empty" and "not a Zip file". */
49 static PyObject *ZipImportError;
50 /* read_directory() cache */
/* Used by zipimporter.__init__ as {archive path: files dict}. */
51 static PyObject *zip_directory_cache = NULL;
52 
53 /* forward decls */
54 static PyObject *read_directory(PyObject *archive);
55 static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
56 static PyObject *get_module_code(ZipImporter *self, PyObject *fullname,
57                                  int *p_ispackage, PyObject **p_modpath);
58 
/* Tentative declaration; the initialized definition appears further down. */
59 static PyTypeObject ZipImporter_Type;
60 
/* True if op is a zipimporter instance or an instance of a subtype. */
61 #define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
62 
63 /*[clinic input]
64 module zipimport
65 class zipimport.zipimporter "ZipImporter *" "&ZipImporter_Type"
66 [clinic start generated code]*/
67 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=9db8b61557d911e7]*/
68 #include "clinic/zipimport.c.h"
69 
70 
71 /* zipimporter.__init__
72    Split the "subdirectory" from the Zip archive path, lookup a matching
73    entry in sys.path_importer_cache, fetch the file directory from there
74    if found, or else read it from the archive. */
75 
76 /*[clinic input]
77 zipimport.zipimporter.__init__
78 
79     archivepath as path: object(converter="PyUnicode_FSDecoder")
80         A path-like object to a zipfile, or to a specific path inside
81         a zipfile.
82     /
83 
84 Create a new zipimporter instance.
85 
86 'archivepath' must be a path-like object to a zipfile, or to a specific path
87 inside a zipfile. For example, it can be '/tmp/myimport.zip', or
88 '/tmp/myimport.zip/mydirectory', if mydirectory is a valid directory inside
89 the archive.
90 
91 'ZipImportError' is raised if 'archivepath' doesn't point to a valid Zip
92 archive.
93 
94 The 'archive' attribute of the zipimporter object contains the name of the
95 zipfile targeted.
96 
97 [clinic start generated code]*/
98 
99 static int
zipimport_zipimporter___init___impl(ZipImporter * self,PyObject * path)100 zipimport_zipimporter___init___impl(ZipImporter *self, PyObject *path)
101 /*[clinic end generated code: output=141558fefdb46dc8 input=92b9ebeed1f6a704]*/
102 {
103     PyObject *files, *tmp;
104     PyObject *filename = NULL;
105     Py_ssize_t len, flen;
106 
107     if (PyUnicode_READY(path) == -1)
108         return -1;
109 
110     len = PyUnicode_GET_LENGTH(path);
111     if (len == 0) {
112         PyErr_SetString(ZipImportError, "archive path is empty");
113         goto error;
114     }
115 
116 #ifdef ALTSEP
117     tmp = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
118     if (!tmp)
119         goto error;
120     Py_DECREF(path);
121     path = tmp;
122 #endif
123 
124     filename = path;
125     Py_INCREF(filename);
126     flen = len;
127     for (;;) {
128         struct stat statbuf;
129         int rv;
130 
131         rv = _Py_stat(filename, &statbuf);
132         if (rv == -2)
133             goto error;
134         if (rv == 0) {
135             /* it exists */
136             if (!S_ISREG(statbuf.st_mode))
137                 /* it's a not file */
138                 Py_CLEAR(filename);
139             break;
140         }
141         Py_CLEAR(filename);
142         /* back up one path element */
143         flen = PyUnicode_FindChar(path, SEP, 0, flen, -1);
144         if (flen == -1)
145             break;
146         filename = PyUnicode_Substring(path, 0, flen);
147         if (filename == NULL)
148             goto error;
149     }
150     if (filename == NULL) {
151         PyErr_SetString(ZipImportError, "not a Zip file");
152         goto error;
153     }
154 
155     if (PyUnicode_READY(filename) < 0)
156         goto error;
157 
158     files = PyDict_GetItem(zip_directory_cache, filename);
159     if (files == NULL) {
160         files = read_directory(filename);
161         if (files == NULL)
162             goto error;
163         if (PyDict_SetItem(zip_directory_cache, filename, files) != 0)
164             goto error;
165     }
166     else
167         Py_INCREF(files);
168     Py_XSETREF(self->files, files);
169 
170     /* Transfer reference */
171     Py_XSETREF(self->archive, filename);
172     filename = NULL;
173 
174     /* Check if there is a prefix directory following the filename. */
175     if (flen != len) {
176         tmp = PyUnicode_Substring(path, flen+1,
177                                   PyUnicode_GET_LENGTH(path));
178         if (tmp == NULL)
179             goto error;
180         Py_XSETREF(self->prefix, tmp);
181         if (PyUnicode_READ_CHAR(path, len-1) != SEP) {
182             /* add trailing SEP */
183             tmp = PyUnicode_FromFormat("%U%c", self->prefix, SEP);
184             if (tmp == NULL)
185                 goto error;
186             Py_SETREF(self->prefix, tmp);
187         }
188     }
189     else {
190         Py_XSETREF(self->prefix, PyUnicode_New(0, 0));
191     }
192     Py_DECREF(path);
193     return 0;
194 
195 error:
196     Py_DECREF(path);
197     Py_XDECREF(filename);
198     return -1;
199 }
200 
201 /* GC support. */
202 static int
zipimporter_traverse(PyObject * obj,visitproc visit,void * arg)203 zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
204 {
205     ZipImporter *self = (ZipImporter *)obj;
206     Py_VISIT(self->files);
207     return 0;
208 }
209 
210 static void
zipimporter_dealloc(ZipImporter * self)211 zipimporter_dealloc(ZipImporter *self)
212 {
213     PyObject_GC_UnTrack(self);
214     Py_XDECREF(self->archive);
215     Py_XDECREF(self->prefix);
216     Py_XDECREF(self->files);
217     Py_TYPE(self)->tp_free((PyObject *)self);
218 }
219 
220 static PyObject *
zipimporter_repr(ZipImporter * self)221 zipimporter_repr(ZipImporter *self)
222 {
223     if (self->archive == NULL)
224         return PyUnicode_FromString("<zipimporter object \"???\">");
225     else if (self->prefix != NULL && PyUnicode_GET_LENGTH(self->prefix) != 0)
226         return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">",
227                                     self->archive, SEP, self->prefix);
228     else
229         return PyUnicode_FromFormat("<zipimporter object \"%U\">",
230                                     self->archive);
231 }
232 
233 /* return fullname.split(".")[-1] */
234 static PyObject *
get_subname(PyObject * fullname)235 get_subname(PyObject *fullname)
236 {
237     Py_ssize_t len, dot;
238     if (PyUnicode_READY(fullname) < 0)
239         return NULL;
240     len = PyUnicode_GET_LENGTH(fullname);
241     dot = PyUnicode_FindChar(fullname, '.', 0, len, -1);
242     if (dot == -1) {
243         Py_INCREF(fullname);
244         return fullname;
245     } else
246         return PyUnicode_Substring(fullname, dot+1, len);
247 }
248 
249 /* Given a (sub)modulename, write the potential file path in the
250    archive (without extension) to the path buffer. Return the
251    length of the resulting string.
252 
253    return self.prefix + name.replace('.', os.sep) */
254 static PyObject*
make_filename(PyObject * prefix,PyObject * name)255 make_filename(PyObject *prefix, PyObject *name)
256 {
257     PyObject *pathobj;
258     Py_UCS4 *p, *buf;
259     Py_ssize_t len;
260 
261     len = PyUnicode_GET_LENGTH(prefix) + PyUnicode_GET_LENGTH(name) + 1;
262     p = buf = PyMem_New(Py_UCS4, len);
263     if (buf == NULL) {
264         PyErr_NoMemory();
265         return NULL;
266     }
267 
268     if (!PyUnicode_AsUCS4(prefix, p, len, 0)) {
269         PyMem_Free(buf);
270         return NULL;
271     }
272     p += PyUnicode_GET_LENGTH(prefix);
273     len -= PyUnicode_GET_LENGTH(prefix);
274     if (!PyUnicode_AsUCS4(name, p, len, 1)) {
275         PyMem_Free(buf);
276         return NULL;
277     }
278     for (; *p; p++) {
279         if (*p == '.')
280             *p = SEP;
281     }
282     pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
283                                         buf, p-buf);
284     PyMem_Free(buf);
285     return pathobj;
286 }
287 
/* Result of get_module_info(): what, if anything, the archive holds for a
   given module name. */
288 enum zi_module_info {
289     MI_ERROR,       /* lookup failed */
290     MI_NOT_FOUND,   /* no zip_searchorder suffix matched */
291     MI_MODULE,      /* matched an entry without the IS_PACKAGE flag */
292     MI_PACKAGE      /* matched an entry with the IS_PACKAGE flag */
293 };
294 
295 /* Does this path represent a directory?
296    on error, return < 0
297    if not a dir, return 0
298    if a dir, return 1
299 */
300 static int
check_is_directory(ZipImporter * self,PyObject * prefix,PyObject * path)301 check_is_directory(ZipImporter *self, PyObject* prefix, PyObject *path)
302 {
303     PyObject *dirpath;
304     int res;
305 
306     /* See if this is a "directory". If so, it's eligible to be part
307        of a namespace package. We test by seeing if the name, with an
308        appended path separator, exists. */
309     dirpath = PyUnicode_FromFormat("%U%U%c", prefix, path, SEP);
310     if (dirpath == NULL)
311         return -1;
312     /* If dirpath is present in self->files, we have a directory. */
313     res = PyDict_Contains(self->files, dirpath);
314     Py_DECREF(dirpath);
315     return res;
316 }
317 
318 /* Return some information about a module. */
319 static enum zi_module_info
get_module_info(ZipImporter * self,PyObject * fullname)320 get_module_info(ZipImporter *self, PyObject *fullname)
321 {
322     PyObject *subname;
323     PyObject *path, *fullpath, *item;
324     struct st_zip_searchorder *zso;
325 
326     if (self->prefix == NULL) {
327         PyErr_SetString(PyExc_ValueError,
328                         "zipimporter.__init__() wasn't called");
329         return MI_ERROR;
330     }
331 
332     subname = get_subname(fullname);
333     if (subname == NULL)
334         return MI_ERROR;
335 
336     path = make_filename(self->prefix, subname);
337     Py_DECREF(subname);
338     if (path == NULL)
339         return MI_ERROR;
340 
341     for (zso = zip_searchorder; *zso->suffix; zso++) {
342         fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
343         if (fullpath == NULL) {
344             Py_DECREF(path);
345             return MI_ERROR;
346         }
347         item = PyDict_GetItem(self->files, fullpath);
348         Py_DECREF(fullpath);
349         if (item != NULL) {
350             Py_DECREF(path);
351             if (zso->type & IS_PACKAGE)
352                 return MI_PACKAGE;
353             else
354                 return MI_MODULE;
355         }
356     }
357     Py_DECREF(path);
358     return MI_NOT_FOUND;
359 }
360 
/* Outcome of the internal find_loader() helper, shared by the find_module()
   and find_loader() methods. */
361 typedef enum {
362     FL_ERROR = -1,       /* error */
363     FL_NOT_FOUND,        /* no loader or namespace portions found */
364     FL_MODULE_FOUND,     /* module/package found */
365     FL_NS_FOUND          /* namespace portion found: */
366                          /* *namespace_portion will point to the name */
367 } find_loader_result;
368 
369 /* The guts of "find_loader" and "find_module".
370 */
371 static find_loader_result
find_loader(ZipImporter * self,PyObject * fullname,PyObject ** namespace_portion)372 find_loader(ZipImporter *self, PyObject *fullname, PyObject **namespace_portion)
373 {
374     enum zi_module_info mi;
375 
376     *namespace_portion = NULL;
377 
378     mi = get_module_info(self, fullname);
379     if (mi == MI_ERROR)
380         return FL_ERROR;
381     if (mi == MI_NOT_FOUND) {
382         /* Not a module or regular package. See if this is a directory, and
383            therefore possibly a portion of a namespace package. */
384         find_loader_result result = FL_NOT_FOUND;
385         PyObject *subname;
386         int is_dir;
387 
388         /* We're only interested in the last path component of fullname;
389            earlier components are recorded in self->prefix. */
390         subname = get_subname(fullname);
391         if (subname == NULL) {
392             return FL_ERROR;
393         }
394 
395         is_dir = check_is_directory(self, self->prefix, subname);
396         if (is_dir < 0)
397             result = FL_ERROR;
398         else if (is_dir) {
399             /* This is possibly a portion of a namespace
400                package. Return the string representing its path,
401                without a trailing separator. */
402             *namespace_portion = PyUnicode_FromFormat("%U%c%U%U",
403                                                       self->archive, SEP,
404                                                       self->prefix, subname);
405             if (*namespace_portion == NULL)
406                 result = FL_ERROR;
407             else
408                 result = FL_NS_FOUND;
409         }
410         Py_DECREF(subname);
411         return result;
412     }
413     /* This is a module or package. */
414     return FL_MODULE_FOUND;
415 }
416 
417 /*[clinic input]
418 zipimport.zipimporter.find_module
419 
420     fullname: unicode
421     path: object = None
422     /
423 
424 Search for a module specified by 'fullname'.
425 
426 'fullname' must be the fully qualified (dotted) module name. It returns the
427 zipimporter instance itself if the module was found, or None if it wasn't.
428 The optional 'path' argument is ignored -- it's there for compatibility
429 with the importer protocol.
430 
431 [clinic start generated code]*/
432 
433 static PyObject *
zipimport_zipimporter_find_module_impl(ZipImporter * self,PyObject * fullname,PyObject * path)434 zipimport_zipimporter_find_module_impl(ZipImporter *self, PyObject *fullname,
435                                        PyObject *path)
436 /*[clinic end generated code: output=506087f609466dc7 input=e3528520e075063f]*/
437 {
438     PyObject *namespace_portion = NULL;
439     PyObject *result = NULL;
440 
441     switch (find_loader(self, fullname, &namespace_portion)) {
442     case FL_ERROR:
443         return NULL;
444     case FL_NS_FOUND:
445         /* A namespace portion is not allowed via find_module, so return None. */
446         Py_DECREF(namespace_portion);
447         /* FALL THROUGH */
448     case FL_NOT_FOUND:
449         result = Py_None;
450         break;
451     case FL_MODULE_FOUND:
452         result = (PyObject *)self;
453         break;
454     default:
455         PyErr_BadInternalCall();
456         return NULL;
457     }
458     Py_INCREF(result);
459     return result;
460 }
461 
462 
463 /*[clinic input]
464 zipimport.zipimporter.find_loader
465 
466     fullname: unicode
467     path: object = None
468     /
469 
470 Search for a module specified by 'fullname'.
471 
472 'fullname' must be the fully qualified (dotted) module name. It returns the
473 zipimporter instance itself if the module was found, a string containing the
474 full path name if it's possibly a portion of a namespace package,
475 or None otherwise. The optional 'path' argument is ignored -- it's
476 there for compatibility with the importer protocol.
477 
478 [clinic start generated code]*/
479 
480 static PyObject *
zipimport_zipimporter_find_loader_impl(ZipImporter * self,PyObject * fullname,PyObject * path)481 zipimport_zipimporter_find_loader_impl(ZipImporter *self, PyObject *fullname,
482                                        PyObject *path)
483 /*[clinic end generated code: output=601599a43bc0f49a input=dc73f275b0d5be23]*/
484 {
485     PyObject *result = NULL;
486     PyObject *namespace_portion = NULL;
487 
488     switch (find_loader(self, fullname, &namespace_portion)) {
489     case FL_ERROR:
490         return NULL;
491     case FL_NOT_FOUND:        /* Not found, return (None, []) */
492         result = Py_BuildValue("O[]", Py_None);
493         break;
494     case FL_MODULE_FOUND:     /* Return (self, []) */
495         result = Py_BuildValue("O[]", self);
496         break;
497     case FL_NS_FOUND:         /* Return (None, [namespace_portion]) */
498         result = Py_BuildValue("O[O]", Py_None, namespace_portion);
499         Py_DECREF(namespace_portion);
500         return result;
501     default:
502         PyErr_BadInternalCall();
503         return NULL;
504     }
505     return result;
506 }
507 
508 /*[clinic input]
509 zipimport.zipimporter.load_module
510 
511     fullname: unicode
512     /
513 
514 Load the module specified by 'fullname'.
515 
516 'fullname' must be the fully qualified (dotted) module name. It returns the
517 imported module, or raises ZipImportError if it wasn't found.
518 
519 [clinic start generated code]*/
520 
521 static PyObject *
zipimport_zipimporter_load_module_impl(ZipImporter * self,PyObject * fullname)522 zipimport_zipimporter_load_module_impl(ZipImporter *self, PyObject *fullname)
523 /*[clinic end generated code: output=7303cebf88d47953 input=c236e2e8621f04ef]*/
524 {
525     PyObject *code = NULL, *mod, *dict;
526     PyObject *modpath = NULL;
527     int ispackage;
528 
529     if (PyUnicode_READY(fullname) == -1)
530         return NULL;
531 
532     code = get_module_code(self, fullname, &ispackage, &modpath);
533     if (code == NULL)
534         goto error;
535 
536     mod = PyImport_AddModuleObject(fullname);
537     if (mod == NULL)
538         goto error;
539     dict = PyModule_GetDict(mod);
540 
541     /* mod.__loader__ = self */
542     if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
543         goto error;
544 
545     if (ispackage) {
546         /* add __path__ to the module *before* the code gets
547            executed */
548         PyObject *pkgpath, *fullpath, *subname;
549         int err;
550 
551         subname = get_subname(fullname);
552         if (subname == NULL)
553             goto error;
554 
555         fullpath = PyUnicode_FromFormat("%U%c%U%U",
556                                 self->archive, SEP,
557                                 self->prefix, subname);
558         Py_DECREF(subname);
559         if (fullpath == NULL)
560             goto error;
561 
562         pkgpath = Py_BuildValue("[N]", fullpath);
563         if (pkgpath == NULL)
564             goto error;
565         err = PyDict_SetItemString(dict, "__path__", pkgpath);
566         Py_DECREF(pkgpath);
567         if (err != 0)
568             goto error;
569     }
570     mod = PyImport_ExecCodeModuleObject(fullname, code, modpath, NULL);
571     Py_CLEAR(code);
572     if (mod == NULL)
573         goto error;
574 
575     if (Py_VerboseFlag)
576         PySys_FormatStderr("import %U # loaded from Zip %U\n",
577                            fullname, modpath);
578     Py_DECREF(modpath);
579     return mod;
580 error:
581     Py_XDECREF(code);
582     Py_XDECREF(modpath);
583     return NULL;
584 }
585 
586 /*[clinic input]
587 zipimport.zipimporter.get_filename
588 
589     fullname: unicode
590     /
591 
592 Return the filename for the specified module.
593 [clinic start generated code]*/
594 
595 static PyObject *
zipimport_zipimporter_get_filename_impl(ZipImporter * self,PyObject * fullname)596 zipimport_zipimporter_get_filename_impl(ZipImporter *self,
597                                         PyObject *fullname)
598 /*[clinic end generated code: output=c5b92b58bea86506 input=28d2eb57e4f25c8a]*/
599 {
600     PyObject *code, *modpath;
601     int ispackage;
602 
603     /* Deciding the filename requires working out where the code
604        would come from if the module was actually loaded */
605     code = get_module_code(self, fullname, &ispackage, &modpath);
606     if (code == NULL)
607         return NULL;
608     Py_DECREF(code); /* Only need the path info */
609 
610     return modpath;
611 }
612 
613 /*[clinic input]
614 zipimport.zipimporter.is_package
615 
616     fullname: unicode
617     /
618 
619 Return True if the module specified by fullname is a package.
620 
621 Raise ZipImportError if the module couldn't be found.
622 
623 [clinic start generated code]*/
624 
625 static PyObject *
zipimport_zipimporter_is_package_impl(ZipImporter * self,PyObject * fullname)626 zipimport_zipimporter_is_package_impl(ZipImporter *self, PyObject *fullname)
627 /*[clinic end generated code: output=c32958c2a5216ae6 input=a7ba752f64345062]*/
628 {
629     enum zi_module_info mi;
630 
631     mi = get_module_info(self, fullname);
632     if (mi == MI_ERROR)
633         return NULL;
634     if (mi == MI_NOT_FOUND) {
635         PyErr_Format(ZipImportError, "can't find module %R", fullname);
636         return NULL;
637     }
638     return PyBool_FromLong(mi == MI_PACKAGE);
639 }
640 
641 
642 /*[clinic input]
643 zipimport.zipimporter.get_data
644 
645     pathname as path: unicode
646     /
647 
648 Return the data associated with 'pathname'.
649 
650 Raise OSError if the file was not found.
651 
652 [clinic start generated code]*/
653 
654 static PyObject *
zipimport_zipimporter_get_data_impl(ZipImporter * self,PyObject * path)655 zipimport_zipimporter_get_data_impl(ZipImporter *self, PyObject *path)
656 /*[clinic end generated code: output=65dc506aaa268436 input=fa6428b74843c4ae]*/
657 {
658     PyObject *key;
659     PyObject *toc_entry;
660     Py_ssize_t path_start, path_len, len;
661 
662     if (self->archive == NULL) {
663         PyErr_SetString(PyExc_ValueError,
664                         "zipimporter.__init__() wasn't called");
665         return NULL;
666     }
667 
668 #ifdef ALTSEP
669     path = _PyObject_CallMethodId((PyObject *)&PyUnicode_Type, &PyId_replace,
670                                   "OCC", path, ALTSEP, SEP);
671     if (!path)
672         return NULL;
673 #else
674     Py_INCREF(path);
675 #endif
676     if (PyUnicode_READY(path) == -1)
677         goto error;
678 
679     path_len = PyUnicode_GET_LENGTH(path);
680 
681     len = PyUnicode_GET_LENGTH(self->archive);
682     path_start = 0;
683     if (PyUnicode_Tailmatch(path, self->archive, 0, len, -1)
684         && PyUnicode_READ_CHAR(path, len) == SEP) {
685         path_start = len + 1;
686     }
687 
688     key = PyUnicode_Substring(path, path_start, path_len);
689     if (key == NULL)
690         goto error;
691     toc_entry = PyDict_GetItem(self->files, key);
692     if (toc_entry == NULL) {
693         PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, key);
694         Py_DECREF(key);
695         goto error;
696     }
697     Py_DECREF(key);
698     Py_DECREF(path);
699     return get_data(self->archive, toc_entry);
700   error:
701     Py_DECREF(path);
702     return NULL;
703 }
704 
705 /*[clinic input]
706 zipimport.zipimporter.get_code
707 
708     fullname: unicode
709     /
710 
711 Return the code object for the specified module.
712 
713 Raise ZipImportError if the module couldn't be found.
714 
715 [clinic start generated code]*/
716 
717 static PyObject *
zipimport_zipimporter_get_code_impl(ZipImporter * self,PyObject * fullname)718 zipimport_zipimporter_get_code_impl(ZipImporter *self, PyObject *fullname)
719 /*[clinic end generated code: output=b923c37fa99cbac4 input=2761412bc37f3549]*/
720 {
721     return get_module_code(self, fullname, NULL, NULL);
722 }
723 
724 /*[clinic input]
725 zipimport.zipimporter.get_source
726 
727     fullname: unicode
728     /
729 
730 Return the source code for the specified module.
731 
732 Raise ZipImportError if the module couldn't be found, return None if the
733 archive does contain the module, but has no source for it.
734 
735 [clinic start generated code]*/
736 
737 static PyObject *
zipimport_zipimporter_get_source_impl(ZipImporter * self,PyObject * fullname)738 zipimport_zipimporter_get_source_impl(ZipImporter *self, PyObject *fullname)
739 /*[clinic end generated code: output=bc059301b0c33729 input=4e4b186f2e690716]*/
740 {
741     PyObject *toc_entry;
742     PyObject *subname, *path, *fullpath;
743     enum zi_module_info mi;
744 
745     mi = get_module_info(self, fullname);
746     if (mi == MI_ERROR)
747         return NULL;
748     if (mi == MI_NOT_FOUND) {
749         PyErr_Format(ZipImportError, "can't find module %R", fullname);
750         return NULL;
751     }
752 
753     subname = get_subname(fullname);
754     if (subname == NULL)
755         return NULL;
756 
757     path = make_filename(self->prefix, subname);
758     Py_DECREF(subname);
759     if (path == NULL)
760         return NULL;
761 
762     if (mi == MI_PACKAGE)
763         fullpath = PyUnicode_FromFormat("%U%c__init__.py", path, SEP);
764     else
765         fullpath = PyUnicode_FromFormat("%U.py", path);
766     Py_DECREF(path);
767     if (fullpath == NULL)
768         return NULL;
769 
770     toc_entry = PyDict_GetItem(self->files, fullpath);
771     Py_DECREF(fullpath);
772     if (toc_entry != NULL) {
773         PyObject *res, *bytes;
774         bytes = get_data(self->archive, toc_entry);
775         if (bytes == NULL)
776             return NULL;
777         res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
778                                           PyBytes_GET_SIZE(bytes));
779         Py_DECREF(bytes);
780         return res;
781     }
782 
783     /* we have the module, but no source */
784     Py_RETURN_NONE;
785 }
786 
787 /*[clinic input]
788 zipimport.zipimporter.get_resource_reader
789 
790     fullname: unicode
791     /
792 
793 Return the ResourceReader for a package in a zip file.
794 
795 If 'fullname' is a package within the zip file, return the 'ResourceReader'
796 object for the package.  Otherwise return None.
797 
798 [clinic start generated code]*/
799 
800 static PyObject *
zipimport_zipimporter_get_resource_reader_impl(ZipImporter * self,PyObject * fullname)801 zipimport_zipimporter_get_resource_reader_impl(ZipImporter *self,
802                                                PyObject *fullname)
803 /*[clinic end generated code: output=5e367d431f830726 input=bfab94d736e99151]*/
804 {
805     PyObject *module = PyImport_ImportModule("importlib.resources");
806     if (module == NULL) {
807         return NULL;
808     }
809     PyObject *retval = PyObject_CallMethod(
810         module, "_zipimport_get_resource_reader",
811         "OO", (PyObject *)self, fullname);
812     Py_DECREF(module);
813     return retval;
814 }
815 
816 
/* Method table for zipimporter objects.  Each *_METHODDEF macro expands to
   one table entry; the macros are not defined in this part of the file
   (presumably they come from the included clinic/zipimport.c.h). */
817 static PyMethodDef zipimporter_methods[] = {
818     ZIPIMPORT_ZIPIMPORTER_FIND_MODULE_METHODDEF
819     ZIPIMPORT_ZIPIMPORTER_FIND_LOADER_METHODDEF
820     ZIPIMPORT_ZIPIMPORTER_LOAD_MODULE_METHODDEF
821     ZIPIMPORT_ZIPIMPORTER_GET_FILENAME_METHODDEF
822     ZIPIMPORT_ZIPIMPORTER_IS_PACKAGE_METHODDEF
823     ZIPIMPORT_ZIPIMPORTER_GET_DATA_METHODDEF
824     ZIPIMPORT_ZIPIMPORTER_GET_CODE_METHODDEF
825     ZIPIMPORT_ZIPIMPORTER_GET_SOURCE_METHODDEF
826     ZIPIMPORT_ZIPIMPORTER_GET_RESOURCE_READER_METHODDEF
827     {NULL,              NULL}   /* sentinel */
828 };
829 
/* Read-only attributes exposing the three object fields of ZipImporter;
   the files dict is published under the name "_files". */
830 static PyMemberDef zipimporter_members[] = {
831     {"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
832     {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
833     {"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
834     {NULL}      /* sentinel */
835 };
836 
/* Expands to 0 regardless of its argument, so the type's ob_type slot below
   starts out as NULL rather than &PyType_Type.
   NOTE(review): presumably filled in later during module initialization --
   that code is not in this part of the file. */
837 #define DEFERRED_ADDRESS(ADDR) 0
838 
/* Type object for zipimport.zipimporter: a GC-enabled, subclassable type
   whose behaviour comes from the dealloc/repr/traverse functions, the method
   and member tables, and the clinic-generated __init__ wrapper. */
839 static PyTypeObject ZipImporter_Type = {
840     PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
841     "zipimport.zipimporter",
842     sizeof(ZipImporter),
843     0,                                          /* tp_itemsize */
844     (destructor)zipimporter_dealloc,            /* tp_dealloc */
845     0,                                          /* tp_print */
846     0,                                          /* tp_getattr */
847     0,                                          /* tp_setattr */
848     0,                                          /* tp_reserved */
849     (reprfunc)zipimporter_repr,                 /* tp_repr */
850     0,                                          /* tp_as_number */
851     0,                                          /* tp_as_sequence */
852     0,                                          /* tp_as_mapping */
853     0,                                          /* tp_hash */
854     0,                                          /* tp_call */
855     0,                                          /* tp_str */
856     PyObject_GenericGetAttr,                    /* tp_getattro */
857     0,                                          /* tp_setattro */
858     0,                                          /* tp_as_buffer */
859     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
860         Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
861     zipimport_zipimporter___init____doc__,      /* tp_doc */
862     zipimporter_traverse,                       /* tp_traverse */
863     0,                                          /* tp_clear */
864     0,                                          /* tp_richcompare */
865     0,                                          /* tp_weaklistoffset */
866     0,                                          /* tp_iter */
867     0,                                          /* tp_iternext */
868     zipimporter_methods,                        /* tp_methods */
869     zipimporter_members,                        /* tp_members */
870     0,                                          /* tp_getset */
871     0,                                          /* tp_base */
872     0,                                          /* tp_dict */
873     0,                                          /* tp_descr_get */
874     0,                                          /* tp_descr_set */
875     0,                                          /* tp_dictoffset */
876     (initproc)zipimport_zipimporter___init__,   /* tp_init */
877     PyType_GenericAlloc,                        /* tp_alloc */
878     PyType_GenericNew,                          /* tp_new */
879     PyObject_GC_Del,                            /* tp_free */
880 };
881 
882 
883 /* implementation */
884 
/* Decode the first 4 bytes of 'buf' as a little-endian unsigned integer
   (buf[0] is the least significant byte).  This partially reimplements
   marshal.c:r_long() */
static unsigned int
get_uint32(const unsigned char *buf)
{
    const unsigned int b0 = buf[0];
    const unsigned int b1 = buf[1];
    const unsigned int b2 = buf[2];
    const unsigned int b3 = buf[3];

    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}
898 
/* Decode the first 2 bytes of 'buf' as a little-endian unsigned integer
   (buf[0] is the low byte).  This partially reimplements
   marshal.c:r_short() */
static unsigned short
get_uint16(const unsigned char *buf)
{
    const unsigned int low = buf[0];
    const unsigned int high = buf[1];

    return (unsigned short)(low | (high << 8));
}
910 
911 static void
set_file_error(PyObject * archive,int eof)912 set_file_error(PyObject *archive, int eof)
913 {
914     if (eof) {
915         PyErr_SetString(PyExc_EOFError, "EOF read where not expected");
916     }
917     else {
918         PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, archive);
919     }
920 }
921 
/*
   read_directory(archive) -> files dict (new reference)

   Given a path to a Zip archive, build a dict, mapping file names
   (local to the archive, using SEP as a separator) to toc entries.

   A toc_entry is a tuple:

   (__file__,      # value to use for __file__, available for all files;
                   # a str built as archive + SEP + name
    compress,      # compression kind; 0 for uncompressed
    data_size,     # size of compressed data on disk
    file_size,     # size of decompressed data
    file_offset,   # offset of file header from start of archive
    time,          # mod time of file (in dos format)
    date,          # mod date of file (in dos format)
    crc,           # crc checksum of the data
   )

   Directories can be recognized by the trailing SEP in the name,
   data_size and file_offset are 0.

   Returns NULL with an exception set on failure.  Structural problems in
   the archive and most read failures are reported as ZipImportError; a
   short read of a central directory entry header goes through
   set_file_error() instead (EOFError or OSError).
*/
static PyObject *
read_directory(PyObject *archive)
{
    PyObject *files = NULL;
    FILE *fp;
    unsigned short flags, compress, time, date, name_size;
    unsigned int crc, data_size, file_size, header_size, header_offset;
    unsigned long file_offset, header_position;
    unsigned long arc_offset;  /* Absolute offset to start of the zip-archive. */
    unsigned int count, i;
    unsigned char buffer[46];
    char name[MAXPATHLEN + 5];
    PyObject *nameobj = NULL;
    PyObject *path;
    const char *charset;
    int bootstrap;
    const char *errmsg = NULL;

    fp = _Py_fopen_obj(archive, "rb");
    if (fp == NULL) {
        /* Only an OSError is converted to ZipImportError (chained to the
           original); any other pending exception is propagated as is. */
        if (PyErr_ExceptionMatches(PyExc_OSError)) {
            _PyErr_FormatFromCause(ZipImportError,
                                   "can't open Zip file: %R", archive);
        }
        return NULL;
    }

    /* The End of Central Directory record is looked for in the last 22
       bytes of the file only. */
    if (fseek(fp, -22, SEEK_END) == -1) {
        goto file_error;
    }
    header_position = (unsigned long)ftell(fp);
    if (header_position == (unsigned long)-1) {
        goto file_error;
    }
    assert(header_position <= (unsigned long)LONG_MAX);
    if (fread(buffer, 1, 22, fp) != 22) {
        goto file_error;
    }
    if (get_uint32(buffer) != 0x06054B50u) {
        /* Bad: End of Central Dir signature */
        errmsg = "not a Zip file";
        goto invalid_header;
    }

    /* Size and offset of the central directory, as recorded in the
       archive.  Both must fit in front of the end record. */
    header_size = get_uint32(buffer + 12);
    header_offset = get_uint32(buffer + 16);
    if (header_position < header_size) {
        errmsg = "bad central directory size";
        goto invalid_header;
    }
    if (header_position < header_offset) {
        errmsg = "bad central directory offset";
        goto invalid_header;
    }
    if (header_position - header_size < header_offset) {
        errmsg = "bad central directory size or offset";
        goto invalid_header;
    }
    /* header_position now becomes the real file position of the central
       directory; the difference to the recorded offset is how far the
       archive is shifted inside the file. */
    header_position -= header_size;
    arc_offset = header_position - header_offset;

    files = PyDict_New();
    if (files == NULL) {
        goto error;
    }
    /* Start of Central Directory */
    count = 0;
    if (fseek(fp, (long)header_position, 0) == -1) {
        goto file_error;
    }
    /* One iteration per central directory entry; the loop ends at the
       first record that does not carry the entry signature. */
    for (;;) {
        PyObject *t;
        size_t n;
        int err;

        /* 46 bytes are read as the fixed-size part of an entry; at least
           the 4 signature bytes are needed to decide whether to stop. */
        n = fread(buffer, 1, 46, fp);
        if (n < 4) {
            goto eof_error;
        }
        /* Start of file header */
        if (get_uint32(buffer) != 0x02014B50u) {
            break;              /* Bad: Central Dir File Header */
        }
        if (n != 46) {
            goto eof_error;
        }
        flags = get_uint16(buffer + 8);
        compress = get_uint16(buffer + 10);
        time = get_uint16(buffer + 12);
        date = get_uint16(buffer + 14);
        crc = get_uint32(buffer + 16);
        data_size = get_uint32(buffer + 20);
        file_size = get_uint32(buffer + 24);
        name_size = get_uint16(buffer + 28);
        /* header_size is reused here for the length of the variable part
           that follows the 46 fixed bytes. */
        header_size = (unsigned int)name_size +
           get_uint16(buffer + 30) /* extra field */ +
           get_uint16(buffer + 32) /* comment */;

        file_offset = get_uint32(buffer + 42);
        if (file_offset > header_offset) {
            errmsg = "bad local header offset";
            goto invalid_header;
        }
        /* Convert the archive-relative offset to an absolute one. */
        file_offset += arc_offset;

        /* Names longer than MAXPATHLEN are truncated; the bytes left over
           are consumed by the skip loop below. */
        if (name_size > MAXPATHLEN) {
            name_size = MAXPATHLEN;
        }
        if (fread(name, 1, name_size, fp) != name_size) {
            goto file_error;
        }
        name[name_size] = '\0';  /* Add terminating null byte */
#if SEP != '/'
        for (i = 0; i < name_size; i++) {
            if (name[i] == '/') {
                name[i] = SEP;
            }
        }
#endif
        /* Skip the rest of the header.
         * On Windows, calling fseek to skip over the fields we don't use is
         * slower than reading the data because fseek flushes stdio's
         * internal buffers.  See issue #8745. */
        assert(header_size <= 3*0xFFFFu);
        for (i = name_size; i < header_size; i++) {
            if (getc(fp) == EOF) {
                goto file_error;
            }
        }

        /* Pick the codec for the entry name: flag bit 0x0800 selects
           UTF-8, otherwise cp437 (or ASCII while codecs are not ready). */
        bootstrap = 0;
        if (flags & 0x0800) {
            charset = "utf-8";
        }
        else if (!PyThreadState_GET()->interp->codecs_initialized) {
            /* During bootstrap, we may need to load the encodings
               package from a ZIP file. But the cp437 encoding is implemented
               in Python in the encodings package.

               Break out of this dependency by assuming that the path to
               the encodings module is ASCII-only. */
            charset = "ascii";
            bootstrap = 1;
        }
        else {
            charset = "cp437";
        }
        nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
        if (nameobj == NULL) {
            if (bootstrap) {
                PyErr_Format(PyExc_NotImplementedError,
                    "bootstrap issue: python%i%i.zip contains non-ASCII "
                    "filenames without the unicode flag",
                    PY_MAJOR_VERSION, PY_MINOR_VERSION);
            }
            goto error;
        }
        if (PyUnicode_READY(nameobj) == -1) {
            goto error;
        }
        path = PyUnicode_FromFormat("%U%c%U", archive, SEP, nameobj);
        if (path == NULL) {
            goto error;
        }
        /* "N" hands the reference to 'path' over to the new tuple. */
        t = Py_BuildValue("NHIIkHHI", path, compress, data_size,
                          file_size, file_offset, time, date, crc);
        if (t == NULL) {
            goto error;
        }
        err = PyDict_SetItem(files, nameobj, t);
        /* The dict holds its own references; drop ours before checking
           the result so the error path has nothing extra to release. */
        Py_CLEAR(nameobj);
        Py_DECREF(t);
        if (err != 0) {
            goto error;
        }
        count++;
    }
    fclose(fp);
    if (Py_VerboseFlag) {
        PySys_FormatStderr("# zipimport: found %u names in %R\n",
                           count, archive);
    }
    return files;

    /* Error exits: each label sets the exception, then falls into the
       shared cleanup under 'error'. */
eof_error:
    set_file_error(archive, !ferror(fp));
    goto error;

file_error:
    PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
    goto error;

invalid_header:
    assert(errmsg != NULL);
    PyErr_Format(ZipImportError, "%s: %R", errmsg, archive);
    goto error;

error:
    fclose(fp);
    Py_XDECREF(files);
    Py_XDECREF(nameobj);
    return NULL;
}
1147 
1148 /* Return the zlib.decompress function object, or NULL if zlib couldn't
1149    be imported. The function is cached when found, so subsequent calls
1150    don't import zlib again. */
1151 static PyObject *
get_decompress_func(void)1152 get_decompress_func(void)
1153 {
1154     static int importing_zlib = 0;
1155     PyObject *zlib;
1156     PyObject *decompress;
1157     _Py_IDENTIFIER(decompress);
1158 
1159     if (importing_zlib != 0)
1160         /* Someone has a zlib.pyc in their Zip file;
1161            let's avoid a stack overflow. */
1162         return NULL;
1163     importing_zlib = 1;
1164     zlib = PyImport_ImportModuleNoBlock("zlib");
1165     importing_zlib = 0;
1166     if (zlib != NULL) {
1167         decompress = _PyObject_GetAttrId(zlib,
1168                                          &PyId_decompress);
1169         Py_DECREF(zlib);
1170     }
1171     else {
1172         PyErr_Clear();
1173         decompress = NULL;
1174     }
1175     if (Py_VerboseFlag)
1176         PySys_WriteStderr("# zipimport: zlib %s\n",
1177             zlib != NULL ? "available": "UNAVAILABLE");
1178     return decompress;
1179 }
1180 
/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
   data as a new reference.

   'toc_entry' is the 8-tuple built by read_directory().  Only the
   compression kind, the data size and the file offset are used here; the
   remaining fields are parsed but not consulted.

   Returns NULL with an exception set on failure. */
static PyObject *
get_data(PyObject *archive, PyObject *toc_entry)
{
    PyObject *raw_data = NULL, *data, *decompress;
    char *buf;
    FILE *fp;
    PyObject *datapath;
    unsigned short compress, time, date;
    unsigned int crc;
    Py_ssize_t data_size, file_size, bytes_size;
    long file_offset, header_size;
    unsigned char buffer[30];
    const char *errmsg = NULL;

    if (!PyArg_ParseTuple(toc_entry, "OHnnlHHI", &datapath, &compress,
                          &data_size, &file_size, &file_offset, &time,
                          &date, &crc)) {
        return NULL;
    }
    if (data_size < 0) {
        PyErr_Format(ZipImportError, "negative data size");
        return NULL;
    }

    fp = _Py_fopen_obj(archive, "rb");
    if (!fp) {
        return NULL;
    }
    /* Check to make sure the local file header is correct */
    if (fseek(fp, file_offset, 0) == -1) {
        goto file_error;
    }
    /* 30 bytes are read as the fixed-size part of the local header. */
    if (fread(buffer, 1, 30, fp) != 30) {
        goto eof_error;
    }
    if (get_uint32(buffer) != 0x04034B50u) {
        /* Bad: Local File Header */
        errmsg = "bad local file header";
        goto invalid_header;
    }

    /* The lengths of the variable parts are taken from the local header
       itself, not from the central directory entry. */
    header_size = (unsigned int)30 +
        get_uint16(buffer + 26) /* file name */ +
        get_uint16(buffer + 28) /* extra field */;
    if (file_offset > LONG_MAX - header_size) {
        errmsg = "bad local file header size";
        goto invalid_header;
    }
    file_offset += header_size;  /* Start of file data */

    if (data_size > LONG_MAX - 1) {
        fclose(fp);
        PyErr_NoMemory();
        return NULL;
    }
    /* Compressed data gets one extra byte for the 'Z' appended below.
       The buffer is never allocated with size 0. */
    bytes_size = compress == 0 ? data_size : data_size + 1;
    if (bytes_size == 0) {
        bytes_size++;
    }
    raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
    if (raw_data == NULL) {
        goto error;
    }
    buf = PyBytes_AsString(raw_data);

    if (fseek(fp, file_offset, 0) == -1) {
        goto file_error;
    }
    if (fread(buf, 1, data_size, fp) != (size_t)data_size) {
        PyErr_SetString(PyExc_OSError,
                        "zipimport: can't read data");
        goto error;
    }

    /* Done with the file; fp is set to NULL so the error path below does
       not close it a second time. */
    fclose(fp);
    fp = NULL;

    if (compress != 0) {
        buf[data_size] = 'Z';  /* saw this in zipfile.py */
        data_size++;
    }
    buf[data_size] = '\0';

    if (compress == 0) {  /* data is not compressed */
        /* Copy exactly data_size bytes; raw_data may be one byte larger
           when the stored size was 0. */
        data = PyBytes_FromStringAndSize(buf, data_size);
        Py_DECREF(raw_data);
        return data;
    }

    /* Decompress with zlib */
    decompress = get_decompress_func();
    if (decompress == NULL) {
        PyErr_SetString(ZipImportError,
                        "can't decompress data; "
                        "zlib not available");
        goto error;
    }
    /* NOTE(review): -15 is passed as zlib.decompress()'s second argument
       (wbits); presumably this selects a raw deflate stream -- confirm
       against the zlib documentation. */
    data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
    Py_DECREF(decompress);
    Py_DECREF(raw_data);
    if (data != NULL && !PyBytes_Check(data)) {
        PyErr_Format(PyExc_TypeError,
                     "zlib.decompress() must return a bytes object, not "
                     "%.200s",
                     Py_TYPE(data)->tp_name);
        Py_DECREF(data);
        return NULL;
    }
    return data;

    /* Error exits: each label sets the exception, then falls into the
       shared cleanup under 'error'. */
eof_error:
    set_file_error(archive, !ferror(fp));
    goto error;

file_error:
    PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
    goto error;

invalid_header:
    assert(errmsg != NULL);
    PyErr_Format(ZipImportError, "%s: %R", errmsg, archive);
    goto error;

error:
    if (fp != NULL) {
        fclose(fp);
    }
    Py_XDECREF(raw_data);
    return NULL;
}
1313 
/* Lenient comparison of two modification times.  The mtime stored in the
   archive is less precise than the one stored in a .pyc (dostime only
   stores even seconds), so the two are treated as equal when they differ
   by at most one second.  Returns 1 for "equal", 0 otherwise. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;
    time_t magnitude = (delta < 0) ? -delta : delta;

    return magnitude <= 1;
}
1326 
1327 /* Given the contents of a .pyc file in a buffer, unmarshal the data
1328    and return the code object. Return None if it the magic word doesn't
1329    match (we do this instead of raising an exception as we fall back
1330    to .py if available and we don't want to mask other errors).
1331    Returns a new reference. */
1332 static PyObject *
unmarshal_code(PyObject * pathname,PyObject * data,time_t mtime)1333 unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
1334 {
1335     PyObject *code;
1336     unsigned char *buf = (unsigned char *)PyBytes_AsString(data);
1337     Py_ssize_t size = PyBytes_Size(data);
1338 
1339     if (size < 16) {
1340         PyErr_SetString(ZipImportError,
1341                         "bad pyc data");
1342         return NULL;
1343     }
1344 
1345     if (get_uint32(buf) != (unsigned int)PyImport_GetMagicNumber()) {
1346         if (Py_VerboseFlag) {
1347             PySys_FormatStderr("# %R has bad magic\n",
1348                                pathname);
1349         }
1350         Py_RETURN_NONE;  /* signal caller to try alternative */
1351     }
1352 
1353     uint32_t flags = get_uint32(buf + 4);
1354     if (flags != 0) {
1355         // Hash-based pyc. We currently refuse to handle checked hash-based
1356         // pycs. We could validate hash-based pycs against the source, but it
1357         // seems likely that most people putting hash-based pycs in a zipfile
1358         // will use unchecked ones.
1359         if (strcmp(_Py_CheckHashBasedPycsMode, "never") &&
1360             (flags != 0x1 || !strcmp(_Py_CheckHashBasedPycsMode, "always")))
1361             Py_RETURN_NONE;
1362     } else if ((mtime != 0 && !eq_mtime(get_uint32(buf + 8), mtime))) {
1363         if (Py_VerboseFlag) {
1364             PySys_FormatStderr("# %R has bad mtime\n",
1365                                pathname);
1366         }
1367         Py_RETURN_NONE;  /* signal caller to try alternative */
1368     }
1369 
1370     /* XXX the pyc's size field is ignored; timestamp collisions are probably
1371        unimportant with zip files. */
1372     code = PyMarshal_ReadObjectFromString((char *)buf + 16, size - 16);
1373     if (code == NULL) {
1374         return NULL;
1375     }
1376     if (!PyCode_Check(code)) {
1377         Py_DECREF(code);
1378         PyErr_Format(PyExc_TypeError,
1379              "compiled module %R is not a code object",
1380              pathname);
1381         return NULL;
1382     }
1383     return code;
1384 }
1385 
1386 /* Replace any occurrences of "\r\n?" in the input string with "\n".
1387    This converts DOS and Mac line endings to Unix line endings.
1388    Also append a trailing "\n" to be compatible with
1389    PyParser_SimpleParseFile(). Returns a new reference. */
1390 static PyObject *
normalize_line_endings(PyObject * source)1391 normalize_line_endings(PyObject *source)
1392 {
1393     char *buf, *q, *p;
1394     PyObject *fixed_source;
1395     int len = 0;
1396 
1397     p = PyBytes_AsString(source);
1398     if (p == NULL) {
1399         return PyBytes_FromStringAndSize("\n\0", 2);
1400     }
1401 
1402     /* one char extra for trailing \n and one for terminating \0 */
1403     buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1404     if (buf == NULL) {
1405         PyErr_SetString(PyExc_MemoryError,
1406                         "zipimport: no memory to allocate "
1407                         "source buffer");
1408         return NULL;
1409     }
1410     /* replace "\r\n?" by "\n" */
1411     for (q = buf; *p != '\0'; p++) {
1412         if (*p == '\r') {
1413             *q++ = '\n';
1414             if (*(p + 1) == '\n')
1415                 p++;
1416         }
1417         else
1418             *q++ = *p;
1419         len++;
1420     }
1421     *q++ = '\n';  /* add trailing \n */
1422     *q = '\0';
1423     fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1424     PyMem_Free(buf);
1425     return fixed_source;
1426 }
1427 
1428 /* Given a string buffer containing Python source code, compile it
1429    and return a code object as a new reference. */
1430 static PyObject *
compile_source(PyObject * pathname,PyObject * source)1431 compile_source(PyObject *pathname, PyObject *source)
1432 {
1433     PyObject *code, *fixed_source;
1434 
1435     fixed_source = normalize_line_endings(source);
1436     if (fixed_source == NULL) {
1437         return NULL;
1438     }
1439 
1440     code = Py_CompileStringObject(PyBytes_AsString(fixed_source),
1441                                   pathname, Py_file_input, NULL, -1);
1442 
1443     Py_DECREF(fixed_source);
1444     return code;
1445 }
1446 
/* Convert the DOS-format date/time values found in the Zip archive to a
   time_t that is compatible with the time stamp stored in .pyc files.

   The bit fields are unpacked into a struct tm (seconds are stored in
   two-second units, years as an offset from 1980) and handed to mktime(),
   so the fields are interpreted as local time. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;

    memset((void *) &fields, '\0', sizeof(fields));

    /* date part */
    fields.tm_year  = ((dosdate >> 9)  & 0x7f) + 80;
    fields.tm_mon   = ((dosdate >> 5)  & 0x0f) - 1;
    fields.tm_mday  =   dosdate        & 0x1f;

    /* time part */
    fields.tm_hour  =  (dostime >> 11) & 0x1f;
    fields.tm_min   =  (dostime >> 5)  & 0x3f;
    fields.tm_sec   =  (dostime        & 0x1f) * 2;

    /* let mktime() decide about DST; wday/yday is ignored */
    fields.tm_isdst = -1;

    return mktime(&fields);
}
1466 
1467 /* Given a path to a .pyc file in the archive, return the
1468    modification time of the matching .py file, or 0 if no source
1469    is available. */
1470 static time_t
get_mtime_of_source(ZipImporter * self,PyObject * path)1471 get_mtime_of_source(ZipImporter *self, PyObject *path)
1472 {
1473     PyObject *toc_entry, *stripped;
1474     time_t mtime;
1475 
1476     /* strip 'c' from *.pyc */
1477     if (PyUnicode_READY(path) == -1)
1478         return (time_t)-1;
1479     stripped = PyUnicode_FromKindAndData(PyUnicode_KIND(path),
1480                                          PyUnicode_DATA(path),
1481                                          PyUnicode_GET_LENGTH(path) - 1);
1482     if (stripped == NULL)
1483         return (time_t)-1;
1484 
1485     toc_entry = PyDict_GetItem(self->files, stripped);
1486     Py_DECREF(stripped);
1487     if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1488         PyTuple_Size(toc_entry) == 8) {
1489         /* fetch the time stamp of the .py file for comparison
1490            with an embedded pyc time stamp */
1491         int time, date;
1492         time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1493         date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1494         mtime = parse_dostime(time, date);
1495     } else
1496         mtime = 0;
1497     return mtime;
1498 }
1499 
1500 /* Return the code object for the module named by 'fullname' from the
1501    Zip archive as a new reference. */
1502 static PyObject *
get_code_from_data(ZipImporter * self,int ispackage,int isbytecode,time_t mtime,PyObject * toc_entry)1503 get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1504                    time_t mtime, PyObject *toc_entry)
1505 {
1506     PyObject *data, *modpath, *code;
1507 
1508     data = get_data(self->archive, toc_entry);
1509     if (data == NULL)
1510         return NULL;
1511 
1512     modpath = PyTuple_GetItem(toc_entry, 0);
1513     if (isbytecode)
1514         code = unmarshal_code(modpath, data, mtime);
1515     else
1516         code = compile_source(modpath, data);
1517     Py_DECREF(data);
1518     return code;
1519 }
1520 
/* Get the code object associated with the module specified by
   'fullname'.

   The candidates listed in zip_searchorder are tried in order; the first
   one present in self->files that yields a code object wins.  A candidate
   whose bytecode is rejected (get_code_from_data() returns None) is
   skipped and the search continues.

   On success returns a new reference to the code object.  If
   'p_ispackage' is not NULL it receives the package flag of the entry that
   was tried (it is written before the entry is loaded).  If 'p_modpath'
   is not NULL it receives a new reference to item 0 of the toc entry.

   Returns NULL with an exception set on failure; ZipImportError is raised
   when no candidate is found. */
static PyObject *
get_module_code(ZipImporter *self, PyObject *fullname,
                int *p_ispackage, PyObject **p_modpath)
{
    PyObject *code = NULL, *toc_entry, *subname;
    PyObject *path, *fullpath = NULL;
    struct st_zip_searchorder *zso;

    if (self->prefix == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "zipimporter.__init__() wasn't called");
        return NULL;
    }

    subname = get_subname(fullname);
    if (subname == NULL)
        return NULL;

    path = make_filename(self->prefix, subname);
    Py_DECREF(subname);
    if (path == NULL)
        return NULL;

    /* The table is terminated by an entry with an empty suffix. */
    for (zso = zip_searchorder; *zso->suffix; zso++) {
        code = NULL;

        fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
        if (fullpath == NULL)
            goto exit;

        if (Py_VerboseFlag > 1)
            PySys_FormatStderr("# trying %U%c%U\n",
                               self->archive, (int)SEP, fullpath);
        /* toc_entry is a borrowed reference owned by self->files. */
        toc_entry = PyDict_GetItem(self->files, fullpath);
        if (toc_entry != NULL) {
            time_t mtime = 0;
            int ispackage = zso->type & IS_PACKAGE;
            int isbytecode = zso->type & IS_BYTECODE;

            if (isbytecode) {
                /* (time_t)-1 alone is not an error indicator, so the
                   exception state is checked as well. */
                mtime = get_mtime_of_source(self, fullpath);
                if (mtime == (time_t)-1 && PyErr_Occurred()) {
                    goto exit;
                }
            }
            Py_CLEAR(fullpath);
            if (p_ispackage != NULL)
                *p_ispackage = ispackage;
            code = get_code_from_data(self, ispackage,
                                      isbytecode, mtime,
                                      toc_entry);
            if (code == Py_None) {
                /* bad magic number or non-matching mtime
                   in byte code, try next */
                /* 'code' is reset to NULL at the top of the next
                   iteration. */
                Py_DECREF(code);
                continue;
            }
            if (code != NULL && p_modpath != NULL) {
                *p_modpath = PyTuple_GetItem(toc_entry, 0);
                Py_INCREF(*p_modpath);
            }
            goto exit;
        }
        else
            Py_CLEAR(fullpath);
    }
    /* Reached only when no candidate produced a result. */
    PyErr_Format(ZipImportError, "can't find module %R", fullname);
exit:
    Py_DECREF(path);
    Py_XDECREF(fullpath);
    return code;
}
1595 
1596 
1597 /* Module init */
1598 
/* Module docstring; referenced from the zipimportmodule definition. */
PyDoc_STRVAR(zipimport_doc,
"zipimport provides support for importing Python modules from Zip archives.\n\
\n\
This module exports three objects:\n\
- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
- ZipImportError: exception raised by zipimporter objects. It's a\n\
  subclass of ImportError, so it can be caught as ImportError, too.\n\
- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
  info dicts, as used in zipimporter._files.\n\
\n\
It is usually not needed to use the zipimport module explicitly; it is\n\
used by the builtin import mechanism for sys.path items that are paths\n\
to Zip archives.");
1612 
/* Module definition passed to PyModule_Create() in PyInit_zipimport().
   The module defines no module-level functions; its objects are added
   by the init function. */
static struct PyModuleDef zipimportmodule = {
    PyModuleDef_HEAD_INIT,
    "zipimport",                                /* m_name */
    zipimport_doc,                              /* m_doc */
    -1,                                         /* m_size */
    NULL,                                       /* m_methods */
    NULL,                                       /* m_slots */
    NULL,                                       /* m_traverse */
    NULL,                                       /* m_clear */
    NULL                                        /* m_free */
};
1624 
1625 PyMODINIT_FUNC
PyInit_zipimport(void)1626 PyInit_zipimport(void)
1627 {
1628     PyObject *mod;
1629 
1630     if (PyType_Ready(&ZipImporter_Type) < 0)
1631         return NULL;
1632 
1633     /* Correct directory separator */
1634     zip_searchorder[0].suffix[0] = SEP;
1635     zip_searchorder[1].suffix[0] = SEP;
1636 
1637     mod = PyModule_Create(&zipimportmodule);
1638     if (mod == NULL)
1639         return NULL;
1640 
1641     ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1642                                         PyExc_ImportError, NULL);
1643     if (ZipImportError == NULL)
1644         return NULL;
1645 
1646     Py_INCREF(ZipImportError);
1647     if (PyModule_AddObject(mod, "ZipImportError",
1648                            ZipImportError) < 0)
1649         return NULL;
1650 
1651     Py_INCREF(&ZipImporter_Type);
1652     if (PyModule_AddObject(mod, "zipimporter",
1653                            (PyObject *)&ZipImporter_Type) < 0)
1654         return NULL;
1655 
1656     zip_directory_cache = PyDict_New();
1657     if (zip_directory_cache == NULL)
1658         return NULL;
1659     Py_INCREF(zip_directory_cache);
1660     if (PyModule_AddObject(mod, "_zip_directory_cache",
1661                            zip_directory_cache) < 0)
1662         return NULL;
1663     return mod;
1664 }
1665