• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""zipimport provides support for importing Python modules from Zip archives.
2
3This module exports two objects:
4- zipimporter: a class; its constructor takes a path to a Zip archive.
5- ZipImportError: exception raised by zipimporter objects. It's a
6  subclass of ImportError, so it can be caught as ImportError, too.
7
8It is usually not needed to use the zipimport module explicitly; it is
9used by the builtin import mechanism for sys.path items that are paths
10to Zip archives.
11"""
12
13#from importlib import _bootstrap_external
14#from importlib import _bootstrap  # for _verbose_message
15import _frozen_importlib_external as _bootstrap_external
16from _frozen_importlib_external import _unpack_uint16, _unpack_uint32, _unpack_uint64
17import _frozen_importlib as _bootstrap  # for _verbose_message
18import _imp  # for check_hash_based_pycs
19import _io  # for open
20import marshal  # for loads
21import sys  # for modules
22import time  # for mktime
23import _warnings  # For warn()
24
# Names exported by "from zipimport import *".
__all__ = ['ZipImportError', 'zipimporter']


# Path separator used for archive-internal paths; taken from the import
# machinery's own definition.
path_sep = _bootstrap_external.path_sep
# Every entry of path_separators after the first one (empty when there is
# only a single separator); occurrences are replaced by path_sep in
# zipimporter.__init__() and zipimporter.get_data().
alt_path_sep = _bootstrap_external.path_separators[1:]
30
31
class ZipImportError(ImportError):
    """Exception raised by zipimporter objects; a subclass of ImportError."""
34
# _read_directory() cache: maps an archive path to the files dict that
# _read_directory() built for it.
_zip_directory_cache = {}

# The module type; load_module() uses it to create new module objects.
_module_type = type(sys)

# Lengths, in bytes, of the end-of-archive records that _read_directory()
# slices out of the tail of the file.
END_CENTRAL_DIR_SIZE = 22
END_CENTRAL_DIR_SIZE_64 = 56
END_CENTRAL_DIR_LOCATOR_SIZE_64 = 20
STRING_END_ARCHIVE = b'PK\x05\x06'  # standard EOCD signature
STRING_END_LOCATOR_64 = b'PK\x06\x07'  # Zip64 EOCD Locator signature
STRING_END_ZIP_64 = b'PK\x06\x06'  # Zip64 EOCD signature
# Largest value a 16-bit length field can hold.
MAX_COMMENT_LEN = (1 << 16) - 1
# A 32-bit header field holding this value makes _read_directory() look for
# the real value in a zip64 extra record.
MAX_UINT32 = 0xffffffff
# Tag of the extra record that carries the 64-bit values.
ZIP64_EXTRA_TAG = 0x1
49
class zipimporter(_bootstrap_external._LoaderBasics):
    """zipimporter(archivepath) -> zipimporter object

    Create a new zipimporter instance. 'archivepath' must be a path to
    a zipfile, or to a specific path inside a zipfile. For example, it can be
    '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a
    valid directory inside the archive.

    ZipImportError is raised if 'archivepath' doesn't point to a valid Zip
    archive.

    The 'archive' attribute of zipimporter objects contains the name of the
    zipfile targeted. The 'prefix' attribute holds the path inside the
    archive, ending with a path separator, or '' for the archive root.
    """

    # Split the "subdirectory" from the Zip archive path, lookup a matching
    # entry in _zip_directory_cache, fetch the file directory from there
    # if found, or else read it from the archive.
    def __init__(self, path):
        """Locate the archive file in 'path' and load its directory.

        Raise TypeError if 'path' is not a str, and ZipImportError if it is
        empty or if no regular file is found along it.
        """
        if not isinstance(path, str):
            raise TypeError(f"expected str, not {type(path)!r}")
        if not path:
            raise ZipImportError('archive path is empty', path=path)
        if alt_path_sep:
            path = path.replace(alt_path_sep, path_sep)

        # Path components stripped from the end of 'path', innermost first.
        prefix = []
        while True:
            try:
                st = _bootstrap_external._path_stat(path)
            except (OSError, ValueError):
                # On Windows a ValueError is raised for too long paths.
                # Back up one path element.
                dirname, basename = _bootstrap_external._path_split(path)
                if dirname == path:
                    # Nothing left to strip: no existing file was found.
                    raise ZipImportError('not a Zip file', path=path)
                path = dirname
                prefix.append(basename)
            else:
                # it exists
                if (st.st_mode & 0o170000) != 0o100000:  # stat.S_ISREG
                    # it's not a regular file
                    raise ZipImportError('not a Zip file', path=path)
                break

        if path not in _zip_directory_cache:
            _zip_directory_cache[path] = _read_directory(path)
        self.archive = path
        # a prefix directory following the ZIP file path.
        self.prefix = _bootstrap_external._path_join(*prefix[::-1])
        if self.prefix:
            self.prefix += path_sep


    def find_spec(self, fullname, target=None):
        """Create a ModuleSpec for the specified module.

        Returns None if the module cannot be found. 'target' is accepted
        but not used.
        """
        module_info = _get_module_info(self, fullname)
        if module_info is not None:
            return _bootstrap.spec_from_loader(fullname, self, is_package=module_info)
        else:
            # Not a module or regular package. See if this is a directory, and
            # therefore possibly a portion of a namespace package.

            # We're only interested in the last path component of fullname
            # earlier components are recorded in self.prefix.
            modpath = _get_module_path(self, fullname)
            if _is_dir(self, modpath):
                # This is possibly a portion of a namespace
                # package. Return the string representing its path,
                # without a trailing separator.
                path = f'{self.archive}{path_sep}{modpath}'
                spec = _bootstrap.ModuleSpec(name=fullname, loader=None,
                                             is_package=True)
                spec.submodule_search_locations.append(path)
                return spec
            else:
                return None

    def get_code(self, fullname):
        """get_code(fullname) -> code object.

        Return the code object for the specified module. Raise ZipImportError
        if the module couldn't be imported.
        """
        code, ispackage, modpath = _get_module_code(self, fullname)
        return code


    def get_data(self, pathname):
        """get_data(pathname) -> bytes with file data.

        Return the data associated with 'pathname', which may be given
        either relative to the archive or prefixed with the archive path.
        Raise OSError if the file wasn't found.
        """
        if alt_path_sep:
            pathname = pathname.replace(alt_path_sep, path_sep)

        # Keys of the files dict are relative to the archive, so strip a
        # leading "<archive><sep>" if the caller passed a full path.
        key = pathname
        if pathname.startswith(self.archive + path_sep):
            key = pathname[len(self.archive + path_sep):]

        try:
            toc_entry = self._get_files()[key]
        except KeyError:
            raise OSError(0, '', key)
        return _get_data(self.archive, toc_entry)


    # Return a string matching __file__ for the named module
    def get_filename(self, fullname):
        """get_filename(fullname) -> filename string.

        Return the filename for the specified module or raise ZipImportError
        if it couldn't be imported.
        """
        # Deciding the filename requires working out where the code
        # would come from if the module was actually loaded
        code, ispackage, modpath = _get_module_code(self, fullname)
        return modpath


    def get_source(self, fullname):
        """get_source(fullname) -> source string.

        Return the source code for the specified module. Raise ZipImportError
        if the module couldn't be found, return None if the archive does
        contain the module, but has no source for it.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)

        path = _get_module_path(self, fullname)
        if mi:
            # A package: its source lives in __init__.py inside the directory.
            fullpath = _bootstrap_external._path_join(path, '__init__.py')
        else:
            fullpath = f'{path}.py'

        try:
            toc_entry = self._get_files()[fullpath]
        except KeyError:
            # we have the module, but no source
            return None
        return _get_data(self.archive, toc_entry).decode()


    # Return a bool signifying whether the module is a package or not.
    def is_package(self, fullname):
        """is_package(fullname) -> bool.

        Return True if the module specified by fullname is a package.
        Raise ZipImportError if the module couldn't be found.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
        return mi


    # Load and return the module named by 'fullname'.
    def load_module(self, fullname):
        """load_module(fullname) -> module.

        Load the module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the imported
        module, or raises ZipImportError if it could not be imported.

        Deprecated since Python 3.10. Use exec_module() instead.
        """
        msg = ("zipimport.zipimporter.load_module() is deprecated and slated for "
               "removal in Python 3.12; use exec_module() instead")
        _warnings.warn(msg, DeprecationWarning)
        code, ispackage, modpath = _get_module_code(self, fullname)
        # Reuse an existing module object if there is one, otherwise create
        # and register a fresh one before executing the code.
        mod = sys.modules.get(fullname)
        if mod is None or not isinstance(mod, _module_type):
            mod = _module_type(fullname)
            sys.modules[fullname] = mod
        mod.__loader__ = self

        try:
            if ispackage:
                # add __path__ to the module *before* the code gets
                # executed
                path = _get_module_path(self, fullname)
                fullpath = _bootstrap_external._path_join(self.archive, path)
                mod.__path__ = [fullpath]

            if not hasattr(mod, '__builtins__'):
                mod.__builtins__ = __builtins__
            _bootstrap_external._fix_up_module(mod.__dict__, fullname, modpath)
            exec(code, mod.__dict__)
        except BaseException:
            # Do not leave a half-initialised module behind. The executed
            # code may already have removed the entry itself; pop() keeps
            # that case from raising KeyError and hiding the real error.
            sys.modules.pop(fullname, None)
            raise

        # The executed code may have replaced (or removed) its own entry.
        try:
            mod = sys.modules[fullname]
        except KeyError:
            raise ImportError(f'Loaded module {fullname!r} not found in sys.modules')
        _bootstrap._verbose_message('import {} # loaded from Zip {}', fullname, modpath)
        return mod


    def get_resource_reader(self, fullname):
        """Return the ResourceReader for a module in a zip file."""
        from importlib.readers import ZipReader

        return ZipReader(self, fullname)


    def _get_files(self):
        """Return the files within the archive path.

        The directory is re-read when it is missing from the cache; an
        empty dict is returned if re-reading raises ZipImportError.
        """
        try:
            files = _zip_directory_cache[self.archive]
        except KeyError:
            try:
                files = _zip_directory_cache[self.archive] = _read_directory(self.archive)
            except ZipImportError:
                files = {}

        return files


    def invalidate_caches(self):
        """Invalidates the cache of file data of the archive path."""
        _zip_directory_cache.pop(self.archive, None)


    def __repr__(self):
        return f'<zipimporter object "{self.archive}{path_sep}{self.prefix}">'
283
284
# _zip_searchorder defines how we search for a module in the Zip
# archive: we first search for a package __init__, then for
# non-package .pyc, and .py entries. The .pyc entries
# are swapped by initzipimport() if we run in optimized mode. Also,
# '/' is replaced by path_sep there.
# NOTE(review): no initzipimport() is visible in this file -- confirm
# whether the two sentences above still apply.
#
# Each item is a (suffix, isbytecode, ispackage) tuple, which is how
# _get_module_info() and _get_module_code() unpack it.
_zip_searchorder = (
    (path_sep + '__init__.pyc', True, True),
    (path_sep + '__init__.py', False, True),
    ('.pyc', True, False),
    ('.py', False, False),
)

# Given a module name, return the potential file path in the
# archive (without extension).
296
297# Given a module name, return the potential file path in the
298# archive (without extension).
299def _get_module_path(self, fullname):
300    return self.prefix + fullname.rpartition('.')[2]
301
# Tell whether 'path' names a "directory" in the archive, which makes it
# eligible to be a portion of a namespace package.
def _is_dir(self, path):
    # A directory is recorded in the files dict under its name followed
    # by a path separator, so that is the key to look for.
    return (path + path_sep) in self._get_files()
310
# Look the module up in the archive using _zip_searchorder. Return True if
# it is found as a package, False if it is found as a plain module, and
# None if no candidate file exists.
def _get_module_info(self, fullname):
    base = _get_module_path(self, fullname)
    # The first suffix (in search order) that exists in the archive wins.
    return next(
        (ispackage
         for suffix, _isbytecode, ispackage in _zip_searchorder
         if base + suffix in self._get_files()),
        None,
    )
319
320
321# implementation
322
# _read_directory(archive) -> files dict (new reference)
#
# Given a path to a Zip archive, build a dict, mapping file names
# (local to the archive, using SEP as a separator) to toc entries.
#
# A toc_entry is a tuple:
#
# (__file__,        # value to use for __file__, available for all files:
#                   # the archive path joined with the entry name
#  compress,        # compression kind; 0 for uncompressed
#  data_size,       # size of compressed data on disk
#  file_size,       # size of decompressed data
#  file_offset,     # offset of file header from start of archive
#  time,            # mod time of file (in dos format)
#  date,            # mod date of file (in dos format)
#  crc,             # crc checksum of the data
# )
#
# Directories can be recognized by the trailing path_sep in the name,
# data_size and file_offset are 0.
#
# Raises ZipImportError if the archive cannot be opened or read, or if its
# end-of-central-directory data is missing or inconsistent. Raises EOFError
# if the central directory ends in the middle of an entry header.
def _read_directory(archive):
    try:
        fp = _io.open_code(archive)
    except OSError:
        raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive)

    with fp:
        # GH-87235: On macOS all file descriptors for /dev/fd/N share the same
        # file offset, reset the file offset after scanning the zipfile directory
        # to not cause problems when some runs 'python3 /dev/fd/9 9<some_script'
        start_offset = fp.tell()
        try:
            # Check if there's a comment.
            try:
                fp.seek(0, 2)
                file_size = fp.tell()
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}",
                                     path=archive)
            # Read the tail of the file: large enough to hold the longest
            # possible comment plus all three end-of-archive records.
            max_comment_plus_dirs_size = (
                MAX_COMMENT_LEN + END_CENTRAL_DIR_SIZE +
                END_CENTRAL_DIR_SIZE_64 + END_CENTRAL_DIR_LOCATOR_SIZE_64)
            max_comment_start = max(file_size - max_comment_plus_dirs_size, 0)
            try:
                fp.seek(max_comment_start)
                data = fp.read(max_comment_plus_dirs_size)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}",
                                     path=archive)
            pos = data.rfind(STRING_END_ARCHIVE)
            pos64 = data.rfind(STRING_END_ZIP_64)

            if (pos64 >= 0 and pos64+END_CENTRAL_DIR_SIZE_64+END_CENTRAL_DIR_LOCATOR_SIZE_64==pos):
                # Zip64 at "correct" offset from standard EOCD
                buffer = data[pos64:pos64 + END_CENTRAL_DIR_SIZE_64]
                if len(buffer) != END_CENTRAL_DIR_SIZE_64:
                    raise ZipImportError(
                        f"corrupt Zip64 file: Expected {END_CENTRAL_DIR_SIZE_64} byte "
                        f"zip64 central directory, but read {len(buffer)} bytes.",
                        path=archive)
                header_position = file_size - len(data) + pos64

                central_directory_size = _unpack_uint64(buffer[40:48])
                central_directory_position = _unpack_uint64(buffer[48:56])
                num_entries = _unpack_uint64(buffer[24:32])
            elif pos >= 0:
                buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
                if len(buffer) != END_CENTRAL_DIR_SIZE:
                    raise ZipImportError(f"corrupt Zip file: {archive!r}",
                                         path=archive)

                header_position = file_size - len(data) + pos

                # Buffer now contains a valid EOCD, and header_position gives the
                # starting position of it.
                central_directory_size = _unpack_uint32(buffer[12:16])
                central_directory_position = _unpack_uint32(buffer[16:20])
                num_entries = _unpack_uint16(buffer[8:10])

                # N.b. if someday you want to prefer the standard (non-zip64) EOCD,
                # you need to adjust position by 76 for arc to be 0.
            else:
                raise ZipImportError(f'not a Zip file: {archive!r}',
                                     path=archive)

            # Buffer now contains a valid EOCD, and header_position gives the
            # starting position of it.
            # XXX: These are cursory checks but are not as exact or strict as they
            # could be.  Checking the arc-adjusted value is probably good too.
            if header_position < central_directory_size:
                raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
            if header_position < central_directory_position:
                raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
            header_position -= central_directory_size
            # On just-a-zipfile these values are the same and arc_offset is zero; if
            # the file has some bytes prepended, `arc_offset` is the number of such
            # bytes.  This is used for pex as well as self-extracting .exe.
            arc_offset = header_position - central_directory_position
            if arc_offset < 0:
                raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

            files = {}
            # Start of Central Directory
            count = 0
            try:
                fp.seek(header_position)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            while True:
                buffer = fp.read(46)
                if len(buffer) < 4:
                    raise EOFError('EOF read where not expected')
                # Start of file header
                if buffer[:4] != b'PK\x01\x02':
                    # First record that is not a central directory file
                    # header: the directory is finished.
                    if count != num_entries:
                        raise ZipImportError(
                            f"mismatched num_entries: {count} should be {num_entries} in {archive!r}",
                            path=archive,
                        )
                    break                                # Bad: Central Dir File Header
                if len(buffer) != 46:
                    raise EOFError('EOF read where not expected')
                flags = _unpack_uint16(buffer[8:10])
                compress = _unpack_uint16(buffer[10:12])
                time = _unpack_uint16(buffer[12:14])
                date = _unpack_uint16(buffer[14:16])
                crc = _unpack_uint32(buffer[16:20])
                data_size = _unpack_uint32(buffer[20:24])
                file_size = _unpack_uint32(buffer[24:28])
                name_size = _unpack_uint16(buffer[28:30])
                extra_size = _unpack_uint16(buffer[30:32])
                comment_size = _unpack_uint16(buffer[32:34])
                file_offset = _unpack_uint32(buffer[42:46])
                header_size = name_size + extra_size + comment_size

                try:
                    name = fp.read(name_size)
                except OSError:
                    raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
                if len(name) != name_size:
                    raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
                # On Windows, calling fseek to skip over the fields we don't use is
                # slower than reading the data because fseek flushes stdio's
                # internal buffers.    See issue #8745.
                try:
                    # This reads the extra field AND the entry comment.
                    extra_data_len = header_size - name_size
                    extra_data = memoryview(fp.read(extra_data_len))

                    if len(extra_data) != extra_data_len:
                        raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
                except OSError:
                    raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)

                if flags & 0x800:
                    # UTF-8 file names extension
                    name = name.decode()
                else:
                    # Historical ZIP filename encoding
                    try:
                        name = name.decode('ascii')
                    except UnicodeDecodeError:
                        name = name.decode('latin1').translate(cp437_table)

                name = name.replace('/', path_sep)
                path = _bootstrap_external._path_join(archive, name)

                # Ordering matches unpacking below.
                if (
                    file_size == MAX_UINT32 or
                    data_size == MAX_UINT32 or
                    file_offset == MAX_UINT32
                ):
                    # need to decode extra_data looking for a zip64 extra (which might not
                    # be present)
                    # Only the first extra_size bytes are extra records; the
                    # rest of what was read above is the entry comment and
                    # must not be parsed as records.
                    extra_data = extra_data[:extra_size]
                    while extra_data:
                        if len(extra_data) < 4:
                            raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
                        tag = _unpack_uint16(extra_data[:2])
                        size = _unpack_uint16(extra_data[2:4])
                        if len(extra_data) < 4 + size:
                            raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
                        if tag == ZIP64_EXTRA_TAG:
                            # Use this record's own payload size, not the
                            # length of everything that follows: other extra
                            # records may come after the zip64 one.
                            if size % 8 != 0:
                                raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
                            num_extra_values = size // 8
                            if num_extra_values > 3:
                                raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
                            # Imported here rather than at module level,
                            # only when a zip64 record is actually met.
                            import struct
                            values = list(struct.unpack_from(f"<{num_extra_values}Q",
                                                             extra_data, offset=4))

                            # N.b. Here be dragons: the ordering of these is different than
                            # the header fields, and it's really easy to get it wrong since
                            # naturally-occuring zips that use all 3 are >4GB
                            try:
                                if file_size == MAX_UINT32:
                                    file_size = values.pop(0)
                                if data_size == MAX_UINT32:
                                    data_size = values.pop(0)
                                if file_offset == MAX_UINT32:
                                    file_offset = values.pop(0)
                            except IndexError:
                                # The record holds fewer 64-bit values than
                                # the header fields ask for.
                                raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)

                            break

                        # For a typical zip, this bytes-slicing only happens 2-3 times, on
                        # small data like timestamps and filesizes.
                        extra_data = extra_data[4+size:]
                    else:
                        _bootstrap._verbose_message(
                            "zipimport: suspected zip64 but no zip64 extra for {!r}",
                            path,
                        )
                # XXX These two statements seem swapped because `central_directory_position`
                # is a position within the actual file, but `file_offset` (when compared) is
                # as encoded in the entry, not adjusted for this file.
                # N.b. this must be after we've potentially read the zip64 extra which can
                # change `file_offset`.
                if file_offset > central_directory_position:
                    raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
                file_offset += arc_offset

                t = (path, compress, data_size, file_size, file_offset, time, date, crc)
                files[name] = t
                count += 1
        finally:
            fp.seek(start_offset)
    _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive)
    return files
550
# During bootstrap, we may need to load the encodings
# package from a ZIP file. But the cp437 encoding is implemented
# in Python in the encodings package.
#
# Break out of this dependency by using the translation table for
# the cp437 encoding.
#
# _read_directory() decodes a non-ASCII legacy file name as latin1 and then
# passes this string to str.translate(), so the character at index N is
# the replacement for byte value N.
cp437_table = (
    # ASCII part, 8 rows x 16 chars
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
    '\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'
    ' !"#$%&\'()*+,-./'
    '0123456789:;<=>?'
    '@ABCDEFGHIJKLMNO'
    'PQRSTUVWXYZ[\\]^_'
    '`abcdefghijklmno'
    'pqrstuvwxyz{|}~\x7f'
    # non-ASCII part, 16 rows x 8 chars
    '\xc7\xfc\xe9\xe2\xe4\xe0\xe5\xe7'
    '\xea\xeb\xe8\xef\xee\xec\xc4\xc5'
    '\xc9\xe6\xc6\xf4\xf6\xf2\xfb\xf9'
    '\xff\xd6\xdc\xa2\xa3\xa5\u20a7\u0192'
    '\xe1\xed\xf3\xfa\xf1\xd1\xaa\xba'
    '\xbf\u2310\xac\xbd\xbc\xa1\xab\xbb'
    '\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556'
    '\u2555\u2563\u2551\u2557\u255d\u255c\u255b\u2510'
    '\u2514\u2534\u252c\u251c\u2500\u253c\u255e\u255f'
    '\u255a\u2554\u2569\u2566\u2560\u2550\u256c\u2567'
    '\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256b'
    '\u256a\u2518\u250c\u2588\u2584\u258c\u2590\u2580'
    '\u03b1\xdf\u0393\u03c0\u03a3\u03c3\xb5\u03c4'
    '\u03a6\u0398\u03a9\u03b4\u221e\u03c6\u03b5\u2229'
    '\u2261\xb1\u2265\u2264\u2320\u2321\xf7\u2248'
    '\xb0\u2219\xb7\u221a\u207f\xb2\u25a0\xa0'
)
585
586_importing_zlib = False
587
588# Return the zlib.decompress function object, or NULL if zlib couldn't
589# be imported. The function is cached when found, so subsequent calls
590# don't import zlib again.
591def _get_decompress_func():
592    global _importing_zlib
593    if _importing_zlib:
594        # Someone has a zlib.py[co] in their Zip file
595        # let's avoid a stack overflow.
596        _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
597        raise ZipImportError("can't decompress data; zlib not available")
598
599    _importing_zlib = True
600    try:
601        from zlib import decompress
602    except Exception:
603        _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
604        raise ZipImportError("can't decompress data; zlib not available")
605    finally:
606        _importing_zlib = False
607
608    _bootstrap._verbose_message('zipimport: zlib available')
609    return decompress
610
611# Given a path to a Zip file and a toc_entry, return the (uncompressed) data.
612def _get_data(archive, toc_entry):
613    datapath, compress, data_size, file_size, file_offset, time, date, crc = toc_entry
614    if data_size < 0:
615        raise ZipImportError('negative data size')
616
617    with _io.open_code(archive) as fp:
618        # Check to make sure the local file header is correct
619        try:
620            fp.seek(file_offset)
621        except OSError:
622            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
623        buffer = fp.read(30)
624        if len(buffer) != 30:
625            raise EOFError('EOF read where not expected')
626
627        if buffer[:4] != b'PK\x03\x04':
628            # Bad: Local File Header
629            raise ZipImportError(f'bad local file header: {archive!r}', path=archive)
630
631        name_size = _unpack_uint16(buffer[26:28])
632        extra_size = _unpack_uint16(buffer[28:30])
633        header_size = 30 + name_size + extra_size
634        file_offset += header_size  # Start of file data
635        try:
636            fp.seek(file_offset)
637        except OSError:
638            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
639        raw_data = fp.read(data_size)
640        if len(raw_data) != data_size:
641            raise OSError("zipimport: can't read data")
642
643    if compress == 0:
644        # data is not compressed
645        return raw_data
646
647    # Decompress with zlib
648    try:
649        decompress = _get_decompress_func()
650    except Exception:
651        raise ZipImportError("can't decompress data; zlib not available")
652    return decompress(raw_data, -15)
653
654
655# Lenient date/time comparison function. The precision of the mtime
656# in the archive is lower than the mtime stored in a .pyc: we
657# must allow a difference of at most one second.
658def _eq_mtime(t1, t2):
659    # dostime only stores even seconds, so be lenient
660    return abs(t1 - t2) <= 1
661
662
663# Given the contents of a .py[co] file, unmarshal the data
664# and return the code object. Raises ImportError it the magic word doesn't
665# match, or if the recorded .py[co] metadata does not match the source.
666def _unmarshal_code(self, pathname, fullpath, fullname, data):
667    exc_details = {
668        'name': fullname,
669        'path': fullpath,
670    }
671
672    flags = _bootstrap_external._classify_pyc(data, fullname, exc_details)
673
674    hash_based = flags & 0b1 != 0
675    if hash_based:
676        check_source = flags & 0b10 != 0
677        if (_imp.check_hash_based_pycs != 'never' and
678                (check_source or _imp.check_hash_based_pycs == 'always')):
679            source_bytes = _get_pyc_source(self, fullpath)
680            if source_bytes is not None:
681                source_hash = _imp.source_hash(
682                    _bootstrap_external._RAW_MAGIC_NUMBER,
683                    source_bytes,
684                )
685
686                _bootstrap_external._validate_hash_pyc(
687                    data, source_hash, fullname, exc_details)
688    else:
689        source_mtime, source_size = \
690            _get_mtime_and_size_of_source(self, fullpath)
691
692        if source_mtime:
693            # We don't use _bootstrap_external._validate_timestamp_pyc
694            # to allow for a more lenient timestamp check.
695            if (not _eq_mtime(_unpack_uint32(data[8:12]), source_mtime) or
696                    _unpack_uint32(data[12:16]) != source_size):
697                _bootstrap._verbose_message(
698                    f'bytecode is stale for {fullname!r}')
699                return None
700
701    code = marshal.loads(data[16:])
702    if not isinstance(code, _code_type):
703        raise TypeError(f'compiled module {pathname!r} is not a code object')
704    return code
705
706_code_type = type(_unmarshal_code.__code__)
707
708
709# Replace any occurrences of '\r\n?' in the input string with '\n'.
710# This converts DOS and Mac line endings to Unix line endings.
711def _normalize_line_endings(source):
712    source = source.replace(b'\r\n', b'\n')
713    source = source.replace(b'\r', b'\n')
714    return source
715
# Compile the Python source held in the buffer 'source' and return the
# resulting code object; 'pathname' is recorded as its file name. Line
# endings are normalized first.
def _compile_source(pathname, source):
    normalized = _normalize_line_endings(source)
    # dont_inherit: do not pick up future statements or compiler flags
    # from this module.
    return compile(normalized, pathname, 'exec', dont_inherit=True)
721
722# Convert the date/time values found in the Zip archive to a value
723# that's compatible with the time stamp stored in .pyc files.
724def _parse_dostime(d, t):
725    return time.mktime((
726        (d >> 9) + 1980,    # bits 9..15: year
727        (d >> 5) & 0xF,     # bits 5..8: month
728        d & 0x1F,           # bits 0..4: day
729        t >> 11,            # bits 11..15: hours
730        (t >> 5) & 0x3F,    # bits 8..10: minutes
731        (t & 0x1F) * 2,     # bits 0..7: seconds / 2
732        -1, -1, -1))
733
734# Given a path to a .pyc file in the archive, return the
735# modification time of the matching .py file and its size,
736# or (0, 0) if no source is available.
737def _get_mtime_and_size_of_source(self, path):
738    try:
739        # strip 'c' or 'o' from *.py[co]
740        assert path[-1:] in ('c', 'o')
741        path = path[:-1]
742        toc_entry = self._get_files()[path]
743        # fetch the time stamp of the .py file for comparison
744        # with an embedded pyc time stamp
745        time = toc_entry[5]
746        date = toc_entry[6]
747        uncompressed_size = toc_entry[3]
748        return _parse_dostime(date, time), uncompressed_size
749    except (KeyError, IndexError, TypeError):
750        return 0, 0
751
752
753# Given a path to a .pyc file in the archive, return the
754# contents of the matching .py file, or None if no source
755# is available.
756def _get_pyc_source(self, path):
757    # strip 'c' or 'o' from *.py[co]
758    assert path[-1:] in ('c', 'o')
759    path = path[:-1]
760
761    try:
762        toc_entry = self._get_files()[path]
763    except KeyError:
764        return None
765    else:
766        return _get_data(self.archive, toc_entry)
767
768
# Get the code object associated with the module specified by
# 'fullname'. Return a (code, ispackage, modpath) tuple, where modpath is
# the first item of the toc entry the code was taken from. Raise
# ZipImportError if no candidate file yields usable code.
def _get_module_code(self, fullname):
    base = _get_module_path(self, fullname)
    import_error = None
    for suffix, isbytecode, ispackage in _zip_searchorder:
        fullpath = base + suffix
        _bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, fullpath, verbosity=2)
        try:
            toc_entry = self._get_files()[fullpath]
        except KeyError:
            # No such file in the archive, try the next candidate.
            continue

        modpath = toc_entry[0]
        data = _get_data(self.archive, toc_entry)
        if isbytecode:
            try:
                code = _unmarshal_code(self, modpath, fullpath, fullname, data)
            except ImportError as exc:
                # Remember the failure; it is only reported if no other
                # candidate works.
                import_error = exc
                continue
        else:
            code = _compile_source(modpath, data)
        if code is None:
            # bad magic number or non-matching mtime
            # in byte code, try next
            continue
        return code, ispackage, modpath

    # Every candidate was missing or unusable.
    if import_error:
        msg = f"module load failed: {import_error}"
        raise ZipImportError(msg, name=fullname) from import_error
    raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
804