1"""
2Read and write ZIP files.
3
4XXX references to utf-8 need further investigation.
5"""
6import binascii
7import importlib.util
8import io
9import os
10import shutil
11import stat
12import struct
13import sys
14import threading
15import time
16
17try:
18    import zlib # We may need its compression method
19    crc32 = zlib.crc32
20except ImportError:
21    zlib = None
22    crc32 = binascii.crc32
23
24try:
25    import bz2 # We may need its compression method
26except ImportError:
27    bz2 = None
28
29try:
30    import lzma # We may need its compression method
31except ImportError:
32    lzma = None
33
34__all__ = ["BadZipFile", "BadZipfile", "error",
35           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
36           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
37           "Path"]
38
39class BadZipFile(Exception):
40    pass
41
42
43class LargeZipFile(Exception):
44    """
45    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
46    and those extensions are disabled.
47    """

error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1
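# As a rough guide: an entry whose compressed or uncompressed size exceeds
# ZIP64_LIMIT, or an archive with more than ZIP_FILECOUNT_LIMIT members or a
# central directory starting past ZIP64_LIMIT, needs the ZIP64 extensions.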

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
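# For illustration: struct.unpack(structEndArchive, data) on a raw
# end-of-central-directory record yields a tuple that these _ECD_* names
# index into, e.g. endrec[_ECD_COMMENT_SIZE] is the stored comment length;
# this is exactly how _EndRecData() below uses them.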

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# General purpose bit flags
# Zip Appnote: 4.4.4 general purpose bit flag: (2 bytes)
_MASK_ENCRYPTED = 1 << 0
# Bits 1 and 2 have different meanings depending on the compression used.
_MASK_COMPRESS_OPTION_1 = 1 << 1
# _MASK_COMPRESS_OPTION_2 = 1 << 2
# _MASK_USE_DATA_DESCRIPTOR: If set, crc-32, compressed size and uncompressed
# size are zero in the local header and the real values are written in the data
# descriptor immediately following the compressed data.
_MASK_USE_DATA_DESCRIPTOR = 1 << 3
# Bit 4: Reserved for use with compression method 8, for enhanced deflating.
# _MASK_RESERVED_BIT_4 = 1 << 4
_MASK_COMPRESSED_PATCH = 1 << 5
_MASK_STRONG_ENCRYPTION = 1 << 6
# _MASK_UNUSED_BIT_7 = 1 << 7
# _MASK_UNUSED_BIT_8 = 1 << 8
# _MASK_UNUSED_BIT_9 = 1 << 9
# _MASK_UNUSED_BIT_10 = 1 << 10
_MASK_UTF_FILENAME = 1 << 11
# Bit 12: Reserved by PKWARE for enhanced compression.
# _MASK_RESERVED_BIT_12 = 1 << 12
# _MASK_ENCRYPTED_CENTRAL_DIR = 1 << 13
# Bit 14, 15: Reserved by PKWARE
# _MASK_RESERVED_BIT_14 = 1 << 14
# _MASK_RESERVED_BIT_15 = 1 << 15
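# These masks are tested against the general purpose flag word, e.g.
#     if zinfo.flag_bits & _MASK_UTF_FILENAME: ...  # name is stored as UTF-8
# which is how the central directory reader below chooses the name encoding.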

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

_DD_SIGNATURE = 0x08074b50


class _Extra(bytes):
    FIELD_STRUCT = struct.Struct('<HH')

    def __new__(cls, val, id=None):
        return super().__new__(cls, val)

    def __init__(self, val, id=None):
        self.id = id

    @classmethod
    def read_one(cls, raw):
        try:
            xid, xlen = cls.FIELD_STRUCT.unpack(raw[:4])
        except struct.error:
            xid = None
            xlen = 0
        return cls(raw[:4+xlen], xid), raw[4+xlen:]

    @classmethod
    def split(cls, data):
        # use memoryview for zero-copy slices
        rest = memoryview(data)
        while rest:
            extra, rest = _Extra.read_one(rest)
            yield extra

    @classmethod
    def strip(cls, data, xids):
        """Remove Extra fields with specified IDs."""
        return b''.join(
            ex
            for ex in cls.split(data)
            if ex.id not in xids
        )
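
    # Illustrative use: _Extra.strip(raw_extra, (0x0001,)) drops any ZIP64
    # extra blocks (header ID 0x0001) while leaving other extra fields intact.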


def _check_zipfile(fp):
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False

def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result

def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec


def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is a ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read(sizeEndCentDir)
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - ZIP_MAX_COMMENT - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read(ZIP_MAX_COMMENT + sizeEndCentDir)
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None

def _sanitize_filename(filename):
    """Terminate the file name at the first null byte and
    ensure paths always use forward slashes as the directory separator."""

    # Terminate the file name at the first null byte.  Null bytes in file
    # names are used as tricks by viruses in archives.
    null_byte = filename.find(chr(0))
    if null_byte >= 0:
        filename = filename[0:null_byte]
    # This is used to ensure paths in generated ZIP files always use
    # forward slashes as the directory separator, as required by the
    # ZIP format specification.
    if os.sep != "/" and os.sep in filename:
        filename = filename.replace(os.sep, "/")
    if os.altsep and os.altsep != "/" and os.altsep in filename:
        filename = filename.replace(os.altsep, "/")
    return filename

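# For example, _sanitize_filename("name.txt\x00junk") returns "name.txt", and on
# platforms whose os.sep is a backslash the separators are rewritten to "/".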

class ZipInfo:
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'compress_level',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
        '_end_offset',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte and
        # ensure paths always use forward slashes as the directory separator.
        filename = _sanitize_filename(filename)

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.compress_level = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        self._end_offset = None         # Start of the next local header or central directory
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    # Maintain backward compatibility with the old protected attribute name.
    @property
    def _compresslevel(self):
        return self.compress_level

    @_compresslevel.setter
    def _compresslevel(self, value):
        self.compress_level = value

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        When the optional zip64 arg is None rather than a bool, the decision
        is based upon the file_size and compress_size when they are known,
        and defaults to False otherwise.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
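        # The two 16-bit DOS fields pack the timestamp as:
        #   date: (year - 1980) in bits 9-15, month in bits 5-8, day in bits 0-4
        #   time: hour in bits 11-15, minute in bits 5-10, second // 2 in bits 0-4
        # For example, (2023, 6, 15, 12, 30, 52) packs to dosdate 0x56CF and
        # dostime 0x63DA.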
        if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            # We always explicitly pass zip64 within this module.... This
            # remains for anyone using ZipInfo.FileHeader as a public API.
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME

    def _decodeExtra(self, filename_crc):
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None
            elif tp == 0x7075:
                data = extra[4:ln+4]
                # Unicode Path Extra Field
                try:
                    up_version, up_name_crc = unpack('<BL', data[:5])
                    if up_version == 1 and up_name_crc == filename_crc:
                        up_unicode_name = data[5:].decode('utf-8')
                        if up_unicode_name:
                            self.filename = _sanitize_filename(up_unicode_name)
                        else:
                            import warnings
                            warnings.warn("Empty unicode path extra field (0x7075)", stacklevel=2)
                except struct.error as e:
                    raise BadZipFile("Corrupt unicode path extra field (0x7075)") from e
                except UnicodeDecodeError as e:
                    raise BadZipFile('Corrupt unicode path extra field (0x7075): invalid utf-8 bytes') from e

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        if self.filename.endswith('/'):
            return True
        # The ZIP format specification requires the use of forward slashes
        # as the directory separator, but in practice some ZIP files
        # created on Windows can use backward slashes.  For compatibility
        # with the extraction code which already handles this:
        if os.path.altsep:
            return self.filename.endswith((os.path.sep, os.path.altsep))
        return False


# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
# internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().

_crctable = None
def _gen_crc(crc):
    for j in range(8):
        if crc & 1:
            crc = (crc >> 1) ^ 0xEDB88320
        else:
            crc >>= 1
    return crc

# ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
#
# Usage:
#     zd = _ZipDecrypter(mypwd)
#     plain_bytes = zd(cypher_bytes)

def _ZipDecrypter(pwd):
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    global _crctable
    if _crctable is None:
        _crctable = list(map(_gen_crc, range(256)))
    crctable = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for p in pwd:
        update_keys(p)

    def decrypter(data):
        """Decrypt a bytes object."""
        result = bytearray()
        append = result.append
        for c in data:
            k = key2 | 2
            c ^= ((k * (k^1)) >> 8) & 0xFF
            update_keys(c)
            append(c)
        return bytes(result)

    return decrypter


class LZMACompressor:

    def __init__(self):
        self._comp = None

    def _init(self):
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
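        # The raw LZMA stream is preceded by a small header: two version bytes
        # (9 and 4, apparently the LZMA SDK version) followed by the 2-byte
        # length of the encoded filter properties and the properties bytes;
        # LZMADecompressor below parses this same layout.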
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        if self._comp is None:
            return self._init() + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        if self._comp is None:
            return self._init() + self._comp.flush()
        return self._comp.flush()


class LZMADecompressor:

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        if self._decomp is None:
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result


compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}

def _check_compression(compression):
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise NotImplementedError("That compression method is not supported")


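# Note: the -15 passed to zlib below selects a raw deflate stream (negative
# window bits mean no zlib header or checksum), which is how deflate data is
# stored inside ZIP members; the compressor and decompressor must agree on it.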
def _get_compressor(compress_type, compresslevel=None):
    if compress_type == ZIP_DEFLATED:
        if compresslevel is not None:
            return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        if compresslevel is not None:
            return bz2.BZ2Compressor(compresslevel)
        return bz2.BZ2Compressor()
    # compresslevel is ignored for ZIP_LZMA
    elif compress_type == ZIP_LZMA:
        return LZMACompressor()
    else:
        return None


def _get_decompressor(compress_type):
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))


class _SharedFile:
    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing
        self.seekable = file.seekable

    def tell(self):
        return self._pos

    def seek(self, offset, whence=0):
        with self._lock:
            if self._writing():
                raise ValueError("Can't reposition in the ZIP file while "
                        "there is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(offset, whence)
            self._pos = self._file.tell()
            return self._pos

    def read(self, n=-1):
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(self._pos)
            data = self._file.read(n)
            self._pos = self._file.tell()
            return data

    def close(self):
        if self._file is not None:
            fileobj = self._file
            self._file = None
            self._close(fileobj)

# Provide the tell method for unseekable streams
class _Tellable:
    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        n = self.fp.write(data)
        self.offset += n
        return n

    def tell(self):
        return self.offset

    def flush(self):
        self.fp.flush()

    def close(self):
        self.fp.close()


class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24
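
    # Typical (illustrative) use is indirect, via ZipFile.open():
    #     with zf.open("member.txt") as f:
    #         data = f.read()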

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._orig_crc = self._expected_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)


    def _init_decrypter(self):
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r' % (self.name,))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if self.closed:
            raise ValueError("read from closed file.")
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def seek(self, offset, whence=os.SEEK_SET):
        if self.closed:
            raise ValueError("seek on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == os.SEEK_SET:
            new_pos = offset
        elif whence == os.SEEK_CUR:
            new_pos = curr_pos + offset
        elif whence == os.SEEK_END:
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        # Fast seek uncompressed unencrypted file
        elif self._compress_type == ZIP_STORED and self._decrypter is None and read_offset > 0:
            # disable CRC checking after first seeking - it would be invalid
            self._expected_crc = None
            # seek actual file taking already buffered data into account
            read_offset -= len(self._readbuffer) - self._offset
            self._fileobj.seek(read_offset, os.SEEK_CUR)
            self._left -= read_offset
            read_offset = 0
            # flush read buffer
            self._readbuffer = b''
            self._offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._expected_crc = self._orig_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos


class _ZipWriteFile(io.BufferedIOBase):
    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo.compress_level)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        return self._zipfile.fp

    @property
    def name(self):
        return self._zinfo.filename

    @property
    def mode(self):
        return 'wb'

    def writable(self):
        return True

    def write(self, data):
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any data that supports the buffer protocol
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            if not self._zip64:
                if self._file_size > ZIP64_LIMIT:
                    raise RuntimeError("File size too large, try using force_zip64")
                if self._compress_size > ZIP64_LIMIT:
                    raise RuntimeError("Compressed size too large, try using force_zip64")

            # Write updated header info
            if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False


class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
                compresslevel=None)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
          or append 'a'.
    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.
    compresslevel: None (default for the given compression type) or an integer
                   specifying the level to pass to the compressor.
                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
                   When using ZIP_BZIP2 integers 1 through 9 are accepted.

    """

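    # A minimal usage sketch ("archive.zip" is just a placeholder name):
    #     with ZipFile("archive.zip") as zf:
    #         for name in zf.namelist():
    #             data = zf.read(name)
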
1320    fp = None                   # Set here since __del__ checks it
1321    _windows_illegal_name_trans_table = None
1322
1323    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
1324                 compresslevel=None, *, strict_timestamps=True, metadata_encoding=None):
1325        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1326        or append 'a'."""
1327        if mode not in ('r', 'w', 'x', 'a'):
1328            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
1329
1330        _check_compression(compression)
1331
1332        self._allowZip64 = allowZip64
1333        self._didModify = False
1334        self.debug = 0  # Level of printing: 0 through 3
1335        self.NameToInfo = {}    # Find file info given name
1336        self.filelist = []      # List of ZipInfo instances for archive
1337        self.compression = compression  # Method of compression
1338        self.compresslevel = compresslevel
1339        self.mode = mode
1340        self.pwd = None
1341        self._comment = b''
1342        self._strict_timestamps = strict_timestamps
1343        self.metadata_encoding = metadata_encoding
1344
1345        # Check that we don't try to write with nonconforming codecs
1346        if self.metadata_encoding and mode != 'r':
1347            raise ValueError(
1348                "metadata_encoding is only supported for reading files")
1349
1350        # Check if we were passed a file-like object
1351        if isinstance(file, os.PathLike):
1352            file = os.fspath(file)
1353        if isinstance(file, str):
1354            # No, it's a filename
1355            self._filePassed = 0
1356            self.filename = file
1357            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1358                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
1359            filemode = modeDict[mode]
1360            while True:
1361                try:
1362                    self.fp = io.open(file, filemode)
1363                except OSError:
1364                    if filemode in modeDict:
1365                        filemode = modeDict[filemode]
1366                        continue
1367                    raise
1368                break
1369        else:
1370            self._filePassed = 1
1371            self.fp = file
1372            self.filename = getattr(file, 'name', None)
1373        self._fileRefCnt = 1
1374        self._lock = threading.RLock()
1375        self._seekable = True
1376        self._writing = False
1377
1378        try:
1379            if mode == 'r':
1380                self._RealGetContents()
1381            elif mode in ('w', 'x'):
1382                # set the modified flag so central directory gets written
1383                # even if no files are added to the archive
1384                self._didModify = True
1385                try:
1386                    self.start_dir = self.fp.tell()
1387                except (AttributeError, OSError):
1388                    self.fp = _Tellable(self.fp)
1389                    self.start_dir = 0
1390                    self._seekable = False
1391                else:
1392                    # Some file-like objects can provide tell() but not seek()
1393                    try:
1394                        self.fp.seek(self.start_dir)
1395                    except (AttributeError, OSError):
1396                        self._seekable = False
1397            elif mode == 'a':
1398                try:
1399                    # See if file is a zip file
1400                    self._RealGetContents()
1401                    # seek to start of directory and overwrite
1402                    self.fp.seek(self.start_dir)
1403                except BadZipFile:
1404                    # file is not a zip file, just append
1405                    self.fp.seek(0, 2)
1406
1407                    # set the modified flag so central directory gets written
1408                    # even if no files are added to the archive
1409                    self._didModify = True
1410                    self.start_dir = self.fp.tell()
1411            else:
1412                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
1413        except:
1414            fp = self.fp
1415            self.fp = None
1416            self._fpclose(fp)
1417            raise
1418
1419    def __enter__(self):
1420        return self
1421
1422    def __exit__(self, type, value, traceback):
1423        self.close()
1424
1425    def __repr__(self):
1426        result = ['<%s.%s' % (self.__class__.__module__,
1427                              self.__class__.__qualname__)]
1428        if self.fp is not None:
1429            if self._filePassed:
1430                result.append(' file=%r' % self.fp)
1431            elif self.filename is not None:
1432                result.append(' filename=%r' % self.filename)
1433            result.append(' mode=%r' % self.mode)
1434        else:
1435            result.append(' [closed]')
1436        result.append('>')
1437        return ''.join(result)
1438
1439    def _RealGetContents(self):
1440        """Read in the table of contents for the ZIP file."""
1441        fp = self.fp
1442        try:
1443            endrec = _EndRecData(fp)
1444        except OSError:
1445            raise BadZipFile("File is not a zip file")
1446        if not endrec:
1447            raise BadZipFile("File is not a zip file")
1448        if self.debug > 1:
1449            print(endrec)
1450        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
1451        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
1452        self._comment = endrec[_ECD_COMMENT]    # archive comment
1453
1454        # "concat" is zero, unless zip was concatenated to another file
1455        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
1456        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1457            # If Zip64 extension structures are present, account for them
1458            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1459
1460        if self.debug > 2:
1461            inferred = concat + offset_cd
1462            print("given, inferred, offset", offset_cd, inferred, concat)
1463        # self.start_dir:  Position of start of central directory
1464        self.start_dir = offset_cd + concat
1465        if self.start_dir < 0:
1466            raise BadZipFile("Bad offset for central directory")
1467        fp.seek(self.start_dir, 0)
1468        data = fp.read(size_cd)
1469        fp = io.BytesIO(data)
1470        total = 0
1471        while total < size_cd:
1472            centdir = fp.read(sizeCentralDir)
1473            if len(centdir) != sizeCentralDir:
1474                raise BadZipFile("Truncated central directory")
1475            centdir = struct.unpack(structCentralDir, centdir)
1476            if centdir[_CD_SIGNATURE] != stringCentralDir:
1477                raise BadZipFile("Bad magic number for central directory")
1478            if self.debug > 2:
1479                print(centdir)
1480            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
1481            orig_filename_crc = crc32(filename)
1482            flags = centdir[_CD_FLAG_BITS]
1483            if flags & _MASK_UTF_FILENAME:
1484                # UTF-8 file names extension
1485                filename = filename.decode('utf-8')
1486            else:
1487                # Historical ZIP filename encoding
1488                filename = filename.decode(self.metadata_encoding or 'cp437')
1489            # Create ZipInfo instance to store file information
1490            x = ZipInfo(filename)
1491            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1492            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
1493            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
1494            (x.create_version, x.create_system, x.extract_version, x.reserved,
1495             x.flag_bits, x.compress_type, t, d,
1496             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
1497            if x.extract_version > MAX_EXTRACT_VERSION:
1498                raise NotImplementedError("zip file version %.1f" %
1499                                          (x.extract_version / 10))
1500            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1501            # Convert date/time code to (year, month, day, hour, min, sec)
1502            x._raw_time = t
1503            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
1504                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
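            # Editor's note: a worked example of the decoding above, using the
            # hypothetical raw values d = 22425 (0x5799) and t = 21460 (0x53D4):
            #   ((22425 >> 9) + 1980, (22425 >> 5) & 0xF, 22425 & 0x1F,
            #    21460 >> 11, (21460 >> 5) & 0x3F, (21460 & 0x1F) * 2)
            #   == (2023, 12, 25, 10, 30, 40)   # DOS times have 2-second precision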
1505            x._decodeExtra(orig_filename_crc)
1506            x.header_offset = x.header_offset + concat
1507            self.filelist.append(x)
1508            self.NameToInfo[x.filename] = x
1509
1510            # update total bytes read from central directory
1511            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1512                     + centdir[_CD_EXTRA_FIELD_LENGTH]
1513                     + centdir[_CD_COMMENT_LENGTH])
1514
1515            if self.debug > 2:
1516                print("total", total)
1517
1518        end_offset = self.start_dir
1519        for zinfo in sorted(self.filelist,
1520                            key=lambda zinfo: zinfo.header_offset,
1521                            reverse=True):
1522            zinfo._end_offset = end_offset
1523            end_offset = zinfo.header_offset
1524
1525    def namelist(self):
1526        """Return a list of file names in the archive."""
1527        return [data.filename for data in self.filelist]
1528
1529    def infolist(self):
1530        """Return a list of class ZipInfo instances for files in the
1531        archive."""
1532        return self.filelist
1533
1534    def printdir(self, file=None):
1535        """Print a table of contents for the zip file."""
1536        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
1537              file=file)
1538        for zinfo in self.filelist:
1539            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
1540            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1541                  file=file)
1542
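    # Editor's sketch of the listing helpers above; the archive name is
    # hypothetical:
    #
    #   import zipfile
    #   with zipfile.ZipFile("example.zip") as zf:
    #       zf.printdir()                       # formatted table on stdout
    #       for info in zf.infolist():          # ZipInfo objects
    #           print(info.filename, info.file_size, info.compress_size)
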
1543    def testzip(self):
1544        """Read all the files and check the CRC.
1545
1546        Return None if all files could be read successfully, or the name
1547        of the offending file otherwise."""
1548        chunk_size = 2 ** 20
1549        for zinfo in self.filelist:
1550            try:
1551                # Read by chunks, to avoid an OverflowError or a
1552                # MemoryError with very large embedded files.
1553                with self.open(zinfo.filename, "r") as f:
1554                    while f.read(chunk_size):     # Check CRC-32
1555                        pass
1556            except BadZipFile:
1557                return zinfo.filename
1558
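    # Editor's sketch of an integrity check with testzip(); the archive name
    # is hypothetical.  testzip() returns the first failing member, or None:
    #
    #   import zipfile
    #   with zipfile.ZipFile("example.zip") as zf:
    #       bad = zf.testzip()
    #       if bad is not None:
    #           print("corrupt member:", bad)
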
1559    def getinfo(self, name):
1560        """Return the instance of ZipInfo given 'name'."""
1561        info = self.NameToInfo.get(name)
1562        if info is None:
1563            raise KeyError(
1564                'There is no item named %r in the archive' % name)
1565
1566        return info
1567
1568    def setpassword(self, pwd):
1569        """Set default password for encrypted files."""
1570        if pwd and not isinstance(pwd, bytes):
1571            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1572        if pwd:
1573            self.pwd = pwd
1574        else:
1575            self.pwd = None
1576
1577    @property
1578    def comment(self):
1579        """The comment text associated with the ZIP file."""
1580        return self._comment
1581
1582    @comment.setter
1583    def comment(self, comment):
1584        if not isinstance(comment, bytes):
1585            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
1586        # check for valid comment length
1587        if len(comment) > ZIP_MAX_COMMENT:
1588            import warnings
1589            warnings.warn('Archive comment is too long; truncating to %d bytes'
1590                          % ZIP_MAX_COMMENT, stacklevel=2)
1591            comment = comment[:ZIP_MAX_COMMENT]
1592        self._comment = comment
1593        self._didModify = True
1594
1595    def read(self, name, pwd=None):
1596        """Return file bytes for name. 'pwd' is the password to decrypt
1597        encrypted files."""
1598        with self.open(name, "r", pwd) as fp:
1599            return fp.read()
1600
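    # Editor's sketch of read(); the archive, member names and password are
    # hypothetical.  Passwords for legacy ZIP encryption must be bytes:
    #
    #   import zipfile
    #   with zipfile.ZipFile("example.zip") as zf:
    #       data = zf.read("docs/readme.txt")               # plain member
    #       secret = zf.read("secret.txt", pwd=b"hunter2")  # encrypted member
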
1601    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
1602        """Return file-like object for 'name'.
1603
1604        name is a string for the file name within the ZIP file, or a ZipInfo
1605        object.
1606
1607        mode should be 'r' to read a file already in the ZIP file, or 'w' to
1608        write to a file newly added to the archive.
1609
1610        pwd is the password to decrypt files (only used for reading).
1611
1612        When writing, if the file size is not known in advance but may exceed
1613        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
1614        files.  If the size is known in advance, it is best to pass a ZipInfo
1615        instance for name, with zinfo.file_size set.
1616        """
1617        if mode not in {"r", "w"}:
1618            raise ValueError('open() requires mode "r" or "w"')
1619        if pwd and (mode == "w"):
1620            raise ValueError("pwd is only supported for reading files")
1621        if not self.fp:
1622            raise ValueError(
1623                "Attempt to use ZIP archive that was already closed")
1624
1625        # Make sure we have an info object
1626        if isinstance(name, ZipInfo):
1627            # 'name' is already an info object
1628            zinfo = name
1629        elif mode == 'w':
1630            zinfo = ZipInfo(name)
1631            zinfo.compress_type = self.compression
1632            zinfo.compress_level = self.compresslevel
1633        else:
1634            # Get info object for name
1635            zinfo = self.getinfo(name)
1636
1637        if mode == 'w':
1638            return self._open_to_write(zinfo, force_zip64=force_zip64)
1639
1640        if self._writing:
1641            raise ValueError("Can't read from the ZIP file while there "
1642                    "is an open writing handle on it. "
1643                    "Close the writing handle before trying to read.")
1644
1645        # Open for reading:
1646        self._fileRefCnt += 1
1647        zef_file = _SharedFile(self.fp, zinfo.header_offset,
1648                               self._fpclose, self._lock, lambda: self._writing)
1649        try:
1650            # Skip the file header:
1651            fheader = zef_file.read(sizeFileHeader)
1652            if len(fheader) != sizeFileHeader:
1653                raise BadZipFile("Truncated file header")
1654            fheader = struct.unpack(structFileHeader, fheader)
1655            if fheader[_FH_SIGNATURE] != stringFileHeader:
1656                raise BadZipFile("Bad magic number for file header")
1657
1658            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1659            if fheader[_FH_EXTRA_FIELD_LENGTH]:
1660                zef_file.seek(fheader[_FH_EXTRA_FIELD_LENGTH], whence=1)
1661
1662            if zinfo.flag_bits & _MASK_COMPRESSED_PATCH:
1663                # Zip 2.7: compressed patched data
1664                raise NotImplementedError("compressed patched data (flag bit 5)")
1665
1666            if zinfo.flag_bits & _MASK_STRONG_ENCRYPTION:
1667                # strong encryption
1668                raise NotImplementedError("strong encryption (flag bit 6)")
1669
1670            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & _MASK_UTF_FILENAME:
1671                # UTF-8 filename
1672                fname_str = fname.decode("utf-8")
1673            else:
1674                fname_str = fname.decode(self.metadata_encoding or "cp437")
1675
1676            if fname_str != zinfo.orig_filename:
1677                raise BadZipFile(
1678                    'File name in directory %r and header %r differ.'
1679                    % (zinfo.orig_filename, fname))
1680
1681            if (zinfo._end_offset is not None and
1682                zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
1683                raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
1684
1685            # check for encrypted flag & handle password
1686            is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
1687            if is_encrypted:
1688                if not pwd:
1689                    pwd = self.pwd
1690                if pwd and not isinstance(pwd, bytes):
1691                    raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1692                if not pwd:
1693                    raise RuntimeError("File %r is encrypted, password "
1694                                       "required for extraction" % name)
1695            else:
1696                pwd = None
1697
1698            return ZipExtFile(zef_file, mode + 'b', zinfo, pwd, True)
1699        except:
1700            zef_file.close()
1701            raise
1702
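    # Editor's sketch of streaming with open(); file and member names are
    # hypothetical.  Mode 'w' streams a new member without holding the whole
    # payload in memory; force_zip64 is needed if the size may exceed 2 GiB:
    #
    #   import shutil, zipfile
    #   with zipfile.ZipFile("example.zip", "w") as zf:
    #       with open("big.log", "rb") as src, \
    #            zf.open("logs/big.log", mode="w", force_zip64=True) as dest:
    #           shutil.copyfileobj(src, dest)
    #   with zipfile.ZipFile("example.zip") as zf:
    #       with zf.open("logs/big.log") as member:
    #           chunk = member.read(4096)
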
1703    def _open_to_write(self, zinfo, force_zip64=False):
1704        if force_zip64 and not self._allowZip64:
1705            raise ValueError(
1706                "force_zip64 is True, but allowZip64 was False when opening "
1707                "the ZIP file."
1708            )
1709        if self._writing:
1710            raise ValueError("Can't write to the ZIP file while there is "
1711                             "another write handle open on it. "
1712                             "Close the first handle before opening another.")
1713
1714        # Size and CRC are overwritten with correct data after processing the file
1715        zinfo.compress_size = 0
1716        zinfo.CRC = 0
1717
1718        zinfo.flag_bits = 0x00
1719        if zinfo.compress_type == ZIP_LZMA:
1720            # Compressed data includes an end-of-stream (EOS) marker
1721            zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
1722        if not self._seekable:
1723            zinfo.flag_bits |= _MASK_USE_DATA_DESCRIPTOR
1724
1725        if not zinfo.external_attr:
1726            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------
1727
1728        # Compressed size can be larger than uncompressed size
1729        zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT)
1730        if not self._allowZip64 and zip64:
1731            raise LargeZipFile("Filesize would require ZIP64 extensions")
1732
1733        if self._seekable:
1734            self.fp.seek(self.start_dir)
1735        zinfo.header_offset = self.fp.tell()
1736
1737        self._writecheck(zinfo)
1738        self._didModify = True
1739
1740        self.fp.write(zinfo.FileHeader(zip64))
1741
1742        self._writing = True
1743        return _ZipWriteFile(self, zinfo, zip64)
1744
1745    def extract(self, member, path=None, pwd=None):
1746        """Extract a member from the archive to the current working directory,
1747           using its full name. Its file information is extracted as accurately
1748           as possible. `member' may be a filename or a ZipInfo object. You can
1749           specify a different directory using `path'. You can specify the
1750           password to decrypt the file using 'pwd'.
1751        """
1752        if path is None:
1753            path = os.getcwd()
1754        else:
1755            path = os.fspath(path)
1756
1757        return self._extract_member(member, path, pwd)
1758
1759    def extractall(self, path=None, members=None, pwd=None):
1760        """Extract all members from the archive to the current working
1761           directory. `path' specifies a different directory to extract to.
1762           `members' is optional and must be a subset of the list returned
1763           by namelist(). You can specify the password to decrypt all files
1764           using 'pwd'.
1765        """
1766        if members is None:
1767            members = self.namelist()
1768
1769        if path is None:
1770            path = os.getcwd()
1771        else:
1772            path = os.fspath(path)
1773
1774        for zipinfo in members:
1775            self._extract_member(zipinfo, path, pwd)
1776
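    # Editor's sketch of extract()/extractall(); paths are hypothetical.
    # Member names are sanitized and joined onto the target directory:
    #
    #   import zipfile
    #   with zipfile.ZipFile("example.zip") as zf:
    #       zf.extract("docs/readme.txt", path="out")   # one member
    #       zf.extractall(path="out")                   # every member
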
1777    @classmethod
1778    def _sanitize_windows_name(cls, arcname, pathsep):
1779        """Replace bad characters and remove trailing dots from parts."""
1780        table = cls._windows_illegal_name_trans_table
1781        if not table:
1782            illegal = ':<>|"?*'
1783            table = str.maketrans(illegal, '_' * len(illegal))
1784            cls._windows_illegal_name_trans_table = table
1785        arcname = arcname.translate(table)
1786        # remove trailing dots and spaces
1787        arcname = (x.rstrip(' .') for x in arcname.split(pathsep))
1788        # rejoin, removing empty parts.
1789        arcname = pathsep.join(x for x in arcname if x)
1790        return arcname
1791
1792    def _extract_member(self, member, targetpath, pwd):
1793        """Extract the ZipInfo object 'member' to a physical
1794           file on the path targetpath.
1795        """
1796        if not isinstance(member, ZipInfo):
1797            member = self.getinfo(member)
1798
1799        # build the destination pathname, replacing
1800        # forward slashes to platform specific separators.
1801        arcname = member.filename.replace('/', os.path.sep)
1802
1803        if os.path.altsep:
1804            arcname = arcname.replace(os.path.altsep, os.path.sep)
1805        # interpret absolute pathname as relative, remove drive letter or
1806        # UNC path, redundant separators, "." and ".." components.
1807        arcname = os.path.splitdrive(arcname)[1]
1808        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
1809        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1810                                   if x not in invalid_path_parts)
1811        if os.path.sep == '\\':
1812            # filter illegal characters on Windows
1813            arcname = self._sanitize_windows_name(arcname, os.path.sep)
1814
1815        if not arcname and not member.is_dir():
1816            raise ValueError("Empty filename.")
1817
1818        targetpath = os.path.join(targetpath, arcname)
1819        targetpath = os.path.normpath(targetpath)
1820
1821        # Create all upper directories if necessary.
1822        upperdirs = os.path.dirname(targetpath)
1823        if upperdirs and not os.path.exists(upperdirs):
1824            os.makedirs(upperdirs, exist_ok=True)
1825
1826        if member.is_dir():
1827            if not os.path.isdir(targetpath):
1828                try:
1829                    os.mkdir(targetpath)
1830                except FileExistsError:
1831                    if not os.path.isdir(targetpath):
1832                        raise
1833            return targetpath
1834
1835        with self.open(member, pwd=pwd) as source, \
1836             open(targetpath, "wb") as target:
1837            shutil.copyfileobj(source, target)
1838
1839        return targetpath
1840
1841    def _writecheck(self, zinfo):
1842        """Check for errors before writing a file to the archive."""
1843        if zinfo.filename in self.NameToInfo:
1844            import warnings
1845            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1846        if self.mode not in ('w', 'x', 'a'):
1847            raise ValueError("write() requires mode 'w', 'x', or 'a'")
1848        if not self.fp:
1849            raise ValueError(
1850                "Attempt to write ZIP archive that was already closed")
1851        _check_compression(zinfo.compress_type)
1852        if not self._allowZip64:
1853            requires_zip64 = None
1854            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1855                requires_zip64 = "Files count"
1856            elif zinfo.file_size > ZIP64_LIMIT:
1857                requires_zip64 = "Filesize"
1858            elif zinfo.header_offset > ZIP64_LIMIT:
1859                requires_zip64 = "Zipfile size"
1860            if requires_zip64:
1861                raise LargeZipFile(requires_zip64 +
1862                                   " would require ZIP64 extensions")
1863
1864    def write(self, filename, arcname=None,
1865              compress_type=None, compresslevel=None):
1866        """Put the bytes from filename into the archive under the name
1867        arcname."""
1868        if not self.fp:
1869            raise ValueError(
1870                "Attempt to write to ZIP archive that was already closed")
1871        if self._writing:
1872            raise ValueError(
1873                "Can't write to ZIP archive while an open writing handle exists"
1874            )
1875
1876        zinfo = ZipInfo.from_file(filename, arcname,
1877                                  strict_timestamps=self._strict_timestamps)
1878
1879        if zinfo.is_dir():
1880            zinfo.compress_size = 0
1881            zinfo.CRC = 0
1882            self.mkdir(zinfo)
1883        else:
1884            if compress_type is not None:
1885                zinfo.compress_type = compress_type
1886            else:
1887                zinfo.compress_type = self.compression
1888
1889            if compresslevel is not None:
1890                zinfo.compress_level = compresslevel
1891            else:
1892                zinfo.compress_level = self.compresslevel
1893
1894            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1895                shutil.copyfileobj(src, dest, 1024*8)
1896
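    # Editor's sketch of write(); the source file and archive are
    # hypothetical.  compress_type and compresslevel override the archive
    # defaults for this single member:
    #
    #   import zipfile
    #   with zipfile.ZipFile("example.zip", "w") as zf:
    #       zf.write("report.csv")                        # archived as "report.csv"
    #       zf.write("report.csv", arcname="data/report.csv",
    #                compress_type=zipfile.ZIP_DEFLATED, compresslevel=9)
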
1897    def writestr(self, zinfo_or_arcname, data,
1898                 compress_type=None, compresslevel=None):
1899        """Write a file into the archive.  The content is 'data', which
1900        may be either a 'str' or a 'bytes' instance; if it is a 'str',
1901        it is encoded as UTF-8 first.
1902        'zinfo_or_arcname' is either a ZipInfo instance or
1903        the name of the file in the archive."""
1904        if isinstance(data, str):
1905            data = data.encode("utf-8")
1906        if not isinstance(zinfo_or_arcname, ZipInfo):
1907            zinfo = ZipInfo(filename=zinfo_or_arcname,
1908                            date_time=time.localtime(time.time())[:6])
1909            zinfo.compress_type = self.compression
1910            zinfo.compress_level = self.compresslevel
1911            if zinfo.filename.endswith('/'):
1912                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1913                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1914            else:
1915                zinfo.external_attr = 0o600 << 16     # ?rw-------
1916        else:
1917            zinfo = zinfo_or_arcname
1918
1919        if not self.fp:
1920            raise ValueError(
1921                "Attempt to write to ZIP archive that was already closed")
1922        if self._writing:
1923            raise ValueError(
1924                "Can't write to ZIP archive while an open writing handle exists."
1925            )
1926
1927        if compress_type is not None:
1928            zinfo.compress_type = compress_type
1929
1930        if compresslevel is not None:
1931            zinfo.compress_level = compresslevel
1932
1933        zinfo.file_size = len(data)            # Uncompressed size
1934        with self._lock:
1935            with self.open(zinfo, mode='w') as dest:
1936                dest.write(data)
1937
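    # Editor's sketch of writestr(); names and contents are hypothetical.
    # A str payload is encoded as UTF-8; pass a ZipInfo to control metadata
    # such as the timestamp or the external attribute bits:
    #
    #   import zipfile
    #   with zipfile.ZipFile("example.zip", "w") as zf:
    #       zf.writestr("notes.txt", "hello, archive\n")
    #       info = zipfile.ZipInfo("scripts/run.sh",
    #                              date_time=(2020, 1, 1, 0, 0, 0))
    #       info.external_attr = 0o755 << 16    # rwxr-xr-x
    #       zf.writestr(info, b"#!/bin/sh\necho hi\n",
    #                   compress_type=zipfile.ZIP_DEFLATED)
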
1938    def mkdir(self, zinfo_or_directory_name, mode=511):
1939        """Create a directory inside the zip archive."""
1940        if isinstance(zinfo_or_directory_name, ZipInfo):
1941            zinfo = zinfo_or_directory_name
1942            if not zinfo.is_dir():
1943                raise ValueError("The given ZipInfo does not describe a directory")
1944        elif isinstance(zinfo_or_directory_name, str):
1945            directory_name = zinfo_or_directory_name
1946            if not directory_name.endswith("/"):
1947                directory_name += "/"
1948            zinfo = ZipInfo(directory_name)
1949            zinfo.compress_size = 0
1950            zinfo.CRC = 0
1951            zinfo.external_attr = ((0o40000 | mode) & 0xFFFF) << 16
1952            zinfo.file_size = 0
1953            zinfo.external_attr |= 0x10
1954        else:
1955            raise TypeError("Expected type str or ZipInfo")
1956
1957        with self._lock:
1958            if self._seekable:
1959                self.fp.seek(self.start_dir)
1960            zinfo.header_offset = self.fp.tell()  # Start of header bytes
1961            if zinfo.compress_type == ZIP_LZMA:
1962                # Compressed data includes an end-of-stream (EOS) marker
1963                zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
1964
1965            self._writecheck(zinfo)
1966            self._didModify = True
1967
1968            self.filelist.append(zinfo)
1969            self.NameToInfo[zinfo.filename] = zinfo
1970            self.fp.write(zinfo.FileHeader(False))
1971            self.start_dir = self.fp.tell()
1972
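    # Editor's sketch of mkdir(); the archive and directory names are
    # hypothetical.  The default mode of 511 is 0o777, and a trailing slash
    # is added to the entry name if it is missing:
    #
    #   import zipfile
    #   with zipfile.ZipFile("example.zip", "w") as zf:
    #       zf.mkdir("assets")                  # stored as "assets/"
    #       zf.mkdir("private", mode=0o700)
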
1973    def __del__(self):
1974        """Call the "close()" method in case the user forgot."""
1975        self.close()
1976
1977    def close(self):
1978        """Close the file, and for mode 'w', 'x' and 'a' write the ending
1979        records."""
1980        if self.fp is None:
1981            return
1982
1983        if self._writing:
1984            raise ValueError("Can't close the ZIP file while there is "
1985                             "an open writing handle on it. "
1986                             "Close the writing handle before closing the zip.")
1987
1988        try:
1989            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1990                with self._lock:
1991                    if self._seekable:
1992                        self.fp.seek(self.start_dir)
1993                    self._write_end_record()
1994        finally:
1995            fp = self.fp
1996            self.fp = None
1997            self._fpclose(fp)
1998
1999    def _write_end_record(self):
2000        for zinfo in self.filelist:         # write central directory
2001            dt = zinfo.date_time
2002            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
2003            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
2004            extra = []
2005            if zinfo.file_size > ZIP64_LIMIT \
2006               or zinfo.compress_size > ZIP64_LIMIT:
2007                extra.append(zinfo.file_size)
2008                extra.append(zinfo.compress_size)
2009                file_size = 0xffffffff
2010                compress_size = 0xffffffff
2011            else:
2012                file_size = zinfo.file_size
2013                compress_size = zinfo.compress_size
2014
2015            if zinfo.header_offset > ZIP64_LIMIT:
2016                extra.append(zinfo.header_offset)
2017                header_offset = 0xffffffff
2018            else:
2019                header_offset = zinfo.header_offset
2020
2021            extra_data = zinfo.extra
2022            min_version = 0
2023            if extra:
2024                # Append a ZIP64 field to the extras
2025                extra_data = _Extra.strip(extra_data, (1,))
2026                extra_data = struct.pack(
2027                    '<HH' + 'Q'*len(extra),
2028                    1, 8*len(extra), *extra) + extra_data
2029
2030                min_version = ZIP64_VERSION
2031
2032            if zinfo.compress_type == ZIP_BZIP2:
2033                min_version = max(BZIP2_VERSION, min_version)
2034            elif zinfo.compress_type == ZIP_LZMA:
2035                min_version = max(LZMA_VERSION, min_version)
2036
2037            extract_version = max(min_version, zinfo.extract_version)
2038            create_version = max(min_version, zinfo.create_version)
2039            filename, flag_bits = zinfo._encodeFilenameFlags()
2040            centdir = struct.pack(structCentralDir,
2041                                  stringCentralDir, create_version,
2042                                  zinfo.create_system, extract_version, zinfo.reserved,
2043                                  flag_bits, zinfo.compress_type, dostime, dosdate,
2044                                  zinfo.CRC, compress_size, file_size,
2045                                  len(filename), len(extra_data), len(zinfo.comment),
2046                                  0, zinfo.internal_attr, zinfo.external_attr,
2047                                  header_offset)
2048            self.fp.write(centdir)
2049            self.fp.write(filename)
2050            self.fp.write(extra_data)
2051            self.fp.write(zinfo.comment)
2052
2053        pos2 = self.fp.tell()
2054        # Write end-of-zip-archive record
2055        centDirCount = len(self.filelist)
2056        centDirSize = pos2 - self.start_dir
2057        centDirOffset = self.start_dir
2058        requires_zip64 = None
2059        if centDirCount > ZIP_FILECOUNT_LIMIT:
2060            requires_zip64 = "Files count"
2061        elif centDirOffset > ZIP64_LIMIT:
2062            requires_zip64 = "Central directory offset"
2063        elif centDirSize > ZIP64_LIMIT:
2064            requires_zip64 = "Central directory size"
2065        if requires_zip64:
2066            # Need to write the ZIP64 end-of-archive records
2067            if not self._allowZip64:
2068                raise LargeZipFile(requires_zip64 +
2069                                   " would require ZIP64 extensions")
2070            zip64endrec = struct.pack(
2071                structEndArchive64, stringEndArchive64,
2072                44, 45, 45, 0, 0, centDirCount, centDirCount,
2073                centDirSize, centDirOffset)
2074            self.fp.write(zip64endrec)
2075
2076            zip64locrec = struct.pack(
2077                structEndArchive64Locator,
2078                stringEndArchive64Locator, 0, pos2, 1)
2079            self.fp.write(zip64locrec)
2080            centDirCount = min(centDirCount, 0xFFFF)
2081            centDirSize = min(centDirSize, 0xFFFFFFFF)
2082            centDirOffset = min(centDirOffset, 0xFFFFFFFF)
2083
2084        endrec = struct.pack(structEndArchive, stringEndArchive,
2085                             0, 0, centDirCount, centDirCount,
2086                             centDirSize, centDirOffset, len(self._comment))
2087        self.fp.write(endrec)
2088        self.fp.write(self._comment)
2089        if self.mode == "a":
2090            self.fp.truncate()
2091        self.fp.flush()
2092
2093    def _fpclose(self, fp):
2094        assert self._fileRefCnt > 0
2095        self._fileRefCnt -= 1
2096        if not self._fileRefCnt and not self._filePassed:
2097            fp.close()
2098
2099
2100class PyZipFile(ZipFile):
2101    """Class to create ZIP archives with Python library files and packages."""
2102
2103    def __init__(self, file, mode="r", compression=ZIP_STORED,
2104                 allowZip64=True, optimize=-1):
2105        ZipFile.__init__(self, file, mode=mode, compression=compression,
2106                         allowZip64=allowZip64)
2107        self._optimize = optimize
2108
2109    def writepy(self, pathname, basename="", filterfunc=None):
2110        """Add all files from "pathname" to the ZIP archive.
2111
2112        If pathname is a package directory, search the directory and
2113        all package subdirectories recursively for all *.py files and
2114        add the corresponding modules to the archive.  If pathname is a
2115        plain directory, add only the *.py files directly inside it.
2116        Otherwise, pathname must be a single *.py file, and that module
2117        is added to the archive.  Added modules are always stored as
2118        module.pyc; this method compiles module.py into module.pyc when
2119        necessary.
2120        If filterfunc(pathname) is given, it is called for every path;
2121        when it returns a false value, that file or directory is skipped.
2122        """
2123        pathname = os.fspath(pathname)
2124        if filterfunc and not filterfunc(pathname):
2125            if self.debug:
2126                label = 'path' if os.path.isdir(pathname) else 'file'
2127                print('%s %r skipped by filterfunc' % (label, pathname))
2128            return
2129        dir, name = os.path.split(pathname)
2130        if os.path.isdir(pathname):
2131            initname = os.path.join(pathname, "__init__.py")
2132            if os.path.isfile(initname):
2133                # This is a package directory, add it
2134                if basename:
2135                    basename = "%s/%s" % (basename, name)
2136                else:
2137                    basename = name
2138                if self.debug:
2139                    print("Adding package in", pathname, "as", basename)
2140                fname, arcname = self._get_codename(initname[0:-3], basename)
2141                if self.debug:
2142                    print("Adding", arcname)
2143                self.write(fname, arcname)
2144                dirlist = sorted(os.listdir(pathname))
2145                dirlist.remove("__init__.py")
2146                # Add all *.py files and package subdirectories
2147                for filename in dirlist:
2148                    path = os.path.join(pathname, filename)
2149                    root, ext = os.path.splitext(filename)
2150                    if os.path.isdir(path):
2151                        if os.path.isfile(os.path.join(path, "__init__.py")):
2152                            # This is a package directory, add it
2153                            self.writepy(path, basename,
2154                                         filterfunc=filterfunc)  # Recursive call
2155                    elif ext == ".py":
2156                        if filterfunc and not filterfunc(path):
2157                            if self.debug:
2158                                print('file %r skipped by filterfunc' % path)
2159                            continue
2160                        fname, arcname = self._get_codename(path[0:-3],
2161                                                            basename)
2162                        if self.debug:
2163                            print("Adding", arcname)
2164                        self.write(fname, arcname)
2165            else:
2166                # This is NOT a package directory, add its files at top level
2167                if self.debug:
2168                    print("Adding files from directory", pathname)
2169                for filename in sorted(os.listdir(pathname)):
2170                    path = os.path.join(pathname, filename)
2171                    root, ext = os.path.splitext(filename)
2172                    if ext == ".py":
2173                        if filterfunc and not filterfunc(path):
2174                            if self.debug:
2175                                print('file %r skipped by filterfunc' % path)
2176                            continue
2177                        fname, arcname = self._get_codename(path[0:-3],
2178                                                            basename)
2179                        if self.debug:
2180                            print("Adding", arcname)
2181                        self.write(fname, arcname)
2182        else:
2183            if pathname[-3:] != ".py":
2184                raise RuntimeError(
2185                    'Files added with writepy() must end with ".py"')
2186            fname, arcname = self._get_codename(pathname[0:-3], basename)
2187            if self.debug:
2188                print("Adding file", arcname)
2189            self.write(fname, arcname)
2190
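    # Editor's sketch of PyZipFile.writepy(); the paths are hypothetical.
    # writepy() adds compiled .pyc entries for a package tree, a plain
    # directory of modules, or a single module:
    #
    #   import zipfile
    #   with zipfile.PyZipFile("lib.zip", mode="w", optimize=2) as pzf:
    #       pzf.writepy("mypkg")                        # a package directory
    #       pzf.writepy("extras/helper.py")             # a single module
    #       pzf.writepy("scripts",
    #                   filterfunc=lambda p: "test" not in p)
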
2191    def _get_codename(self, pathname, basename):
2192        """Return (filename, archivename) for the path.
2193
2194        Given a module path (without the .py suffix), return the correct
2195        file path and archive name, compiling if necessary.  For example,
2196        given /python/lib/string, return (/python/lib/string.pyc, string.pyc).
2197        """
2198        def _compile(file, optimize=-1):
2199            import py_compile
2200            if self.debug:
2201                print("Compiling", file)
2202            try:
2203                py_compile.compile(file, doraise=True, optimize=optimize)
2204            except py_compile.PyCompileError as err:
2205                print(err.msg)
2206                return False
2207            return True
2208
2209        file_py  = pathname + ".py"
2210        file_pyc = pathname + ".pyc"
2211        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2212        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2213        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
2214        if self._optimize == -1:
2215            # legacy mode: use whatever file is present
2216            if (os.path.isfile(file_pyc) and
2217                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2218                # Use .pyc file.
2219                arcname = fname = file_pyc
2220            elif (os.path.isfile(pycache_opt0) and
2221                  os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
2222                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2223                # file name in the archive.
2224                fname = pycache_opt0
2225                arcname = file_pyc
2226            elif (os.path.isfile(pycache_opt1) and
2227                  os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2228                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2229                # file name in the archive.
2230                fname = pycache_opt1
2231                arcname = file_pyc
2232            elif (os.path.isfile(pycache_opt2) and
2233                  os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2234                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2235                # file name in the archive.
2236                fname = pycache_opt2
2237                arcname = file_pyc
2238            else:
2239                # Compile py into PEP 3147 pyc file.
2240                if _compile(file_py):
2241                    if sys.flags.optimize == 0:
2242                        fname = pycache_opt0
2243                    elif sys.flags.optimize == 1:
2244                        fname = pycache_opt1
2245                    else:
2246                        fname = pycache_opt2
2247                    arcname = file_pyc
2248                else:
2249                    fname = arcname = file_py
2250        else:
2251            # new mode: use given optimization level
2252            if self._optimize == 0:
2253                fname = pycache_opt0
2254                arcname = file_pyc
2255            else:
2256                arcname = file_pyc
2257                if self._optimize == 1:
2258                    fname = pycache_opt1
2259                elif self._optimize == 2:
2260                    fname = pycache_opt2
2261                else:
2262                    msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2263                    raise ValueError(msg)
2264            if not (os.path.isfile(fname) and
2265                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2266                if not _compile(file_py, optimize=self._optimize):
2267                    fname = arcname = file_py
2268        archivename = os.path.split(arcname)[1]
2269        if basename:
2270            archivename = "%s/%s" % (basename, archivename)
2271        return (fname, archivename)
2272
2273
2274def main(args=None):
2275    import argparse
2276
2277    description = 'A simple command-line interface for the zipfile module.'
2278    parser = argparse.ArgumentParser(description=description)
2279    group = parser.add_mutually_exclusive_group(required=True)
2280    group.add_argument('-l', '--list', metavar='<zipfile>',
2281                       help='Show listing of a zipfile')
2282    group.add_argument('-e', '--extract', nargs=2,
2283                       metavar=('<zipfile>', '<output_dir>'),
2284                       help='Extract zipfile into target dir')
2285    group.add_argument('-c', '--create', nargs='+',
2286                       metavar=('<name>', '<file>'),
2287                       help='Create zipfile from sources')
2288    group.add_argument('-t', '--test', metavar='<zipfile>',
2289                       help='Test if a zipfile is valid')
2290    parser.add_argument('--metadata-encoding', metavar='<encoding>',
2291                        help='Specify encoding of member names for -l, -e and -t')
2292    args = parser.parse_args(args)
2293
2294    encoding = args.metadata_encoding
2295
2296    if args.test is not None:
2297        src = args.test
2298        with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
2299            badfile = zf.testzip()
2300        if badfile:
2301            print("The following enclosed file is corrupted: {!r}".format(badfile))
2302        print("Done testing")
2303
2304    elif args.list is not None:
2305        src = args.list
2306        with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
2307            zf.printdir()
2308
2309    elif args.extract is not None:
2310        src, curdir = args.extract
2311        with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
2312            zf.extractall(curdir)
2313
2314    elif args.create is not None:
2315        if encoding:
2316            print("Non-conforming encodings not supported with -c.",
2317                  file=sys.stderr)
2318            sys.exit(1)
2319
2320        zip_name = args.create.pop(0)
2321        files = args.create
2322
2323        def addToZip(zf, path, zippath):
2324            if os.path.isfile(path):
2325                zf.write(path, zippath, ZIP_DEFLATED)
2326            elif os.path.isdir(path):
2327                if zippath:
2328                    zf.write(path, zippath)
2329                for nm in sorted(os.listdir(path)):
2330                    addToZip(zf,
2331                             os.path.join(path, nm), os.path.join(zippath, nm))
2332            # else: ignore
2333
2334        with ZipFile(zip_name, 'w') as zf:
2335            for path in files:
2336                zippath = os.path.basename(path)
2337                if not zippath:
2338                    zippath = os.path.basename(os.path.dirname(path))
2339                if zippath in ('', os.curdir, os.pardir):
2340                    zippath = ''
2341                addToZip(zf, path, zippath)
2342
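# Editor's sketch of the command-line interface defined by main() above; the
# archive and file names are hypothetical:
#
#   python -m zipfile -c archive.zip src/ README.md   # create
#   python -m zipfile -l archive.zip                  # list contents
#   python -m zipfile -t archive.zip                  # test CRCs
#   python -m zipfile -e archive.zip out/             # extract into out/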
2343
2344from ._path import (  # noqa: E402
2345    Path,
2346
2347    # used privately for tests
2348    CompleteDirs,  # noqa: F401
2349)
2350