"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
import binascii
import importlib.util
import io
import itertools
import os
import posixpath
import shutil
import stat
import struct
import sys
import threading
import time
import contextlib
import pathlib

try:
    import zlib # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32

try:
    import bz2 # We may need its compression method
except ImportError:
    bz2 = None

try:
    import lzma # We may need its compression method
except ImportError:
    lzma = None

__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
           "Path"]

class BadZipFile(Exception):
    pass


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions
    but those extensions are disabled.
    """

error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

_DD_SIGNATURE = 0x08074b50

_EXTRA_FIELD_STRUCT = struct.Struct('<HH')

def _strip_extra(extra, xids):
    # Remove Extra Fields with specified IDs.
    unpack = _EXTRA_FIELD_STRUCT.unpack
    modified = False
    buffer = []
    start = i = 0
    while i + 4 <= len(extra):
        xid, xlen = unpack(extra[i : i + 4])
        j = i + 4 + xlen
        if xid in xids:
            if i != start:
                buffer.append(extra[start : i])
            start = j
            modified = True
        i = j
    if not modified:
        return extra
    # Keep any trailing fields that were not stripped.
    if start != len(extra):
        buffer.append(extra[start:])
    return b''.join(buffer)

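# Illustrative sketch (not part of the original module): every extra field is a
# little-endian <HH> header (id, data size) followed by the data, so a single
# ZIP64 field carrying one 8-byte value can be built and stripped like this:
#
#     extra = struct.pack('<HHQ', 0x0001, 8, 0xDEADBEEF)
#     _strip_extra(extra, {0x0001})   # -> b'' (the matching field is removed)
#     _strip_extra(extra, {0x9999})   # -> extra, unchanged (no id matched)
#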
def _check_zipfile(fp):
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False

def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result

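# Example (illustrative, not part of the original module): is_zipfile() only
# looks for the end-of-central-directory signature, so it accepts both paths
# and binary file objects but does not validate the archive's members.
#
#     zipfile.is_zipfile("example.zip")           # hypothetical path on disk
#     with open("example.zip", "rb") as f:
#         zipfile.is_zipfile(f)                   # file objects work as well
#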
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec


def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec=list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None


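# Note (illustrative, not part of the original module): the list returned by
# _EndRecData() is the nine unpacked end-of-central-directory fields followed
# by the two values appended above (the raw comment bytes and the record's
# offset), and it is consumed through the _ECD_* indices.  For an archive with
# no members and no comment, all of the counts, sizes and offsets in that
# record are zero and the comment is b"".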
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
        '_end_offset',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        self._end_offset = None         # Start of the next local header or central directory
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object."""
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        return self.filename[-1] == '/'


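# Example (sketch, not part of the original module): ZipInfo.from_file() builds
# the metadata for an on-disk file before it is added to an archive; the paths
# below are hypothetical.
#
#     zi = ZipInfo.from_file("photo.jpg", arcname="images/photo.jpg")
#     zi.compress_type = ZIP_DEFLATED     # members default to ZIP_STORED
#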
# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
# internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().

_crctable = None
def _gen_crc(crc):
    for j in range(8):
        if crc & 1:
            crc = (crc >> 1) ^ 0xEDB88320
        else:
            crc >>= 1
    return crc

# ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
#
# Usage:
#     zd = _ZipDecrypter(mypwd)
#     plain_bytes = zd(cypher_bytes)

def _ZipDecrypter(pwd):
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    global _crctable
    if _crctable is None:
        _crctable = list(map(_gen_crc, range(256)))
    crctable = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for p in pwd:
        update_keys(p)

    def decrypter(data):
        """Decrypt a bytes object."""
        result = bytearray()
        append = result.append
        for c in data:
            k = key2 | 2
            c ^= ((k * (k^1)) >> 8) & 0xFF
            update_keys(c)
            append(c)
        return bytes(result)

    return decrypter


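# Example (sketch, not part of the original module): callers normally do not
# use _ZipDecrypter directly; the password goes through the public ZipFile API
# and is checked against the 12-byte encryption header described below.  The
# archive and member names here are hypothetical.
#
#     with ZipFile("encrypted.zip") as zf:
#         data = zf.read("member.txt", pwd=b"secret")
#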
class LZMACompressor:

    def __init__(self):
        self._comp = None

    def _init(self):
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        if self._comp is None:
            return self._init() + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        if self._comp is None:
            return self._init() + self._comp.flush()
        return self._comp.flush()


class LZMADecompressor:

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        if self._decomp is None:
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result


compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}

def _check_compression(compression):
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise NotImplementedError("That compression method is not supported")


def _get_compressor(compress_type, compresslevel=None):
    if compress_type == ZIP_DEFLATED:
        if compresslevel is not None:
            return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        if compresslevel is not None:
            return bz2.BZ2Compressor(compresslevel)
        return bz2.BZ2Compressor()
    # compresslevel is ignored for ZIP_LZMA
    elif compress_type == ZIP_LZMA:
        return LZMACompressor()
    else:
        return None


def _get_decompressor(compress_type):
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))


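# Note (illustrative, not part of the original module): the -15 passed to
# zlib.compressobj()/decompressobj() above selects a raw deflate stream with no
# zlib header or trailer, which is how ZIP members are stored, e.g.:
#
#     co = zlib.compressobj(6, zlib.DEFLATED, -15)
#     raw = co.compress(b"hello") + co.flush()
#     zlib.decompressobj(-15).decompress(raw)    # -> b"hello"
#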
class _SharedFile:
    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing
        self.seekable = file.seekable
        self.tell = file.tell

    def seek(self, offset, whence=0):
        with self._lock:
            if self._writing():
                raise ValueError("Can't reposition in the ZIP file while "
                        "there is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(offset, whence)
            self._pos = self._file.tell()
            return self._pos

    def read(self, n=-1):
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(self._pos)
            data = self._file.read(n)
            self._pos = self._file.tell()
            return data

    def close(self):
        if self._file is not None:
            fileobj = self._file
            self._file = None
            self._close(fileobj)

# Provide the tell method for unseekable streams
class _Tellable:
    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        n = self.fp.write(data)
        self.offset += n
        return n

    def tell(self):
        return self.offset

    def flush(self):
        self.fp.flush()

    def close(self):
        self.fp.close()


class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)


    def _init_decrypter(self):
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if self.closed:
            raise ValueError("read from closed file.")
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def seek(self, offset, whence=0):
        if self.closed:
            raise ValueError("seek on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos


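# Example (sketch, not part of the original module): ZipExtFile is what
# ZipFile.open() returns in read mode, so a member can be streamed without
# extracting it.  The archive and member names are hypothetical.
#
#     with ZipFile("archive.zip") as zf:
#         with zf.open("docs/readme.txt") as member:
#             first = member.readline()
#             member.seek(0)      # allowed when the underlying file is seekable
#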
class _ZipWriteFile(io.BufferedIOBase):
    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        if self.closed:
            raise ValueError('I/O operation on closed file.')
        nbytes = len(data)
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False



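# Example (sketch, not part of the original module): _ZipWriteFile is returned
# by ZipFile.open(..., mode='w') and accumulates the CRC and sizes as data is
# written; the names below are hypothetical.
#
#     with ZipFile("out.zip", "w", compression=ZIP_DEFLATED) as zf:
#         with zf.open("logs/run.txt", mode="w") as dest:
#             dest.write(b"line 1\n")
#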
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
                compresslevel=None)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
          or append 'a'.
    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.
    compresslevel: None (default for the given compression type) or an integer
                   specifying the level to pass to the compressor.
                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
                   When using ZIP_BZIP2 integers 1 through 9 are accepted.

    """

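    # Typical usage (illustrative comment, not part of the original module; the
    # archive and member names are hypothetical):
    #
    #     with ZipFile("archive.zip") as zf:
    #         zf.printdir()
    #         names = zf.namelist()
    #         data = zf.read(names[0])
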
    fp = None                   # Set here since __del__ checks it
    _windows_illegal_name_trans_table = None

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'."""
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if self.fp is not None:
            if self._filePassed:
                result.append(' file=%r' % self.fp)
            elif self.filename is not None:
                result.append(' filename=%r' % self.filename)
            result.append(' mode=%r' % self.mode)
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file."""
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total", total)

        end_offset = self.start_dir
        for zinfo in sorted(self.filelist,
                            key=lambda zinfo: zinfo.header_offset,
                            reverse=True):
            zinfo._end_offset = end_offset
            end_offset = zinfo.header_offset

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self, file=None):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
              file=file)
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
                  file=file)

    def testzip(self):
        """Read all the files and check the CRC."""
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                with self.open(zinfo.filename, "r") as f:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
            except BadZipFile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'."""
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd:
            self.pwd = pwd
        else:
            self.pwd = None

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
        # check for valid comment length
        if len(comment) > ZIP_MAX_COMMENT:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT, stacklevel=2)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def read(self, name, pwd=None):
        """Return file bytes for name."""
        with self.open(name, "r", pwd) as fp:
            return fp.read()

    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
1555                    % (zinfo.orig_filename, fname))
1556
1557            if (zinfo._end_offset is not None and
1558                zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
1559                raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
1560
1561            # check for encrypted flag & handle password
1562            is_encrypted = zinfo.flag_bits & 0x1
1563            if is_encrypted:
1564                if not pwd:
1565                    pwd = self.pwd
1566                if not pwd:
1567                    raise RuntimeError("File %r is encrypted, password "
1568                                       "required for extraction" % name)
1569            else:
1570                pwd = None
1571
1572            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
1573        except:
1574            zef_file.close()
1575            raise
1576
1577    def _open_to_write(self, zinfo, force_zip64=False):
1578        if force_zip64 and not self._allowZip64:
1579            raise ValueError(
1580                "force_zip64 is True, but allowZip64 was False when opening "
1581                "the ZIP file."
1582            )
1583        if self._writing:
1584            raise ValueError("Can't write to the ZIP file while there is "
1585                             "another write handle open on it. "
1586                             "Close the first handle before opening another.")
1587
1588        # Size and CRC are overwritten with correct data after processing the file
1589        zinfo.compress_size = 0
1590        zinfo.CRC = 0
1591
1592        zinfo.flag_bits = 0x00
1593        if zinfo.compress_type == ZIP_LZMA:
1594            # Compressed data includes an end-of-stream (EOS) marker
1595            zinfo.flag_bits |= 0x02
1596        if not self._seekable:
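            # Flag bit 3 (0x08): the output stream is not seekable, so the real
            # CRC and sizes cannot be patched into the local header and will be
            # written in a data descriptor that follows the file data.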
1597            zinfo.flag_bits |= 0x08
1598
1599        if not zinfo.external_attr:
1600            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------
1601
1602        # Compressed size can be larger than uncompressed size
1603        zip64 = self._allowZip64 and \
1604                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1605
1606        if self._seekable:
1607            self.fp.seek(self.start_dir)
1608        zinfo.header_offset = self.fp.tell()
1609
1610        self._writecheck(zinfo)
1611        self._didModify = True
1612
1613        self.fp.write(zinfo.FileHeader(zip64))
1614
1615        self._writing = True
1616        return _ZipWriteFile(self, zinfo, zip64)
1617
1618    def extract(self, member, path=None, pwd=None):
1619        """Extract a member from the archive to the current working directory,
1620           using its full name. Its file information is extracted as accurately
1621           as possible. `member' may be a filename or a ZipInfo object. You can
1622           specify a different directory using `path'.
1623        """
1624        if path is None:
1625            path = os.getcwd()
1626        else:
1627            path = os.fspath(path)
1628
1629        return self._extract_member(member, path, pwd)
1630
1631    def extractall(self, path=None, members=None, pwd=None):
1632        """Extract all members from the archive to the current working
1633           directory. `path' specifies a different directory to extract to.
1634           `members' is optional and must be a subset of the list returned
1635           by namelist().
1636        """
1637        if members is None:
1638            members = self.namelist()
1639
1640        if path is None:
1641            path = os.getcwd()
1642        else:
1643            path = os.fspath(path)
1644
1645        for zipinfo in members:
1646            self._extract_member(zipinfo, path, pwd)
1647
1648    @classmethod
1649    def _sanitize_windows_name(cls, arcname, pathsep):
1650        """Replace bad characters and remove trailing dots from parts."""
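        # Illustrative example: 'ab?c.' becomes 'ab_c' -- illegal characters
        # are replaced with '_' and trailing dots are stripped from each part.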
1651        table = cls._windows_illegal_name_trans_table
1652        if not table:
1653            illegal = ':<>|"?*'
1654            table = str.maketrans(illegal, '_' * len(illegal))
1655            cls._windows_illegal_name_trans_table = table
1656        arcname = arcname.translate(table)
1657        # remove trailing dots
1658        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1659        # rejoin, removing empty parts.
1660        arcname = pathsep.join(x for x in arcname if x)
1661        return arcname
1662
1663    def _extract_member(self, member, targetpath, pwd):
1664        """Extract the ZipInfo object 'member' to a physical
1665           file on the path targetpath.
1666        """
1667        if not isinstance(member, ZipInfo):
1668            member = self.getinfo(member)
1669
1670        # build the destination pathname, replacing
1671        # forward slashes to platform specific separators.
1672        arcname = member.filename.replace('/', os.path.sep)
1673
1674        if os.path.altsep:
1675            arcname = arcname.replace(os.path.altsep, os.path.sep)
1676        # interpret absolute pathname as relative, remove drive letter or
1677        # UNC path, redundant separators, "." and ".." components.
1678        arcname = os.path.splitdrive(arcname)[1]
1679        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
1680        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1681                                   if x not in invalid_path_parts)
1682        if os.path.sep == '\\':
1683            # filter illegal characters on Windows
1684            arcname = self._sanitize_windows_name(arcname, os.path.sep)
1685
1686        targetpath = os.path.join(targetpath, arcname)
1687        targetpath = os.path.normpath(targetpath)
1688
1689        # Create all upper directories if necessary.
1690        upperdirs = os.path.dirname(targetpath)
1691        if upperdirs and not os.path.exists(upperdirs):
1692            os.makedirs(upperdirs)
1693
1694        if member.is_dir():
1695            if not os.path.isdir(targetpath):
1696                os.mkdir(targetpath)
1697            return targetpath
1698
1699        with self.open(member, pwd=pwd) as source, \
1700             open(targetpath, "wb") as target:
1701            shutil.copyfileobj(source, target)
1702
1703        return targetpath
1704
1705    def _writecheck(self, zinfo):
1706        """Check for errors before writing a file to the archive."""
1707        if zinfo.filename in self.NameToInfo:
1708            import warnings
1709            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1710        if self.mode not in ('w', 'x', 'a'):
1711            raise ValueError("write() requires mode 'w', 'x', or 'a'")
1712        if not self.fp:
1713            raise ValueError(
1714                "Attempt to write ZIP archive that was already closed")
1715        _check_compression(zinfo.compress_type)
1716        if not self._allowZip64:
1717            requires_zip64 = None
1718            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1719                requires_zip64 = "Files count"
1720            elif zinfo.file_size > ZIP64_LIMIT:
1721                requires_zip64 = "Filesize"
1722            elif zinfo.header_offset > ZIP64_LIMIT:
1723                requires_zip64 = "Zipfile size"
1724            if requires_zip64:
1725                raise LargeZipFile(requires_zip64 +
1726                                   " would require ZIP64 extensions")
1727
1728    def write(self, filename, arcname=None,
1729              compress_type=None, compresslevel=None):
1730        """Put the bytes from filename into the archive under the name
1731        arcname."""
1732        if not self.fp:
1733            raise ValueError(
1734                "Attempt to write to ZIP archive that was already closed")
1735        if self._writing:
1736            raise ValueError(
1737                "Can't write to ZIP archive while an open writing handle exists"
1738            )
1739
1740        zinfo = ZipInfo.from_file(filename, arcname,
1741                                  strict_timestamps=self._strict_timestamps)
1742
1743        if zinfo.is_dir():
1744            zinfo.compress_size = 0
1745            zinfo.CRC = 0
1746        else:
1747            if compress_type is not None:
1748                zinfo.compress_type = compress_type
1749            else:
1750                zinfo.compress_type = self.compression
1751
1752            if compresslevel is not None:
1753                zinfo._compresslevel = compresslevel
1754            else:
1755                zinfo._compresslevel = self.compresslevel
1756
1757        if zinfo.is_dir():
1758            with self._lock:
1759                if self._seekable:
1760                    self.fp.seek(self.start_dir)
1761                zinfo.header_offset = self.fp.tell()  # Start of header bytes
1762                if zinfo.compress_type == ZIP_LZMA:
1763                    # Compressed data includes an end-of-stream (EOS) marker
1764                    zinfo.flag_bits |= 0x02
1765
1766                self._writecheck(zinfo)
1767                self._didModify = True
1768
1769                self.filelist.append(zinfo)
1770                self.NameToInfo[zinfo.filename] = zinfo
1771                self.fp.write(zinfo.FileHeader(False))
1772                self.start_dir = self.fp.tell()
1773        else:
1774            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1775                shutil.copyfileobj(src, dest, 1024*8)
1776
1777    def writestr(self, zinfo_or_arcname, data,
1778                 compress_type=None, compresslevel=None):
1779        """Write a file into the archive.  The content is 'data', which
1780        may be either a 'str' or a 'bytes' instance; if it is a 'str',
1781        it is encoded as UTF-8 first.
1782        'zinfo_or_arcname' is either a ZipInfo instance or
1783        the name of the file in the archive."""
1784        if isinstance(data, str):
1785            data = data.encode("utf-8")
1786        if not isinstance(zinfo_or_arcname, ZipInfo):
1787            zinfo = ZipInfo(filename=zinfo_or_arcname,
1788                            date_time=time.localtime(time.time())[:6])
1789            zinfo.compress_type = self.compression
1790            zinfo._compresslevel = self.compresslevel
1791            if zinfo.filename[-1] == '/':
1792                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1793                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1794            else:
1795                zinfo.external_attr = 0o600 << 16     # ?rw-------
1796        else:
1797            zinfo = zinfo_or_arcname
1798
1799        if not self.fp:
1800            raise ValueError(
1801                "Attempt to write to ZIP archive that was already closed")
1802        if self._writing:
1803            raise ValueError(
1804                "Can't write to ZIP archive while an open writing handle exists."
1805            )
1806
1807        if compress_type is not None:
1808            zinfo.compress_type = compress_type
1809
1810        if compresslevel is not None:
1811            zinfo._compresslevel = compresslevel
1812
1813        zinfo.file_size = len(data)            # Uncompressed size
1814        with self._lock:
1815            with self.open(zinfo, mode='w') as dest:
1816                dest.write(data)
1817
1818    def __del__(self):
1819        """Call the "close()" method in case the user forgot."""
1820        self.close()
1821
1822    def close(self):
1823        """Close the file, and for mode 'w', 'x' and 'a' write the ending
1824        records."""
1825        if self.fp is None:
1826            return
1827
1828        if self._writing:
1829            raise ValueError("Can't close the ZIP file while there is "
1830                             "an open writing handle on it. "
1831                             "Close the writing handle before closing the zip.")
1832
1833        try:
1834            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1835                with self._lock:
1836                    if self._seekable:
1837                        self.fp.seek(self.start_dir)
1838                    self._write_end_record()
1839        finally:
1840            fp = self.fp
1841            self.fp = None
1842            self._fpclose(fp)
1843
1844    def _write_end_record(self):
1845        for zinfo in self.filelist:         # write central directory
1846            dt = zinfo.date_time
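            # Pack the timestamp into MS-DOS format: the date word holds
            # year-1980 / month / day in bits 9-15 / 5-8 / 0-4, and the time
            # word holds hour / minute / second//2 in bits 11-15 / 5-10 / 0-4
            # (so timestamps have a two-second resolution).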
1847            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1848            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1849            extra = []
1850            if zinfo.file_size > ZIP64_LIMIT \
1851               or zinfo.compress_size > ZIP64_LIMIT:
1852                extra.append(zinfo.file_size)
1853                extra.append(zinfo.compress_size)
1854                file_size = 0xffffffff
1855                compress_size = 0xffffffff
1856            else:
1857                file_size = zinfo.file_size
1858                compress_size = zinfo.compress_size
1859
1860            if zinfo.header_offset > ZIP64_LIMIT:
1861                extra.append(zinfo.header_offset)
1862                header_offset = 0xffffffff
1863            else:
1864                header_offset = zinfo.header_offset
1865
1866            extra_data = zinfo.extra
1867            min_version = 0
1868            if extra:
1869                # Append a ZIP64 field to the extra's
1870                extra_data = _strip_extra(extra_data, (1,))
1871                extra_data = struct.pack(
1872                    '<HH' + 'Q'*len(extra),
1873                    1, 8*len(extra), *extra) + extra_data
1874
1875                min_version = ZIP64_VERSION
1876
1877            if zinfo.compress_type == ZIP_BZIP2:
1878                min_version = max(BZIP2_VERSION, min_version)
1879            elif zinfo.compress_type == ZIP_LZMA:
1880                min_version = max(LZMA_VERSION, min_version)
1881
1882            extract_version = max(min_version, zinfo.extract_version)
1883            create_version = max(min_version, zinfo.create_version)
1884            filename, flag_bits = zinfo._encodeFilenameFlags()
1885            centdir = struct.pack(structCentralDir,
1886                                  stringCentralDir, create_version,
1887                                  zinfo.create_system, extract_version, zinfo.reserved,
1888                                  flag_bits, zinfo.compress_type, dostime, dosdate,
1889                                  zinfo.CRC, compress_size, file_size,
1890                                  len(filename), len(extra_data), len(zinfo.comment),
1891                                  0, zinfo.internal_attr, zinfo.external_attr,
1892                                  header_offset)
1893            self.fp.write(centdir)
1894            self.fp.write(filename)
1895            self.fp.write(extra_data)
1896            self.fp.write(zinfo.comment)
1897
1898        pos2 = self.fp.tell()
1899        # Write end-of-zip-archive record
1900        centDirCount = len(self.filelist)
1901        centDirSize = pos2 - self.start_dir
1902        centDirOffset = self.start_dir
1903        requires_zip64 = None
1904        if centDirCount > ZIP_FILECOUNT_LIMIT:
1905            requires_zip64 = "Files count"
1906        elif centDirOffset > ZIP64_LIMIT:
1907            requires_zip64 = "Central directory offset"
1908        elif centDirSize > ZIP64_LIMIT:
1909            requires_zip64 = "Central directory size"
1910        if requires_zip64:
1911            # Need to write the ZIP64 end-of-archive records
1912            if not self._allowZip64:
1913                raise LargeZipFile(requires_zip64 +
1914                                   " would require ZIP64 extensions")
1915            zip64endrec = struct.pack(
1916                structEndArchive64, stringEndArchive64,
1917                44, 45, 45, 0, 0, centDirCount, centDirCount,
1918                centDirSize, centDirOffset)
1919            self.fp.write(zip64endrec)
1920
1921            zip64locrec = struct.pack(
1922                structEndArchive64Locator,
1923                stringEndArchive64Locator, 0, pos2, 1)
1924            self.fp.write(zip64locrec)
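            # The classic end-of-archive record written below can only hold
            # 16/32-bit values; cap any overflowing fields at their maxima
            # (0xFFFF / 0xFFFFFFFF) so readers consult the ZIP64 records above.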
1925            centDirCount = min(centDirCount, 0xFFFF)
1926            centDirSize = min(centDirSize, 0xFFFFFFFF)
1927            centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1928
1929        endrec = struct.pack(structEndArchive, stringEndArchive,
1930                             0, 0, centDirCount, centDirCount,
1931                             centDirSize, centDirOffset, len(self._comment))
1932        self.fp.write(endrec)
1933        self.fp.write(self._comment)
1934        if self.mode == "a":
1935            self.fp.truncate()
1936        self.fp.flush()
1937
1938    def _fpclose(self, fp):
1939        assert self._fileRefCnt > 0
1940        self._fileRefCnt -= 1
1941        if not self._fileRefCnt and not self._filePassed:
1942            fp.close()
1943
1944
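# Illustrative sketch, not part of the module's API: a minimal round trip
# through ZipFile.writestr(), ZipFile.open('w') and ZipFile.read(), matching
# the behaviour documented in open() above.  The helper name and member names
# are hypothetical, and the function is never called here.
def _zipfile_round_trip_example():
    buf = io.BytesIO()
    with ZipFile(buf, 'w') as zf:
        zf.writestr('greeting.txt', 'hello')         # str data is encoded as UTF-8
        with zf.open('data.bin', mode='w') as dest:  # streaming write handle
            dest.write(b'\x00' * 1024)
    with ZipFile(buf, 'r') as zf:
        assert zf.read('greeting.txt') == b'hello'   # read() returns bytes
        with zf.open('data.bin') as src:             # open() returns a file object
            assert len(src.read()) == 1024

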
1945class PyZipFile(ZipFile):
1946    """Class to create ZIP archives with Python library files and packages."""
1947
1948    def __init__(self, file, mode="r", compression=ZIP_STORED,
1949                 allowZip64=True, optimize=-1):
1950        ZipFile.__init__(self, file, mode=mode, compression=compression,
1951                         allowZip64=allowZip64)
1952        self._optimize = optimize
1953
1954    def writepy(self, pathname, basename="", filterfunc=None):
1955        """Add all files from "pathname" to the ZIP archive.
1956
1957        If pathname is a package directory, search the directory and
1958        all package subdirectories recursively for *.py files and add
1959        the corresponding modules to the archive.  If pathname is a
1960        plain directory, add the *.py files found by listing it.
1961        Otherwise, pathname must be a single Python *.py file and that
1962        module is added to the archive.  Modules are always added as
1963        module.pyc; this method compiles module.py into module.pyc
1964        when necessary.
1965        If filterfunc(pathname) is given, it is called for every file
1966        and directory; when it returns a false value, that path is skipped.
1967        """
1968        pathname = os.fspath(pathname)
1969        if filterfunc and not filterfunc(pathname):
1970            if self.debug:
1971                label = 'path' if os.path.isdir(pathname) else 'file'
1972                print('%s %r skipped by filterfunc' % (label, pathname))
1973            return
1974        dir, name = os.path.split(pathname)
1975        if os.path.isdir(pathname):
1976            initname = os.path.join(pathname, "__init__.py")
1977            if os.path.isfile(initname):
1978                # This is a package directory, add it
1979                if basename:
1980                    basename = "%s/%s" % (basename, name)
1981                else:
1982                    basename = name
1983                if self.debug:
1984                    print("Adding package in", pathname, "as", basename)
1985                fname, arcname = self._get_codename(initname[0:-3], basename)
1986                if self.debug:
1987                    print("Adding", arcname)
1988                self.write(fname, arcname)
1989                dirlist = sorted(os.listdir(pathname))
1990                dirlist.remove("__init__.py")
1991                # Add all *.py files and package subdirectories
1992                for filename in dirlist:
1993                    path = os.path.join(pathname, filename)
1994                    root, ext = os.path.splitext(filename)
1995                    if os.path.isdir(path):
1996                        if os.path.isfile(os.path.join(path, "__init__.py")):
1997                            # This is a package directory, add it
1998                            self.writepy(path, basename,
1999                                         filterfunc=filterfunc)  # Recursive call
2000                    elif ext == ".py":
2001                        if filterfunc and not filterfunc(path):
2002                            if self.debug:
2003                                print('file %r skipped by filterfunc' % path)
2004                            continue
2005                        fname, arcname = self._get_codename(path[0:-3],
2006                                                            basename)
2007                        if self.debug:
2008                            print("Adding", arcname)
2009                        self.write(fname, arcname)
2010            else:
2011                # This is NOT a package directory, add its files at top level
2012                if self.debug:
2013                    print("Adding files from directory", pathname)
2014                for filename in sorted(os.listdir(pathname)):
2015                    path = os.path.join(pathname, filename)
2016                    root, ext = os.path.splitext(filename)
2017                    if ext == ".py":
2018                        if filterfunc and not filterfunc(path):
2019                            if self.debug:
2020                                print('file %r skipped by filterfunc' % path)
2021                            continue
2022                        fname, arcname = self._get_codename(path[0:-3],
2023                                                            basename)
2024                        if self.debug:
2025                            print("Adding", arcname)
2026                        self.write(fname, arcname)
2027        else:
2028            if pathname[-3:] != ".py":
2029                raise RuntimeError(
2030                    'Files added with writepy() must end with ".py"')
2031            fname, arcname = self._get_codename(pathname[0:-3], basename)
2032            if self.debug:
2033                print("Adding file", arcname)
2034            self.write(fname, arcname)
2035
2036    def _get_codename(self, pathname, basename):
2037        """Return (filename, archivename) for the path.
2038
2039        Given a module name path, return the correct file path and
2040        archive name, compiling if necessary.  For example, given
2041        /python/lib/string, return (/python/lib/string.pyc, string).
2042        """
2043        def _compile(file, optimize=-1):
2044            import py_compile
2045            if self.debug:
2046                print("Compiling", file)
2047            try:
2048                py_compile.compile(file, doraise=True, optimize=optimize)
2049            except py_compile.PyCompileError as err:
2050                print(err.msg)
2051                return False
2052            return True
2053
2054        file_py  = pathname + ".py"
2055        file_pyc = pathname + ".pyc"
2056        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2057        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2058        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
2059        if self._optimize == -1:
2060            # legacy mode: use whatever file is present
2061            if (os.path.isfile(file_pyc) and
2062                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2063                # Use .pyc file.
2064                arcname = fname = file_pyc
2065            elif (os.path.isfile(pycache_opt0) and
2066                  os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
2067                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2068                # file name in the archive.
2069                fname = pycache_opt0
2070                arcname = file_pyc
2071            elif (os.path.isfile(pycache_opt1) and
2072                  os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2073                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2074                # file name in the archive.
2075                fname = pycache_opt1
2076                arcname = file_pyc
2077            elif (os.path.isfile(pycache_opt2) and
2078                  os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2079                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2080                # file name in the archive.
2081                fname = pycache_opt2
2082                arcname = file_pyc
2083            else:
2084                # Compile py into PEP 3147 pyc file.
2085                if _compile(file_py):
2086                    if sys.flags.optimize == 0:
2087                        fname = pycache_opt0
2088                    elif sys.flags.optimize == 1:
2089                        fname = pycache_opt1
2090                    else:
2091                        fname = pycache_opt2
2092                    arcname = file_pyc
2093                else:
2094                    fname = arcname = file_py
2095        else:
2096            # new mode: use given optimization level
2097            if self._optimize == 0:
2098                fname = pycache_opt0
2099                arcname = file_pyc
2100            else:
2101                arcname = file_pyc
2102                if self._optimize == 1:
2103                    fname = pycache_opt1
2104                elif self._optimize == 2:
2105                    fname = pycache_opt2
2106                else:
2107                    msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2108                    raise ValueError(msg)
2109            if not (os.path.isfile(fname) and
2110                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2111                if not _compile(file_py, optimize=self._optimize):
2112                    fname = arcname = file_py
2113        archivename = os.path.split(arcname)[1]
2114        if basename:
2115            archivename = "%s/%s" % (basename, archivename)
2116        return (fname, archivename)
2117
2118
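# Illustrative sketch, not part of the module's API: building a library archive
# with PyZipFile.writepy() and a filterfunc, as described in the writepy()
# docstring above.  'package_dir' and 'lib.zip' are hypothetical, and the
# function is never called here.
def _pyzipfile_example(package_dir):
    def skip_tests(path):
        # Skip any file or directory whose basename mentions 'test'.
        return 'test' not in os.path.basename(path)
    with PyZipFile('lib.zip', mode='w', optimize=0) as zf:
        zf.writepy(package_dir, filterfunc=skip_tests)
        return zf.namelist()

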
2119def _parents(path):
2120    """
2121    Given a path with elements separated by
2122    posixpath.sep, generate all parents of that path.
2123
2124    >>> list(_parents('b/d'))
2125    ['b']
2126    >>> list(_parents('/b/d/'))
2127    ['/b']
2128    >>> list(_parents('b/d/f/'))
2129    ['b/d', 'b']
2130    >>> list(_parents('b'))
2131    []
2132    >>> list(_parents(''))
2133    []
2134    """
2135    return itertools.islice(_ancestry(path), 1, None)
2136
2137
2138def _ancestry(path):
2139    """
2140    Given a path with elements separated by
2141    posixpath.sep, generate all elements of that path
2142
2143    >>> list(_ancestry('b/d'))
2144    ['b/d', 'b']
2145    >>> list(_ancestry('/b/d/'))
2146    ['/b/d', '/b']
2147    >>> list(_ancestry('b/d/f/'))
2148    ['b/d/f', 'b/d', 'b']
2149    >>> list(_ancestry('b'))
2150    ['b']
2151    >>> list(_ancestry(''))
2152    []
2153    """
2154    path = path.rstrip(posixpath.sep)
2155    while path and path != posixpath.sep:
2156        yield path
2157        path, tail = posixpath.split(path)
2158
2159
2160_dedupe = dict.fromkeys
2161"""Deduplicate an iterable in original order"""
2162
2163
2164def _difference(minuend, subtrahend):
2165    """
2166    Return items in minuend not in subtrahend, retaining order
2167    with O(1) lookup.
2168    """
2169    return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2170
2171
2172class CompleteDirs(ZipFile):
2173    """
2174    A ZipFile subclass that ensures that implied directories
2175    are always included in the namelist.
2176    """
2177
2178    @staticmethod
2179    def _implied_dirs(names):
2180        parents = itertools.chain.from_iterable(map(_parents, names))
2181        as_dirs = (p + posixpath.sep for p in parents)
2182        return _dedupe(_difference(as_dirs, names))
2183
2184    def namelist(self):
2185        names = super(CompleteDirs, self).namelist()
2186        return names + list(self._implied_dirs(names))
2187
2188    def _name_set(self):
2189        return set(self.namelist())
2190
2191    def resolve_dir(self, name):
2192        """
2193        If the name represents a directory, return that name
2194        as a directory (with the trailing slash).
2195        """
2196        names = self._name_set()
2197        dirname = name + '/'
2198        dir_match = name not in names and dirname in names
2199        return dirname if dir_match else name
2200
2201    @classmethod
2202    def make(cls, source):
2203        """
2204        Given a source (filename or zipfile), return an
2205        appropriate CompleteDirs subclass.
2206        """
2207        if isinstance(source, CompleteDirs):
2208            return source
2209
2210        if not isinstance(source, ZipFile):
2211            return cls(source)
2212
2213        # Only allow FastLookup when the supplied zipfile is read-only
2214        if 'r' not in source.mode:
2215            cls = CompleteDirs
2216
2217        source.__class__ = cls
2218        return source
2219
2220
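# Illustrative sketch, not part of the module's API: CompleteDirs reports the
# parent directories implied by the stored names, which a plain ZipFile does
# not.  The function is hypothetical and never called here.
def _completedirs_example():
    buf = io.BytesIO()
    with ZipFile(buf, 'w') as zf:
        zf.writestr('pkg/mod/data.txt', 'x')
    cd = CompleteDirs.make(buf)
    # 'pkg/' and 'pkg/mod/' are implied by the single stored member.
    assert 'pkg/' in cd.namelist() and 'pkg/mod/' in cd.namelist()

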
2221class FastLookup(CompleteDirs):
2222    """
2223    ZipFile subclass to ensure implicit
2224    dirs exist and are resolved rapidly.
2225    """
2226
2227    def namelist(self):
2228        with contextlib.suppress(AttributeError):
2229            return self.__names
2230        self.__names = super(FastLookup, self).namelist()
2231        return self.__names
2232
2233    def _name_set(self):
2234        with contextlib.suppress(AttributeError):
2235            return self.__lookup
2236        self.__lookup = super(FastLookup, self)._name_set()
2237        return self.__lookup
2238
2239
2240class Path:
2241    """
2242    A pathlib-compatible interface for zip files.
2243
2244    Consider a zip file with this structure::
2245
2246        .
2247        ├── a.txt
2248        └── b
2249            ├── c.txt
2250            └── d
2251                └── e.txt
2252
2253    >>> data = io.BytesIO()
2254    >>> zf = ZipFile(data, 'w')
2255    >>> zf.writestr('a.txt', 'content of a')
2256    >>> zf.writestr('b/c.txt', 'content of c')
2257    >>> zf.writestr('b/d/e.txt', 'content of e')
2258    >>> zf.filename = 'mem/abcde.zip'
2259
2260    Path accepts the zipfile object itself or a filename
2261
2262    >>> root = Path(zf)
2263
2264    From there, several path operations are available.
2265
2266    Directory iteration (including the zip file itself):
2267
2268    >>> a, b = root.iterdir()
2269    >>> a
2270    Path('mem/abcde.zip', 'a.txt')
2271    >>> b
2272    Path('mem/abcde.zip', 'b/')
2273
2274    name property:
2275
2276    >>> b.name
2277    'b'
2278
2279    join with divide operator:
2280
2281    >>> c = b / 'c.txt'
2282    >>> c
2283    Path('mem/abcde.zip', 'b/c.txt')
2284    >>> c.name
2285    'c.txt'
2286
2287    Read text:
2288
2289    >>> c.read_text()
2290    'content of c'
2291
2292    existence:
2293
2294    >>> c.exists()
2295    True
2296    >>> (b / 'missing.txt').exists()
2297    False
2298
2299    Coercion to string:
2300
2301    >>> import os
2302    >>> str(c).replace(os.sep, posixpath.sep)
2303    'mem/abcde.zip/b/c.txt'
2304
2305    At the root, ``name``, ``filename``, and ``parent``
2306    resolve to the zipfile. Note these attributes are not
2307    valid and will raise a ``ValueError`` if the zipfile
2308    has no filename.
2309
2310    >>> root.name
2311    'abcde.zip'
2312    >>> str(root.filename).replace(os.sep, posixpath.sep)
2313    'mem/abcde.zip'
2314    >>> str(root.parent)
2315    'mem'
2316    """
2317
2318    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
2319
2320    def __init__(self, root, at=""):
2321        """
2322        Construct a Path from a ZipFile or filename.
2323
2324        Note: When the source is an existing ZipFile object,
2325        its type (__class__) will be mutated to a
2326        specialized type. If the caller wishes to retain the
2327        original type, the caller should either create a
2328        separate ZipFile object or pass a filename.
2329        """
2330        self.root = FastLookup.make(root)
2331        self.at = at
2332
2333    def open(self, mode='r', *args, pwd=None, **kwargs):
2334        """
2335        Open this entry as text or binary following the semantics
2336        of ``pathlib.Path.open()`` by passing arguments through
2337        to io.TextIOWrapper().
2338        """
2339        if self.is_dir():
2340            raise IsADirectoryError(self)
2341        zip_mode = mode[0]
2342        if not self.exists() and zip_mode == 'r':
2343            raise FileNotFoundError(self)
2344        stream = self.root.open(self.at, zip_mode, pwd=pwd)
2345        if 'b' in mode:
2346            if args or kwargs:
2347                raise ValueError("encoding args invalid for binary operation")
2348            return stream
2349        else:
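            # Text mode: resolve the encoding argument explicitly with
            # io.text_encoding() before wrapping the raw stream below.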
2350            kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
2351        return io.TextIOWrapper(stream, *args, **kwargs)
2352
2353    @property
2354    def name(self):
2355        return pathlib.Path(self.at).name or self.filename.name
2356
2357    @property
2358    def filename(self):
2359        return pathlib.Path(self.root.filename).joinpath(self.at)
2360
2361    def read_text(self, *args, **kwargs):
2362        kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
2363        with self.open('r', *args, **kwargs) as strm:
2364            return strm.read()
2365
2366    def read_bytes(self):
2367        with self.open('rb') as strm:
2368            return strm.read()
2369
2370    def _is_child(self, path):
2371        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
2372
2373    def _next(self, at):
2374        return self.__class__(self.root, at)
2375
2376    def is_dir(self):
2377        return not self.at or self.at.endswith("/")
2378
2379    def is_file(self):
2380        return self.exists() and not self.is_dir()
2381
2382    def exists(self):
2383        return self.at in self.root._name_set()
2384
2385    def iterdir(self):
2386        if not self.is_dir():
2387            raise ValueError("Can't listdir a file")
2388        subs = map(self._next, self.root.namelist())
2389        return filter(self._is_child, subs)
2390
2391    def __str__(self):
2392        return posixpath.join(self.root.filename, self.at)
2393
2394    def __repr__(self):
2395        return self.__repr.format(self=self)
2396
2397    def joinpath(self, *other):
2398        next = posixpath.join(self.at, *other)
2399        return self._next(self.root.resolve_dir(next))
2400
2401    __truediv__ = joinpath
2402
2403    @property
2404    def parent(self):
2405        if not self.at:
2406            return self.filename.parent
2407        parent_at = posixpath.dirname(self.at.rstrip('/'))
2408        if parent_at:
2409            parent_at += '/'
2410        return self._next(parent_at)
2411
2412
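# Illustrative sketch, not part of the module's API: traversing an in-memory
# archive with Path and reading a member in text and binary mode, per the
# Path and Path.open() documentation above.  Names are hypothetical, and the
# function is never called here.
def _zip_path_example():
    buf = io.BytesIO()
    with ZipFile(buf, 'w') as zf:
        zf.writestr('docs/readme.txt', 'hello')
    root = Path(ZipFile(buf))
    entry = root / 'docs' / 'readme.txt'
    assert entry.read_text(encoding='utf-8') == 'hello'
    with entry.open('rb') as f:   # a 'b' in the mode returns the raw byte stream
        assert f.read() == b'hello'

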
2413def main(args=None):
2414    import argparse
2415
2416    description = 'A simple command-line interface for zipfile module.'
2417    parser = argparse.ArgumentParser(description=description)
2418    group = parser.add_mutually_exclusive_group(required=True)
2419    group.add_argument('-l', '--list', metavar='<zipfile>',
2420                       help='Show listing of a zipfile')
2421    group.add_argument('-e', '--extract', nargs=2,
2422                       metavar=('<zipfile>', '<output_dir>'),
2423                       help='Extract zipfile into target dir')
2424    group.add_argument('-c', '--create', nargs='+',
2425                       metavar=('<name>', '<file>'),
2426                       help='Create zipfile from sources')
2427    group.add_argument('-t', '--test', metavar='<zipfile>',
2428                       help='Test if a zipfile is valid')
2429    args = parser.parse_args(args)
2430
2431    if args.test is not None:
2432        src = args.test
2433        with ZipFile(src, 'r') as zf:
2434            badfile = zf.testzip()
2435        if badfile:
2436            print("The following enclosed file is corrupted: {!r}".format(badfile))
2437        print("Done testing")
2438
2439    elif args.list is not None:
2440        src = args.list
2441        with ZipFile(src, 'r') as zf:
2442            zf.printdir()
2443
2444    elif args.extract is not None:
2445        src, curdir = args.extract
2446        with ZipFile(src, 'r') as zf:
2447            zf.extractall(curdir)
2448
2449    elif args.create is not None:
2450        zip_name = args.create.pop(0)
2451        files = args.create
2452
2453        def addToZip(zf, path, zippath):
2454            if os.path.isfile(path):
2455                zf.write(path, zippath, ZIP_DEFLATED)
2456            elif os.path.isdir(path):
2457                if zippath:
2458                    zf.write(path, zippath)
2459                for nm in sorted(os.listdir(path)):
2460                    addToZip(zf,
2461                             os.path.join(path, nm), os.path.join(zippath, nm))
2462            # else: ignore
2463
2464        with ZipFile(zip_name, 'w') as zf:
2465            for path in files:
2466                zippath = os.path.basename(path)
2467                if not zippath:
2468                    zippath = os.path.basename(os.path.dirname(path))
2469                if zippath in ('', os.curdir, os.pardir):
2470                    zippath = ''
2471                addToZip(zf, path, zippath)
2472
2473
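# Illustrative sketch, not part of the module's API: main() implements the
# `python -m zipfile` command line and can also be driven programmatically
# with the same flags.  The file names below are hypothetical, and the
# function is never called here.
def _cli_examples():
    main(['-c', 'backup.zip', 'docs/'])      # create backup.zip from docs/
    main(['-l', 'backup.zip'])               # list the archive contents
    main(['-t', 'backup.zip'])               # CRC-check every member
    main(['-e', 'backup.zip', 'restored'])   # extract into restored/

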
2474if __name__ == "__main__":
2475    main()
2476