1"""
2Read and write ZIP files.
3
4XXX references to utf-8 need further investigation.
5"""
6import binascii
7import importlib.util
8import io
9import itertools
10import os
11import posixpath
12import shutil
13import stat
14import struct
15import sys
16import threading
17import time
18import contextlib
19import pathlib
20
21try:
22    import zlib # We may need its compression method
23    crc32 = zlib.crc32
24except ImportError:
25    zlib = None
26    crc32 = binascii.crc32
27
28try:
29    import bz2 # We may need its compression method
30except ImportError:
31    bz2 = None
32
33try:
34    import lzma # We may need its compression method
35except ImportError:
36    lzma = None
37
38__all__ = ["BadZipFile", "BadZipfile", "error",
39           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
40           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
41           "Path"]
42
43class BadZipFile(Exception):
44    pass
45
46
47class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that would require ZIP64 extensions
    while ZIP64 support is disabled (allowZip64=False).
    """

error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

_DD_SIGNATURE = 0x08074b50

_EXTRA_FIELD_STRUCT = struct.Struct('<HH')

def _strip_extra(extra, xids):
    # Remove Extra Fields with specified IDs.
    unpack = _EXTRA_FIELD_STRUCT.unpack
    modified = False
    buffer = []
    start = i = 0
    while i + 4 <= len(extra):
        xid, xlen = unpack(extra[i : i + 4])
        j = i + 4 + xlen
        if xid in xids:
            if i != start:
                buffer.append(extra[start : i])
            start = j
            modified = True
        i = j
    if not modified:
        return extra
    # Keep any data that follows the last stripped field.
    if start != len(extra):
        buffer.append(extra[start:])
    return b''.join(buffer)

def _check_zipfile(fp):
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False

def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result

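# A minimal usage sketch (hedged; "archive.zip" is a hypothetical path):
# is_zipfile() accepts either a path or a binary file-like object and does
# not raise for missing or non-ZIP input; it simply returns False.
#
#     is_zipfile("archive.zip")            # True for a readable ZIP archive
#     with open("archive.zip", "rb") as f:
#         is_zipfile(f)                    # file objects work as well
#
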
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec


def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None


class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object."""
        dt = self.date_time
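        # Pack the timestamp into MS-DOS format: the date word is
        # (year - 1980) << 9 | month << 5 | day, and the time word is
        # hour << 11 | minute << 5 | second // 2 (2-second resolution).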
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
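        # Try the historical ASCII encoding first; if the name does not fit,
        # store it as UTF-8 and set general purpose flag bit 0x800 (the
        # "language encoding" flag) so readers decode the name as UTF-8.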
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        return self.filename[-1] == '/'


# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
# internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().

_crctable = None
def _gen_crc(crc):
    for j in range(8):
        if crc & 1:
            crc = (crc >> 1) ^ 0xEDB88320
        else:
            crc >>= 1
    return crc
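
# A note on the helper above: _gen_crc() produces one entry of the standard
# CRC-32 lookup table (reflected polynomial 0xEDB88320); the full 256-entry
# _crctable is built lazily the first time _ZipDecrypter() is called.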

# ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
#
# Usage:
#     zd = _ZipDecrypter(mypwd)
#     plain_bytes = zd(cypher_bytes)

def _ZipDecrypter(pwd):
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    global _crctable
    if _crctable is None:
        _crctable = list(map(_gen_crc, range(256)))
    crctable = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for p in pwd:
        update_keys(p)

    def decrypter(data):
        """Decrypt a bytes object."""
        result = bytearray()
        append = result.append
        for c in data:
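            # XOR each cipher byte with a keystream byte derived from key2,
            # then mix the recovered plaintext byte back into the keys.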
            k = key2 | 2
            c ^= ((k * (k^1)) >> 8) & 0xFF
            update_keys(c)
            append(c)
        return bytes(result)

    return decrypter


class LZMACompressor:

    def __init__(self):
        self._comp = None

    def _init(self):
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
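        # The compressed stream is prefixed with a small header: the LZMA SDK
        # version used (9.4 here) and the 2-byte length of the raw filter
        # properties, followed by the properties themselves.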
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        if self._comp is None:
            return self._init() + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        if self._comp is None:
            return self._init() + self._comp.flush()
        return self._comp.flush()


class LZMADecompressor:

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        if self._decomp is None:
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result


compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}

def _check_compression(compression):
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise NotImplementedError("That compression method is not supported")


def _get_compressor(compress_type, compresslevel=None):
    if compress_type == ZIP_DEFLATED:
        if compresslevel is not None:
            return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        if compresslevel is not None:
            return bz2.BZ2Compressor(compresslevel)
        return bz2.BZ2Compressor()
    # compresslevel is ignored for ZIP_LZMA
    elif compress_type == ZIP_LZMA:
        return LZMACompressor()
    else:
        return None


def _get_decompressor(compress_type):
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))


class _SharedFile:
    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing
        self.seekable = file.seekable
        self.tell = file.tell

    def seek(self, offset, whence=0):
        with self._lock:
            if self._writing():
                raise ValueError("Can't reposition in the ZIP file while "
                        "there is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(offset, whence)
            self._pos = self._file.tell()
            return self._pos

    def read(self, n=-1):
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(self._pos)
            data = self._file.read(n)
            self._pos = self._file.tell()
            return data

    def close(self):
        if self._file is not None:
            fileobj = self._file
            self._file = None
            self._close(fileobj)

# Provide the tell method for unseekable streams
class _Tellable:
    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        n = self.fp.write(data)
        self.offset += n
        return n

    def tell(self):
        return self.offset

    def flush(self):
        self.fp.flush()

    def close(self):
        self.fp.close()


class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)


    def _init_decrypter(self):
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if self.closed:
            raise ValueError("read from closed file.")
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def seek(self, offset, whence=0):
        if self.closed:
            raise ValueError("seek on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
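        # Current position = bytes already decompressed (file size minus
        # what is left) minus what still sits unread in the read buffer.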
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos


class _ZipWriteFile(io.BufferedIOBase):
    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any data that supports the buffer protocol
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
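                # This is the "data descriptor" record: the CRC and sizes that
                # were not known when the local header was written; ZIP64 uses
                # 8-byte size fields, otherwise 4-byte fields.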
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False



class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
                compresslevel=None)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
          or append 'a'.
    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.
    compresslevel: None (default for the given compression type) or an integer
                   specifying the level to pass to the compressor.
                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
                   When using ZIP_BZIP2 integers 1 through 9 are accepted.

    """

    fp = None                   # Set here since __del__ checks it
    _windows_illegal_name_trans_table = None

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'."""
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
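            # If opening in the preferred mode fails (e.g. appending to a file
            # that does not exist yet), fall back once via modeDict, for
            # example from 'r+b' to 'w+b'.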
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if self.fp is not None:
            if self._filePassed:
                result.append(' file=%r' % self.fp)
            elif self.filename is not None:
                result.append(' filename=%r' % self.filename)
            result.append(' mode=%r' % self.mode)
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file."""
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
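        # (i.e. the amount of data prepended to the archive, such as a
        # self-extractor stub; every recorded offset has to be shifted by it)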
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total", total)


    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self, file=None):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
              file=file)
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
                  file=file)

    def testzip(self):
        """Read all the files and check the CRC."""
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                with self.open(zinfo.filename, "r") as f:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
            except BadZipFile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'."""
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd:
            self.pwd = pwd
        else:
            self.pwd = None

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
        # check for valid comment length
        if len(comment) > ZIP_MAX_COMMENT:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT, stacklevel=2)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def read(self, name, pwd=None):
        """Return file bytes for name."""
        with self.open(name, "r", pwd) as fp:
            return fp.read()

    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            if is_encrypted:
1559                if not pwd:
1560                    pwd = self.pwd
1561                if not pwd:
1562                    raise RuntimeError("File %r is encrypted, password "
1563                                       "required for extraction" % name)
1564            else:
1565                pwd = None
1566
1567            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
1568        except:
1569            zef_file.close()
1570            raise
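    # Editorial note: a hedged sketch of open() in both modes; the archive and
    # member names are illustrative.  Reading yields a binary file-like object,
    # and mode 'w' streams a new member into the archive without holding the
    # whole payload in memory.
    #
    #     with ZipFile('example.zip') as zf:
    #         with zf.open('a.txt') as f:                # read a member
    #             first_line = f.readline()
    #     with ZipFile('example.zip', 'a') as zf:
    #         with zf.open('new.bin', mode='w') as dest: # add a member
    #             dest.write(b'payload')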
1571
1572    def _open_to_write(self, zinfo, force_zip64=False):
1573        if force_zip64 and not self._allowZip64:
1574            raise ValueError(
1575                "force_zip64 is True, but allowZip64 was False when opening "
1576                "the ZIP file."
1577            )
1578        if self._writing:
1579            raise ValueError("Can't write to the ZIP file while there is "
1580                             "another write handle open on it. "
1581                             "Close the first handle before opening another.")
1582
1583        # Size and CRC are overwritten with correct data after processing the file
1584        zinfo.compress_size = 0
1585        zinfo.CRC = 0
1586
1587        zinfo.flag_bits = 0x00
1588        if zinfo.compress_type == ZIP_LZMA:
1589            # Compressed data includes an end-of-stream (EOS) marker
1590            zinfo.flag_bits |= 0x02
1591        if not self._seekable:
1592            zinfo.flag_bits |= 0x08
1593
1594        if not zinfo.external_attr:
1595            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------
1596
1597        # Compressed size can be larger than uncompressed size
1598        zip64 = self._allowZip64 and \
1599                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1600
1601        if self._seekable:
1602            self.fp.seek(self.start_dir)
1603        zinfo.header_offset = self.fp.tell()
1604
1605        self._writecheck(zinfo)
1606        self._didModify = True
1607
1608        self.fp.write(zinfo.FileHeader(zip64))
1609
1610        self._writing = True
1611        return _ZipWriteFile(self, zinfo, zip64)
1612
1613    def extract(self, member, path=None, pwd=None):
1614        """Extract a member from the archive to the current working directory,
1615           using its full name. Its file information is extracted as accurately
1616           as possible. `member' may be a filename or a ZipInfo object. You can
1617           specify a different directory using `path'.
1618        """
1619        if path is None:
1620            path = os.getcwd()
1621        else:
1622            path = os.fspath(path)
1623
1624        return self._extract_member(member, path, pwd)
1625
1626    def extractall(self, path=None, members=None, pwd=None):
1627        """Extract all members from the archive to the current working
1628           directory. `path' specifies a different directory to extract to.
1629           `members' is optional and must be a subset of the list returned
1630           by namelist().
1631        """
1632        if members is None:
1633            members = self.namelist()
1634
1635        if path is None:
1636            path = os.getcwd()
1637        else:
1638            path = os.fspath(path)
1639
1640        for zipinfo in members:
1641            self._extract_member(zipinfo, path, pwd)
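    # Editorial note: illustrative use of extract()/extractall(); the output
    # directory and member names are assumptions.
    #
    #     with ZipFile('example.zip') as zf:
    #         zf.extract('a.txt', path='out')            # one member
    #         zf.extractall('out', members=['b/c.txt'])  # a chosen subset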
1642
1643    @classmethod
1644    def _sanitize_windows_name(cls, arcname, pathsep):
1645        """Replace bad characters and remove trailing dots from parts."""
1646        table = cls._windows_illegal_name_trans_table
1647        if not table:
1648            illegal = ':<>|"?*'
1649            table = str.maketrans(illegal, '_' * len(illegal))
1650            cls._windows_illegal_name_trans_table = table
1651        arcname = arcname.translate(table)
1652        # remove trailing dots
1653        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1654        # rejoin, removing empty parts.
1655        arcname = pathsep.join(x for x in arcname if x)
1656        return arcname
1657
1658    def _extract_member(self, member, targetpath, pwd):
1659        """Extract the ZipInfo object 'member' to a physical
1660           file on the path targetpath.
1661        """
1662        if not isinstance(member, ZipInfo):
1663            member = self.getinfo(member)
1664
1665        # build the destination pathname, replacing
1666        # forward slashes with platform-specific separators.
1667        arcname = member.filename.replace('/', os.path.sep)
1668
1669        if os.path.altsep:
1670            arcname = arcname.replace(os.path.altsep, os.path.sep)
1671        # interpret absolute pathname as relative, remove drive letter or
1672        # UNC path, redundant separators, "." and ".." components.
1673        arcname = os.path.splitdrive(arcname)[1]
1674        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
1675        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1676                                   if x not in invalid_path_parts)
1677        if os.path.sep == '\\':
1678            # filter illegal characters on Windows
1679            arcname = self._sanitize_windows_name(arcname, os.path.sep)
1680
1681        targetpath = os.path.join(targetpath, arcname)
1682        targetpath = os.path.normpath(targetpath)
1683
1684        # Create all upper directories if necessary.
1685        upperdirs = os.path.dirname(targetpath)
1686        if upperdirs and not os.path.exists(upperdirs):
1687            os.makedirs(upperdirs)
1688
1689        if member.is_dir():
1690            if not os.path.isdir(targetpath):
1691                os.mkdir(targetpath)
1692            return targetpath
1693
1694        with self.open(member, pwd=pwd) as source, \
1695             open(targetpath, "wb") as target:
1696            shutil.copyfileobj(source, target)
1697
1698        return targetpath
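    # Editorial note on the sanitization above: member names are normalized
    # before being joined onto targetpath, so an entry cannot escape the
    # extraction directory.  For example, a member named '../../etc/passwd'
    # loses its '..' components and is written to '<targetpath>/etc/passwd';
    # on Windows a leading drive letter is also dropped and illegal characters
    # are replaced with '_'.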
1699
1700    def _writecheck(self, zinfo):
1701        """Check for errors before writing a file to the archive."""
1702        if zinfo.filename in self.NameToInfo:
1703            import warnings
1704            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1705        if self.mode not in ('w', 'x', 'a'):
1706            raise ValueError("write() requires mode 'w', 'x', or 'a'")
1707        if not self.fp:
1708            raise ValueError(
1709                "Attempt to write ZIP archive that was already closed")
1710        _check_compression(zinfo.compress_type)
1711        if not self._allowZip64:
1712            requires_zip64 = None
1713            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1714                requires_zip64 = "Files count"
1715            elif zinfo.file_size > ZIP64_LIMIT:
1716                requires_zip64 = "Filesize"
1717            elif zinfo.header_offset > ZIP64_LIMIT:
1718                requires_zip64 = "Zipfile size"
1719            if requires_zip64:
1720                raise LargeZipFile(requires_zip64 +
1721                                   " would require ZIP64 extensions")
1722
1723    def write(self, filename, arcname=None,
1724              compress_type=None, compresslevel=None):
1725        """Put the bytes from filename into the archive under the name
1726        arcname."""
1727        if not self.fp:
1728            raise ValueError(
1729                "Attempt to write to ZIP archive that was already closed")
1730        if self._writing:
1731            raise ValueError(
1732                "Can't write to ZIP archive while an open writing handle exists"
1733            )
1734
1735        zinfo = ZipInfo.from_file(filename, arcname,
1736                                  strict_timestamps=self._strict_timestamps)
1737
1738        if zinfo.is_dir():
1739            zinfo.compress_size = 0
1740            zinfo.CRC = 0
1741        else:
1742            if compress_type is not None:
1743                zinfo.compress_type = compress_type
1744            else:
1745                zinfo.compress_type = self.compression
1746
1747            if compresslevel is not None:
1748                zinfo._compresslevel = compresslevel
1749            else:
1750                zinfo._compresslevel = self.compresslevel
1751
1752        if zinfo.is_dir():
1753            with self._lock:
1754                if self._seekable:
1755                    self.fp.seek(self.start_dir)
1756                zinfo.header_offset = self.fp.tell()  # Start of header bytes
1757                if zinfo.compress_type == ZIP_LZMA:
1758                    # Compressed data includes an end-of-stream (EOS) marker
1759                    zinfo.flag_bits |= 0x02
1760
1761                self._writecheck(zinfo)
1762                self._didModify = True
1763
1764                self.filelist.append(zinfo)
1765                self.NameToInfo[zinfo.filename] = zinfo
1766                self.fp.write(zinfo.FileHeader(False))
1767                self.start_dir = self.fp.tell()
1768        else:
1769            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1770                shutil.copyfileobj(src, dest, 1024*8)
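    # Editorial note: a minimal sketch of write(); file and archive names are
    # assumptions.  When arcname is omitted it is derived from filename
    # (roughly: drive letter and leading path separators removed).
    #
    #     with ZipFile('out.zip', 'w') as zf:
    #         zf.write('report.txt')                     # stored (default)
    #         zf.write('report.txt', arcname='doc/report.txt',
    #                  compress_type=ZIP_DEFLATED, compresslevel=9)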
1771
1772    def writestr(self, zinfo_or_arcname, data,
1773                 compress_type=None, compresslevel=None):
1774        """Write a file into the archive.  The content is 'data', which
1775        may be either a 'str' or a 'bytes' instance; if it is a 'str',
1776        it is encoded as UTF-8 first.
1777        'zinfo_or_arcname' is either a ZipInfo instance or
1778        the name of the file in the archive."""
1779        if isinstance(data, str):
1780            data = data.encode("utf-8")
1781        if not isinstance(zinfo_or_arcname, ZipInfo):
1782            zinfo = ZipInfo(filename=zinfo_or_arcname,
1783                            date_time=time.localtime(time.time())[:6])
1784            zinfo.compress_type = self.compression
1785            zinfo._compresslevel = self.compresslevel
1786            if zinfo.filename[-1] == '/':
1787                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1788                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1789            else:
1790                zinfo.external_attr = 0o600 << 16     # ?rw-------
1791        else:
1792            zinfo = zinfo_or_arcname
1793
1794        if not self.fp:
1795            raise ValueError(
1796                "Attempt to write to ZIP archive that was already closed")
1797        if self._writing:
1798            raise ValueError(
1799                "Can't write to ZIP archive while an open writing handle exists."
1800            )
1801
1802        if compress_type is not None:
1803            zinfo.compress_type = compress_type
1804
1805        if compresslevel is not None:
1806            zinfo._compresslevel = compresslevel
1807
1808        zinfo.file_size = len(data)            # Uncompressed size
1809        with self._lock:
1810            with self.open(zinfo, mode='w') as dest:
1811                dest.write(data)
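    # Editorial note: an illustrative writestr() sketch; passing a ZipInfo
    # lets the caller control the timestamp and permissions.  Names and data
    # below are assumptions.
    #
    #     with ZipFile('out.zip', 'w') as zf:
    #         zf.writestr('notes/readme.txt', 'hello')   # str is encoded UTF-8
    #         zi = ZipInfo('data.bin', date_time=(2020, 1, 1, 0, 0, 0))
    #         zi.external_attr = 0o644 << 16             # -rw-r--r--
    #         zf.writestr(zi, b'\x00\x01\x02', compress_type=ZIP_DEFLATED)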
1812
1813    def __del__(self):
1814        """Call the "close()" method in case the user forgot."""
1815        self.close()
1816
1817    def close(self):
1818        """Close the file, and for mode 'w', 'x' and 'a' write the ending
1819        records."""
1820        if self.fp is None:
1821            return
1822
1823        if self._writing:
1824            raise ValueError("Can't close the ZIP file while there is "
1825                             "an open writing handle on it. "
1826                             "Close the writing handle before closing the zip.")
1827
1828        try:
1829            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1830                with self._lock:
1831                    if self._seekable:
1832                        self.fp.seek(self.start_dir)
1833                    self._write_end_record()
1834        finally:
1835            fp = self.fp
1836            self.fp = None
1837            self._fpclose(fp)
1838
1839    def _write_end_record(self):
1840        for zinfo in self.filelist:         # write central directory
1841            dt = zinfo.date_time
1842            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1843            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1844            extra = []
1845            if zinfo.file_size > ZIP64_LIMIT \
1846               or zinfo.compress_size > ZIP64_LIMIT:
1847                extra.append(zinfo.file_size)
1848                extra.append(zinfo.compress_size)
1849                file_size = 0xffffffff
1850                compress_size = 0xffffffff
1851            else:
1852                file_size = zinfo.file_size
1853                compress_size = zinfo.compress_size
1854
1855            if zinfo.header_offset > ZIP64_LIMIT:
1856                extra.append(zinfo.header_offset)
1857                header_offset = 0xffffffff
1858            else:
1859                header_offset = zinfo.header_offset
1860
1861            extra_data = zinfo.extra
1862            min_version = 0
1863            if extra:
1864                # Append a ZIP64 field to the extra data
1865                extra_data = _strip_extra(extra_data, (1,))
1866                extra_data = struct.pack(
1867                    '<HH' + 'Q'*len(extra),
1868                    1, 8*len(extra), *extra) + extra_data
1869
1870                min_version = ZIP64_VERSION
1871
1872            if zinfo.compress_type == ZIP_BZIP2:
1873                min_version = max(BZIP2_VERSION, min_version)
1874            elif zinfo.compress_type == ZIP_LZMA:
1875                min_version = max(LZMA_VERSION, min_version)
1876
1877            extract_version = max(min_version, zinfo.extract_version)
1878            create_version = max(min_version, zinfo.create_version)
1879            filename, flag_bits = zinfo._encodeFilenameFlags()
1880            centdir = struct.pack(structCentralDir,
1881                                  stringCentralDir, create_version,
1882                                  zinfo.create_system, extract_version, zinfo.reserved,
1883                                  flag_bits, zinfo.compress_type, dostime, dosdate,
1884                                  zinfo.CRC, compress_size, file_size,
1885                                  len(filename), len(extra_data), len(zinfo.comment),
1886                                  0, zinfo.internal_attr, zinfo.external_attr,
1887                                  header_offset)
1888            self.fp.write(centdir)
1889            self.fp.write(filename)
1890            self.fp.write(extra_data)
1891            self.fp.write(zinfo.comment)
1892
1893        pos2 = self.fp.tell()
1894        # Write end-of-zip-archive record
1895        centDirCount = len(self.filelist)
1896        centDirSize = pos2 - self.start_dir
1897        centDirOffset = self.start_dir
1898        requires_zip64 = None
1899        if centDirCount > ZIP_FILECOUNT_LIMIT:
1900            requires_zip64 = "Files count"
1901        elif centDirOffset > ZIP64_LIMIT:
1902            requires_zip64 = "Central directory offset"
1903        elif centDirSize > ZIP64_LIMIT:
1904            requires_zip64 = "Central directory size"
1905        if requires_zip64:
1906            # Need to write the ZIP64 end-of-archive records
1907            if not self._allowZip64:
1908                raise LargeZipFile(requires_zip64 +
1909                                   " would require ZIP64 extensions")
1910            zip64endrec = struct.pack(
1911                structEndArchive64, stringEndArchive64,
1912                44, 45, 45, 0, 0, centDirCount, centDirCount,
1913                centDirSize, centDirOffset)
1914            self.fp.write(zip64endrec)
1915
1916            zip64locrec = struct.pack(
1917                structEndArchive64Locator,
1918                stringEndArchive64Locator, 0, pos2, 1)
1919            self.fp.write(zip64locrec)
1920            centDirCount = min(centDirCount, 0xFFFF)
1921            centDirSize = min(centDirSize, 0xFFFFFFFF)
1922            centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1923
1924        endrec = struct.pack(structEndArchive, stringEndArchive,
1925                             0, 0, centDirCount, centDirCount,
1926                             centDirSize, centDirOffset, len(self._comment))
1927        self.fp.write(endrec)
1928        self.fp.write(self._comment)
1929        if self.mode == "a":
1930            self.fp.truncate()
1931        self.fp.flush()
1932
1933    def _fpclose(self, fp):
1934        assert self._fileRefCnt > 0
1935        self._fileRefCnt -= 1
1936        if not self._fileRefCnt and not self._filePassed:
1937            fp.close()
1938
1939
1940class PyZipFile(ZipFile):
1941    """Class to create ZIP archives with Python library files and packages."""
1942
1943    def __init__(self, file, mode="r", compression=ZIP_STORED,
1944                 allowZip64=True, optimize=-1):
1945        ZipFile.__init__(self, file, mode=mode, compression=compression,
1946                         allowZip64=allowZip64)
1947        self._optimize = optimize
1948
1949    def writepy(self, pathname, basename="", filterfunc=None):
1950        """Add all files from "pathname" to the ZIP archive.
1951
1952        If pathname is a package directory, search the directory and
1953        all package subdirectories recursively for *.py files and add
1954        the modules to the archive.  If pathname is a plain directory,
1955        add the *.py files found directly in it.  Otherwise, pathname
1956        must be a single Python *.py file, and that module is added to
1957        the archive.  Added modules are always stored as module.pyc;
1958        this method compiles module.py into module.pyc when
1959        necessary.
1960        If filterfunc(pathname) is given, it is called with each candidate
1961        path; when it returns a false value, that file or directory is skipped.
1962        """
1963        pathname = os.fspath(pathname)
1964        if filterfunc and not filterfunc(pathname):
1965            if self.debug:
1966                label = 'path' if os.path.isdir(pathname) else 'file'
1967                print('%s %r skipped by filterfunc' % (label, pathname))
1968            return
1969        dir, name = os.path.split(pathname)
1970        if os.path.isdir(pathname):
1971            initname = os.path.join(pathname, "__init__.py")
1972            if os.path.isfile(initname):
1973                # This is a package directory, add it
1974                if basename:
1975                    basename = "%s/%s" % (basename, name)
1976                else:
1977                    basename = name
1978                if self.debug:
1979                    print("Adding package in", pathname, "as", basename)
1980                fname, arcname = self._get_codename(initname[0:-3], basename)
1981                if self.debug:
1982                    print("Adding", arcname)
1983                self.write(fname, arcname)
1984                dirlist = sorted(os.listdir(pathname))
1985                dirlist.remove("__init__.py")
1986                # Add all *.py files and package subdirectories
1987                for filename in dirlist:
1988                    path = os.path.join(pathname, filename)
1989                    root, ext = os.path.splitext(filename)
1990                    if os.path.isdir(path):
1991                        if os.path.isfile(os.path.join(path, "__init__.py")):
1992                            # This is a package directory, add it
1993                            self.writepy(path, basename,
1994                                         filterfunc=filterfunc)  # Recursive call
1995                    elif ext == ".py":
1996                        if filterfunc and not filterfunc(path):
1997                            if self.debug:
1998                                print('file %r skipped by filterfunc' % path)
1999                            continue
2000                        fname, arcname = self._get_codename(path[0:-3],
2001                                                            basename)
2002                        if self.debug:
2003                            print("Adding", arcname)
2004                        self.write(fname, arcname)
2005            else:
2006                # This is NOT a package directory, add its files at top level
2007                if self.debug:
2008                    print("Adding files from directory", pathname)
2009                for filename in sorted(os.listdir(pathname)):
2010                    path = os.path.join(pathname, filename)
2011                    root, ext = os.path.splitext(filename)
2012                    if ext == ".py":
2013                        if filterfunc and not filterfunc(path):
2014                            if self.debug:
2015                                print('file %r skipped by filterfunc' % path)
2016                            continue
2017                        fname, arcname = self._get_codename(path[0:-3],
2018                                                            basename)
2019                        if self.debug:
2020                            print("Adding", arcname)
2021                        self.write(fname, arcname)
2022        else:
2023            if pathname[-3:] != ".py":
2024                raise RuntimeError(
2025                    'Files added with writepy() must end with ".py"')
2026            fname, arcname = self._get_codename(pathname[0:-3], basename)
2027            if self.debug:
2028                print("Adding file", arcname)
2029            self.write(fname, arcname)
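    # Editorial note: a hedged usage sketch for PyZipFile.writepy(); 'mypkg'
    # is an assumed package directory containing an __init__.py.
    #
    #     with PyZipFile('mypkg.zip', 'w', optimize=2) as pzf:
    #         pzf.writepy('mypkg')               # package and subpackages as .pyc
    #         pzf.writepy('extra_module.py')     # single module at the top level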
2030
2031    def _get_codename(self, pathname, basename):
2032        """Return (filename, archivename) for the path.
2033
2034        Given a module name path, return the correct file path and
2035        archive name, compiling if necessary.  For example, given
2036        /python/lib/string, return (/python/lib/string.pyc, string).
2037        """
2038        def _compile(file, optimize=-1):
2039            import py_compile
2040            if self.debug:
2041                print("Compiling", file)
2042            try:
2043                py_compile.compile(file, doraise=True, optimize=optimize)
2044            except py_compile.PyCompileError as err:
2045                print(err.msg)
2046                return False
2047            return True
2048
2049        file_py  = pathname + ".py"
2050        file_pyc = pathname + ".pyc"
2051        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2052        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2053        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
2054        if self._optimize == -1:
2055            # legacy mode: use whatever file is present
2056            if (os.path.isfile(file_pyc) and
2057                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2058                # Use .pyc file.
2059                arcname = fname = file_pyc
2060            elif (os.path.isfile(pycache_opt0) and
2061                  os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
2062                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2063                # file name in the archive.
2064                fname = pycache_opt0
2065                arcname = file_pyc
2066            elif (os.path.isfile(pycache_opt1) and
2067                  os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2068                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2069                # file name in the archive.
2070                fname = pycache_opt1
2071                arcname = file_pyc
2072            elif (os.path.isfile(pycache_opt2) and
2073                  os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2074                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2075                # file name in the archive.
2076                fname = pycache_opt2
2077                arcname = file_pyc
2078            else:
2079                # Compile py into PEP 3147 pyc file.
2080                if _compile(file_py):
2081                    if sys.flags.optimize == 0:
2082                        fname = pycache_opt0
2083                    elif sys.flags.optimize == 1:
2084                        fname = pycache_opt1
2085                    else:
2086                        fname = pycache_opt2
2087                    arcname = file_pyc
2088                else:
2089                    fname = arcname = file_py
2090        else:
2091            # new mode: use given optimization level
2092            if self._optimize == 0:
2093                fname = pycache_opt0
2094                arcname = file_pyc
2095            else:
2096                arcname = file_pyc
2097                if self._optimize == 1:
2098                    fname = pycache_opt1
2099                elif self._optimize == 2:
2100                    fname = pycache_opt2
2101                else:
2102                    msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2103                    raise ValueError(msg)
2104            if not (os.path.isfile(fname) and
2105                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2106                if not _compile(file_py, optimize=self._optimize):
2107                    fname = arcname = file_py
2108        archivename = os.path.split(arcname)[1]
2109        if basename:
2110            archivename = "%s/%s" % (basename, archivename)
2111        return (fname, archivename)
2112
2113
2114def _parents(path):
2115    """
2116    Given a path with elements separated by
2117    posixpath.sep, generate all parents of that path.
2118
2119    >>> list(_parents('b/d'))
2120    ['b']
2121    >>> list(_parents('/b/d/'))
2122    ['/b']
2123    >>> list(_parents('b/d/f/'))
2124    ['b/d', 'b']
2125    >>> list(_parents('b'))
2126    []
2127    >>> list(_parents(''))
2128    []
2129    """
2130    return itertools.islice(_ancestry(path), 1, None)
2131
2132
2133def _ancestry(path):
2134    """
2135    Given a path with elements separated by
2136    posixpath.sep, generate all elements of that path
2137
2138    >>> list(_ancestry('b/d'))
2139    ['b/d', 'b']
2140    >>> list(_ancestry('/b/d/'))
2141    ['/b/d', '/b']
2142    >>> list(_ancestry('b/d/f/'))
2143    ['b/d/f', 'b/d', 'b']
2144    >>> list(_ancestry('b'))
2145    ['b']
2146    >>> list(_ancestry(''))
2147    []
2148    """
2149    path = path.rstrip(posixpath.sep)
2150    while path and path != posixpath.sep:
2151        yield path
2152        path, tail = posixpath.split(path)
2153
2154
2155_dedupe = dict.fromkeys
2156"""Deduplicate an iterable in original order"""
2157
2158
2159def _difference(minuend, subtrahend):
2160    """
2161    Return items in minuend not in subtrahend, retaining order
2162    with O(1) lookup.
2163    """
2164    return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2165
2166
2167class CompleteDirs(ZipFile):
2168    """
2169    A ZipFile subclass that ensures that implied directories
2170    are always included in the namelist.
2171    """
2172
2173    @staticmethod
2174    def _implied_dirs(names):
2175        parents = itertools.chain.from_iterable(map(_parents, names))
2176        as_dirs = (p + posixpath.sep for p in parents)
2177        return _dedupe(_difference(as_dirs, names))
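    # Editorial note: an illustrative trace of the completion above.  Given
    # names ['b/c.txt', 'b/d/e.txt'], the implied directories are
    # ['b/', 'b/d/'], so namelist() reports them even though the archive
    # contains no explicit directory entries.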
2178
2179    def namelist(self):
2180        names = super(CompleteDirs, self).namelist()
2181        return names + list(self._implied_dirs(names))
2182
2183    def _name_set(self):
2184        return set(self.namelist())
2185
2186    def resolve_dir(self, name):
2187        """
2188        If the name represents a directory, return that name
2189        as a directory (with the trailing slash).
2190        """
2191        names = self._name_set()
2192        dirname = name + '/'
2193        dir_match = name not in names and dirname in names
2194        return dirname if dir_match else name
2195
2196    @classmethod
2197    def make(cls, source):
2198        """
2199        Given a source (filename or zipfile), return an
2200        instance of an appropriate CompleteDirs subclass.
2201        """
2202        if isinstance(source, CompleteDirs):
2203            return source
2204
2205        if not isinstance(source, ZipFile):
2206            return cls(source)
2207
2208        # Only allow for FastLookup when supplied zipfile is read-only
2209        if 'r' not in source.mode:
2210            cls = CompleteDirs
2211
2212        source.__class__ = cls
2213        return source
2214
2215
2216class FastLookup(CompleteDirs):
2217    """
2218    ZipFile subclass to ensure implicit
2219    dirs exist and are resolved rapidly.
2220    """
2221
2222    def namelist(self):
2223        with contextlib.suppress(AttributeError):
2224            return self.__names
2225        self.__names = super(FastLookup, self).namelist()
2226        return self.__names
2227
2228    def _name_set(self):
2229        with contextlib.suppress(AttributeError):
2230            return self.__lookup
2231        self.__lookup = super(FastLookup, self)._name_set()
2232        return self.__lookup
2233
2234
2235class Path:
2236    """
2237    A pathlib-compatible interface for zip files.
2238
2239    Consider a zip file with this structure::
2240
2241        .
2242        ├── a.txt
2243        └── b
2244            ├── c.txt
2245            └── d
2246                └── e.txt
2247
2248    >>> data = io.BytesIO()
2249    >>> zf = ZipFile(data, 'w')
2250    >>> zf.writestr('a.txt', 'content of a')
2251    >>> zf.writestr('b/c.txt', 'content of c')
2252    >>> zf.writestr('b/d/e.txt', 'content of e')
2253    >>> zf.filename = 'mem/abcde.zip'
2254
2255    Path accepts the zipfile object itself or a filename
2256
2257    >>> root = Path(zf)
2258
2259    From there, several path operations are available.
2260
2261    Directory iteration (including the zip file itself):
2262
2263    >>> a, b = root.iterdir()
2264    >>> a
2265    Path('mem/abcde.zip', 'a.txt')
2266    >>> b
2267    Path('mem/abcde.zip', 'b/')
2268
2269    name property:
2270
2271    >>> b.name
2272    'b'
2273
2274    join with divide operator:
2275
2276    >>> c = b / 'c.txt'
2277    >>> c
2278    Path('mem/abcde.zip', 'b/c.txt')
2279    >>> c.name
2280    'c.txt'
2281
2282    Read text:
2283
2284    >>> c.read_text()
2285    'content of c'
2286
2287    existence:
2288
2289    >>> c.exists()
2290    True
2291    >>> (b / 'missing.txt').exists()
2292    False
2293
2294    Coercion to string:
2295
2296    >>> import os
2297    >>> str(c).replace(os.sep, posixpath.sep)
2298    'mem/abcde.zip/b/c.txt'
2299
2300    At the root, ``name``, ``filename``, and ``parent``
2301    resolve to the zipfile. Note these attributes are not
2302    valid and will raise a ``ValueError`` if the zipfile
2303    has no filename.
2304
2305    >>> root.name
2306    'abcde.zip'
2307    >>> str(root.filename).replace(os.sep, posixpath.sep)
2308    'mem/abcde.zip'
2309    >>> str(root.parent)
2310    'mem'
2311    """
2312
2313    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
2314
2315    def __init__(self, root, at=""):
2316        """
2317        Construct a Path from a ZipFile or filename.
2318
2319        Note: When the source is an existing ZipFile object,
2320        its type (__class__) will be mutated to a
2321        specialized type. If the caller wishes to retain the
2322        original type, the caller should either create a
2323        separate ZipFile object or pass a filename.
2324        """
2325        self.root = FastLookup.make(root)
2326        self.at = at
2327
2328    def open(self, mode='r', *args, pwd=None, **kwargs):
2329        """
2330        Open this entry as text or binary following the semantics
2331        of ``pathlib.Path.open()`` by passing arguments through
2332        to io.TextIOWrapper().
2333        """
2334        if self.is_dir():
2335            raise IsADirectoryError(self)
2336        zip_mode = mode[0]
2337        if not self.exists() and zip_mode == 'r':
2338            raise FileNotFoundError(self)
2339        stream = self.root.open(self.at, zip_mode, pwd=pwd)
2340        if 'b' in mode:
2341            if args or kwargs:
2342                raise ValueError("encoding args invalid for binary operation")
2343            return stream
2344        else:
2345            kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
2346        return io.TextIOWrapper(stream, *args, **kwargs)
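    # Editorial note: a hedged sketch of Path.open(); text mode wraps the
    # underlying stream in io.TextIOWrapper, so encoding/newline arguments
    # behave as for built-in open().  'example.zip' is an assumed archive.
    #
    #     root = Path('example.zip')
    #     with (root / 'a.txt').open(encoding='utf-8') as f:
    #         text = f.read()
    #     with (root / 'a.txt').open('rb') as f:
    #         raw = f.read()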
2347
2348    @property
2349    def name(self):
2350        return pathlib.Path(self.at).name or self.filename.name
2351
2352    @property
2353    def filename(self):
2354        return pathlib.Path(self.root.filename).joinpath(self.at)
2355
2356    def read_text(self, *args, **kwargs):
2357        kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
2358        with self.open('r', *args, **kwargs) as strm:
2359            return strm.read()
2360
2361    def read_bytes(self):
2362        with self.open('rb') as strm:
2363            return strm.read()
2364
2365    def _is_child(self, path):
2366        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
2367
2368    def _next(self, at):
2369        return self.__class__(self.root, at)
2370
2371    def is_dir(self):
2372        return not self.at or self.at.endswith("/")
2373
2374    def is_file(self):
2375        return self.exists() and not self.is_dir()
2376
2377    def exists(self):
2378        return self.at in self.root._name_set()
2379
2380    def iterdir(self):
2381        if not self.is_dir():
2382            raise ValueError("Can't listdir a file")
2383        subs = map(self._next, self.root.namelist())
2384        return filter(self._is_child, subs)
2385
2386    def __str__(self):
2387        return posixpath.join(self.root.filename, self.at)
2388
2389    def __repr__(self):
2390        return self.__repr.format(self=self)
2391
2392    def joinpath(self, *other):
2393        next = posixpath.join(self.at, *other)
2394        return self._next(self.root.resolve_dir(next))
2395
2396    __truediv__ = joinpath
2397
2398    @property
2399    def parent(self):
2400        if not self.at:
2401            return self.filename.parent
2402        parent_at = posixpath.dirname(self.at.rstrip('/'))
2403        if parent_at:
2404            parent_at += '/'
2405        return self._next(parent_at)
2406
2407
2408def main(args=None):
2409    import argparse
2410
2411    description = 'A simple command-line interface for the zipfile module.'
2412    parser = argparse.ArgumentParser(description=description)
2413    group = parser.add_mutually_exclusive_group(required=True)
2414    group.add_argument('-l', '--list', metavar='<zipfile>',
2415                       help='Show listing of a zipfile')
2416    group.add_argument('-e', '--extract', nargs=2,
2417                       metavar=('<zipfile>', '<output_dir>'),
2418                       help='Extract zipfile into target dir')
2419    group.add_argument('-c', '--create', nargs='+',
2420                       metavar=('<name>', '<file>'),
2421                       help='Create zipfile from sources')
2422    group.add_argument('-t', '--test', metavar='<zipfile>',
2423                       help='Test if a zipfile is valid')
2424    args = parser.parse_args(args)
2425
2426    if args.test is not None:
2427        src = args.test
2428        with ZipFile(src, 'r') as zf:
2429            badfile = zf.testzip()
2430        if badfile:
2431            print("The following enclosed file is corrupted: {!r}".format(badfile))
2432        print("Done testing")
2433
2434    elif args.list is not None:
2435        src = args.list
2436        with ZipFile(src, 'r') as zf:
2437            zf.printdir()
2438
2439    elif args.extract is not None:
2440        src, curdir = args.extract
2441        with ZipFile(src, 'r') as zf:
2442            zf.extractall(curdir)
2443
2444    elif args.create is not None:
2445        zip_name = args.create.pop(0)
2446        files = args.create
2447
2448        def addToZip(zf, path, zippath):
2449            if os.path.isfile(path):
2450                zf.write(path, zippath, ZIP_DEFLATED)
2451            elif os.path.isdir(path):
2452                if zippath:
2453                    zf.write(path, zippath)
2454                for nm in sorted(os.listdir(path)):
2455                    addToZip(zf,
2456                             os.path.join(path, nm), os.path.join(zippath, nm))
2457            # else: ignore
2458
2459        with ZipFile(zip_name, 'w') as zf:
2460            for path in files:
2461                zippath = os.path.basename(path)
2462                if not zippath:
2463                    zippath = os.path.basename(os.path.dirname(path))
2464                if zippath in ('', os.curdir, os.pardir):
2465                    zippath = ''
2466                addToZip(zf, path, zippath)
2467
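# Editorial note: the command-line interface above is normally reached via
# ``python -m zipfile``.  Illustrative invocations (archive and path names are
# assumptions):
#
#     python -m zipfile -l archive.zip              # list contents
#     python -m zipfile -t archive.zip              # CRC-check every member
#     python -m zipfile -e archive.zip target_dir/  # extract everything
#     python -m zipfile -c archive.zip src_dir/ extra.txt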
2468
2469if __name__ == "__main__":
2470    main()
2471