• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""
2Read and write ZIP files.
3
4XXX references to utf-8 need further investigation.
5"""
6import binascii
7import importlib.util
8import io
9import itertools
10import os
11import posixpath
12import shutil
13import stat
14import struct
15import sys
16import threading
17import time
18import contextlib
19import pathlib
20
21try:
22    import zlib # We may need its compression method
23    crc32 = zlib.crc32
24except ImportError:
25    zlib = None
26    crc32 = binascii.crc32
27
28try:
29    import bz2 # We may need its compression method
30except ImportError:
31    bz2 = None
32
33try:
34    import lzma # We may need its compression method
35except ImportError:
36    lzma = None
37
38__all__ = ["BadZipFile", "BadZipfile", "error",
39           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
40           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
41           "Path"]
42
class BadZipFile(Exception):
    """Raised by this module for archives it cannot process.

    Within this file it is raised for corrupt extra fields and for
    archives that span multiple disks.
    """
45
46
class LargeZipFile(Exception):
    """Raised while writing when ZIP64 would be needed but is disabled.

    A member (or the archive) is too large for the classic ZIP format and
    the caller did not allow the ZIP64 extensions.
    """
52
53error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
54
55
56ZIP64_LIMIT = (1 << 31) - 1
57ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
58ZIP_MAX_COMMENT = (1 << 16) - 1
59
60# constants for Zip file compression methods
61ZIP_STORED = 0
62ZIP_DEFLATED = 8
63ZIP_BZIP2 = 12
64ZIP_LZMA = 14
65# Other ZIP compression methods not supported
66
67DEFAULT_VERSION = 20
68ZIP64_VERSION = 45
69BZIP2_VERSION = 46
70LZMA_VERSION = 63
71# we recognize (but not necessarily support) all features up to that version
72MAX_EXTRACT_VERSION = 63
73
74# Below are some formats and associated data for reading/writing headers using
75# the struct module.  The names and structures of headers/records are those used
76# in the PKWARE description of the ZIP file format:
77#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
78# (URL valid as of January 2008)
79
80# The "end of central directory" structure, magic number, size, and indices
81# (section V.I in the format document)
82structEndArchive = b"<4s4H2LH"
83stringEndArchive = b"PK\005\006"
84sizeEndCentDir = struct.calcsize(structEndArchive)
85
86_ECD_SIGNATURE = 0
87_ECD_DISK_NUMBER = 1
88_ECD_DISK_START = 2
89_ECD_ENTRIES_THIS_DISK = 3
90_ECD_ENTRIES_TOTAL = 4
91_ECD_SIZE = 5
92_ECD_OFFSET = 6
93_ECD_COMMENT_SIZE = 7
94# These last two indices are not part of the structure as defined in the
95# spec, but they are used internally by this module as a convenience
96_ECD_COMMENT = 8
97_ECD_LOCATION = 9
98
99# The "central directory" structure, magic number, size, and indices
100# of entries in the structure (section V.F in the format document)
101structCentralDir = "<4s4B4HL2L5H2L"
102stringCentralDir = b"PK\001\002"
103sizeCentralDir = struct.calcsize(structCentralDir)
104
105# indexes of entries in the central directory structure
106_CD_SIGNATURE = 0
107_CD_CREATE_VERSION = 1
108_CD_CREATE_SYSTEM = 2
109_CD_EXTRACT_VERSION = 3
110_CD_EXTRACT_SYSTEM = 4
111_CD_FLAG_BITS = 5
112_CD_COMPRESS_TYPE = 6
113_CD_TIME = 7
114_CD_DATE = 8
115_CD_CRC = 9
116_CD_COMPRESSED_SIZE = 10
117_CD_UNCOMPRESSED_SIZE = 11
118_CD_FILENAME_LENGTH = 12
119_CD_EXTRA_FIELD_LENGTH = 13
120_CD_COMMENT_LENGTH = 14
121_CD_DISK_NUMBER_START = 15
122_CD_INTERNAL_FILE_ATTRIBUTES = 16
123_CD_EXTERNAL_FILE_ATTRIBUTES = 17
124_CD_LOCAL_HEADER_OFFSET = 18
125
126# The "local file header" structure, magic number, size, and indices
127# (section V.A in the format document)
128structFileHeader = "<4s2B4HL2L2H"
129stringFileHeader = b"PK\003\004"
130sizeFileHeader = struct.calcsize(structFileHeader)
131
132_FH_SIGNATURE = 0
133_FH_EXTRACT_VERSION = 1
134_FH_EXTRACT_SYSTEM = 2
135_FH_GENERAL_PURPOSE_FLAG_BITS = 3
136_FH_COMPRESSION_METHOD = 4
137_FH_LAST_MOD_TIME = 5
138_FH_LAST_MOD_DATE = 6
139_FH_CRC = 7
140_FH_COMPRESSED_SIZE = 8
141_FH_UNCOMPRESSED_SIZE = 9
142_FH_FILENAME_LENGTH = 10
143_FH_EXTRA_FIELD_LENGTH = 11
144
145# The "Zip64 end of central directory locator" structure, magic number, and size
146structEndArchive64Locator = "<4sLQL"
147stringEndArchive64Locator = b"PK\x06\x07"
148sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
149
150# The "Zip64 end of central directory" record, magic number, size, and indices
151# (section V.G in the format document)
152structEndArchive64 = "<4sQ2H2L4Q"
153stringEndArchive64 = b"PK\x06\x06"
154sizeEndCentDir64 = struct.calcsize(structEndArchive64)
155
156_CD64_SIGNATURE = 0
157_CD64_DIRECTORY_RECSIZE = 1
158_CD64_CREATE_VERSION = 2
159_CD64_EXTRACT_VERSION = 3
160_CD64_DISK_NUMBER = 4
161_CD64_DISK_NUMBER_START = 5
162_CD64_NUMBER_ENTRIES_THIS_DISK = 6
163_CD64_NUMBER_ENTRIES_TOTAL = 7
164_CD64_DIRECTORY_SIZE = 8
165_CD64_OFFSET_START_CENTDIR = 9
166
167_DD_SIGNATURE = 0x08074b50
168
169_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
170
171def _strip_extra(extra, xids):
172    # Remove Extra Fields with specified IDs.
173    unpack = _EXTRA_FIELD_STRUCT.unpack
174    modified = False
175    buffer = []
176    start = i = 0
177    while i + 4 <= len(extra):
178        xid, xlen = unpack(extra[i : i + 4])
179        j = i + 4 + xlen
180        if xid in xids:
181            if i != start:
182                buffer.append(extra[start : i])
183            start = j
184            modified = True
185        i = j
186    if not modified:
187        return extra
188    return b''.join(buffer)
189
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while looking for the record counts as "not a ZIP
    file" and yields False.
    """
    try:
        found = bool(_EndRecData(fp))   # truthy record => correct magic number
    except OSError:
        found = False
    return found
197
def is_zipfile(filename):
    """Report whether *filename* looks like a ZIP archive.

    *filename* is either a path, which is opened in binary mode, or an
    object with a ``read`` attribute, which is used directly as the file.
    Any OSError raised while opening or probing gives False.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
213
def _EndRecData64(fpin, offset, endrec):
    """Merge the ZIP64 end-of-archive record, if present, into *endrec*.

    *offset* is the (negative) position of the classic end-of-central-
    directory record relative to the end of *fpin*.  The ZIP64 locator is
    expected immediately before it and the ZIP64 record immediately before
    the locator.  When either is missing, short or carries the wrong
    signature, *endrec* is returned untouched.

    Raises BadZipFile for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # The file is too small to hold a ZIP64 locator before the record.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, raw)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data' between the record and the locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    record = struct.unpack(structEndArchive64, raw)
    if record[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Overwrite the classic fields with their ZIP64 counterparts.
    field_map = (
        (_ECD_SIGNATURE, _CD64_SIGNATURE),
        (_ECD_DISK_NUMBER, _CD64_DISK_NUMBER),
        (_ECD_DISK_START, _CD64_DISK_NUMBER_START),
        (_ECD_ENTRIES_THIS_DISK, _CD64_NUMBER_ENTRIES_THIS_DISK),
        (_ECD_ENTRIES_TOTAL, _CD64_NUMBER_ENTRIES_TOTAL),
        (_ECD_SIZE, _CD64_DIRECTORY_SIZE),
        (_ECD_OFFSET, _CD64_OFFSET_START_CENTDIR),
    )
    for ecd_index, cd64_index in field_map:
        endrec[ecd_index] = record[cd64_index]
    return endrec
255
256
def _EndRecData(fpin):
    """Locate and decode the "End of Central Directory" record of *fpin*.

    Returns None when no record is found.  Otherwise returns a list with
    the eight unpacked fields of the record, then the archive comment
    (bytes) and finally the file offset at which the record starts; the
    list is passed through _EndRecData64() so ZIP64 values may replace the
    classic ones.
    """
    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the record itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    has_plain_record = (len(tail) == sizeEndCentDir
                        and tail[0:4] == stringEndArchive
                        and tail[-2:] == b"\000\000")
    if has_plain_record:
        # Signature matches and the comment length is zero.
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec.append(b"")                          # blank comment
        endrec.append(filesize - sizeEndCentDir)    # record start offset
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Slow path: either not a ZIP file, or a comment follows the record.
    # The comment is at most 64K long, so only that much of the tail is
    # searched; the signature is assumed not to occur inside the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    window = fpin.read()
    start = window.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    record_end = start + sizeEndCentDir
    recData = window[start:record_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]     # as claimed by the zip file
    endrec.append(window[record_end:record_end + commentSize])
    endrec.append(maxCommentStart + start)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + start - filesize, endrec)
316
317
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive.

    The attributes mirror the fields of the ZIP headers.  header_offset,
    CRC and _raw_time are declared in __slots__ but are not initialised by
    __init__; they are filled in by other code before being read.
    """

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Initialise the record for one archive member.

        filename is the member name.  It is cut at the first NUL character
        and os.sep is replaced by "/"; the unmodified value is kept in
        orig_filename.  date_time is a (year, month, day, hour, minute,
        second) sequence.

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        """Return a short description; default-valued fields are omitted."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits are shown as a file mode, the lower 16 as hex.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 selects whether a ZIP64 extra record (ID 1, carrying the
        64-bit sizes) is appended to the extra data.  None decides from the
        sizes: it is used when either size exceeds ZIP64_LIMIT.

        As a side effect extract_version and create_version are raised to
        the minimum version required by ZIP64 or the compression method.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while zip64 is
        false.
        """
        dt = self.date_time
        # Pack the timestamp into the 16-bit DOS date and time fields; the
        # seconds are stored with two-second resolution.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is added to the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record found in self.extra, if any.

        For each record with ID 0x0001, file_size, compress_size and
        header_offset are replaced by the 64-bit values from the record
        when they currently hold the "see ZIP64 record" marker value.

        Raises BadZipFile if a record claims more data than is available
        or if a required 64-bit value is missing.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    # data ran out before the value named by `field`.
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is false, modification years before 1980 are
        replaced by 1980-01-01 00:00:00 and years after 2107 by
        2107-12-31 23:59:59.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory.

        A member is a directory when its name ends with "/".  An empty
        name is not a directory.
        """
        # endswith() instead of [-1]: indexing raised IndexError for an
        # empty filename, which also made repr() fail.
        return self.filename.endswith('/')
531
532
533# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
534# internal keys. We noticed that a direct implementation is faster than
535# relying on binascii.crc32().
536
537_crctable = None
538def _gen_crc(crc):
539    for j in range(8):
540        if crc & 1:
541            crc = (crc >> 1) ^ 0xEDB88320
542        else:
543            crc >>= 1
544    return crc
545
546# ZIP supports a password-based form of encryption. Even though known
547# plaintext attacks have been found against it, it is still useful
548# to be able to get data out of such a file.
549#
550# Usage:
551#     zd = _ZipDecrypter(mypwd)
552#     plain_bytes = zd(cypher_bytes)
553
554def _ZipDecrypter(pwd):
555    key0 = 305419896
556    key1 = 591751049
557    key2 = 878082192
558
559    global _crctable
560    if _crctable is None:
561        _crctable = list(map(_gen_crc, range(256)))
562    crctable = _crctable
563
564    def crc32(ch, crc):
565        """Compute the CRC32 primitive on one byte."""
566        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
567
568    def update_keys(c):
569        nonlocal key0, key1, key2
570        key0 = crc32(c, key0)
571        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
572        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
573        key2 = crc32(key1 >> 24, key2)
574
575    for p in pwd:
576        update_keys(p)
577
578    def decrypter(data):
579        """Decrypt a bytes object."""
580        result = bytearray()
581        append = result.append
582        for c in data:
583            k = key2 | 2
584            c ^= ((k * (k^1)) >> 8) & 0xFF
585            update_keys(c)
586            append(c)
587        return bytes(result)
588
589    return decrypter
590
591
class LZMACompressor:
    """Compressor object producing the LZMA stream stored in ZIP members.

    The first output is prefixed with a small header: the bytes 9 and 4,
    the little-endian 16-bit length of the LZMA1 filter properties, and
    the properties themselves.  The raw LZMA1 stream follows.
    """

    def __init__(self):
        # The real compressor is created lazily, with the header.
        self._comp = None

    def _init(self):
        """Create the raw LZMA1 compressor and return the stream header."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        lzma1_filter = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[lzma1_filter])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*; the header is prepended on first use."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream; emits the header too if nothing was written."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
613
614
class LZMADecompressor:
    """Decompressor for the LZMA stream stored in ZIP members.

    Input is buffered until the 4-byte header, the filter properties it
    announces and at least one further byte have arrived; only then is
    the raw LZMA1 decompressor created.  ``eof`` mirrors the ``eof`` flag
    of that decompressor.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return the bytes decompressed from *data* (possibly empty)."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 hold the little-endian size of the properties.
            (psize,) = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            lzma1_filter = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[lzma1_filter])
            data = pending[body_start:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
641
642
643compressor_names = {
644    0: 'store',
645    1: 'shrink',
646    2: 'reduce',
647    3: 'reduce',
648    4: 'reduce',
649    5: 'reduce',
650    6: 'implode',
651    7: 'tokenize',
652    8: 'deflate',
653    9: 'deflate64',
654    10: 'implode',
655    12: 'bzip2',
656    14: 'lzma',
657    18: 'terse',
658    19: 'lz77',
659    97: 'wavpack',
660    98: 'ppmd',
661}
662
def _check_compression(compression):
    """Verify that *compression* can be used in this interpreter.

    Returns None for ZIP_STORED and for any method whose support module
    was imported.  Raises RuntimeError when the module for a known method
    is missing and NotImplementedError for any other method.
    """
    if compression == ZIP_STORED:
        return
    requirements = (
        (ZIP_DEFLATED, zlib,
         "Compression requires the (missing) zlib module"),
        (ZIP_BZIP2, bz2,
         "Compression requires the (missing) bz2 module"),
        (ZIP_LZMA, lzma,
         "Compression requires the (missing) lzma module"),
    )
    for method, module, message in requirements:
        if compression == method:
            if not module:
                raise RuntimeError(message)
            return
    raise NotImplementedError("That compression method is not supported")
680
681
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*, or None.

    *compresslevel*, when given, is passed to the deflate and bzip2
    compressors; it is ignored for ZIP_LZMA.  None is returned for every
    other method (including ZIP_STORED).
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        # Negative window bits: raw deflate stream without zlib header.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
696
697
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED, whose data needs no decompression.

    Raises RuntimeError (from _check_compression) when the module backing
    a supported method is missing, and NotImplementedError naming the
    method number -- and its name when compressor_names knows it -- for
    any unsupported method.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        _check_compression(compress_type)
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        _check_compression(compress_type)
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        _check_compression(compress_type)
        return LZMADecompressor()

    # Unsupported method.  _check_compression() is deliberately not called
    # first: it would raise a generic NotImplementedError and the more
    # informative message below could never be reached.
    descr = compressor_names.get(compress_type)
    if descr:
        raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
    raise NotImplementedError("compression type %d" % (compress_type,))
714
715
716class _SharedFile:
717    def __init__(self, file, pos, close, lock, writing):
718        self._file = file
719        self._pos = pos
720        self._close = close
721        self._lock = lock
722        self._writing = writing
723        self.seekable = file.seekable
724        self.tell = file.tell
725
726    def seek(self, offset, whence=0):
727        with self._lock:
728            if self._writing():
729                raise ValueError("Can't reposition in the ZIP file while "
730                        "there is an open writing handle on it. "
731                        "Close the writing handle before trying to read.")
732            self._file.seek(offset, whence)
733            self._pos = self._file.tell()
734            return self._pos
735
736    def read(self, n=-1):
737        with self._lock:
738            if self._writing():
739                raise ValueError("Can't read from the ZIP file while there "
740                        "is an open writing handle on it. "
741                        "Close the writing handle before trying to read.")
742            self._file.seek(self._pos)
743            data = self._file.read(n)
744            self._pos = self._file.tell()
745            return data
746
747    def close(self):
748        if self._file is not None:
749            fileobj = self._file
750            self._file = None
751            self._close(fileobj)
752
753# Provide the tell method for unseekable stream
754class _Tellable:
755    def __init__(self, fp):
756        self.fp = fp
757        self.offset = 0
758
759    def write(self, data):
760        n = self.fp.write(data)
761        self.offset += n
762        return n
763
764    def tell(self):
765        return self.offset
766
767    def flush(self):
768        self.fp.flush()
769
770    def close(self):
771        self.fp.close()
772
773
774class ZipExtFile(io.BufferedIOBase):
775    """File-like object for reading an archive member.
776       Is returned by ZipFile.open().
777    """
778
779    # Max size supported by decompressor.
780    MAX_N = 1 << 31 - 1
781
782    # Read from compressed files in 4k blocks.
783    MIN_READ_SIZE = 4096
784
785    # Chunk size to read during seek
786    MAX_SEEK_READ = 1 << 24
787
788    def __init__(self, fileobj, mode, zipinfo, pwd=None,
789                 close_fileobj=False):
790        self._fileobj = fileobj
791        self._pwd = pwd
792        self._close_fileobj = close_fileobj
793
794        self._compress_type = zipinfo.compress_type
795        self._compress_left = zipinfo.compress_size
796        self._left = zipinfo.file_size
797
798        self._decompressor = _get_decompressor(self._compress_type)
799
800        self._eof = False
801        self._readbuffer = b''
802        self._offset = 0
803
804        self.newlines = None
805
806        self.mode = mode
807        self.name = zipinfo.filename
808
809        if hasattr(zipinfo, 'CRC'):
810            self._expected_crc = zipinfo.CRC
811            self._running_crc = crc32(b'')
812        else:
813            self._expected_crc = None
814
815        self._seekable = False
816        try:
817            if fileobj.seekable():
818                self._orig_compress_start = fileobj.tell()
819                self._orig_compress_size = zipinfo.compress_size
820                self._orig_file_size = zipinfo.file_size
821                self._orig_start_crc = self._running_crc
822                self._seekable = True
823        except AttributeError:
824            pass
825
826        self._decrypter = None
827        if pwd:
828            if zipinfo.flag_bits & 0x8:
829                # compare against the file type from extended local headers
830                check_byte = (zipinfo._raw_time >> 8) & 0xff
831            else:
832                # compare against the CRC otherwise
833                check_byte = (zipinfo.CRC >> 24) & 0xff
834            h = self._init_decrypter()
835            if h != check_byte:
836                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)
837
838
839    def _init_decrypter(self):
840        self._decrypter = _ZipDecrypter(self._pwd)
841        # The first 12 bytes in the cypher stream is an encryption header
842        #  used to strengthen the algorithm. The first 11 bytes are
843        #  completely random, while the 12th contains the MSB of the CRC,
844        #  or the MSB of the file time depending on the header type
845        #  and is used to check the correctness of the password.
846        header = self._fileobj.read(12)
847        self._compress_left -= 12
848        return self._decrypter(header)[11]
849
850    def __repr__(self):
851        result = ['<%s.%s' % (self.__class__.__module__,
852                              self.__class__.__qualname__)]
853        if not self.closed:
854            result.append(' name=%r mode=%r' % (self.name, self.mode))
855            if self._compress_type != ZIP_STORED:
856                result.append(' compress_type=%s' %
857                              compressor_names.get(self._compress_type,
858                                                   self._compress_type))
859        else:
860            result.append(' [closed]')
861        result.append('>')
862        return ''.join(result)
863
864    def readline(self, limit=-1):
865        """Read and return a line from the stream.
866
867        If limit is specified, at most limit bytes will be read.
868        """
869
870        if limit < 0:
871            # Shortcut common case - newline found in buffer.
872            i = self._readbuffer.find(b'\n', self._offset) + 1
873            if i > 0:
874                line = self._readbuffer[self._offset: i]
875                self._offset = i
876                return line
877
878        return io.BufferedIOBase.readline(self, limit)
879
880    def peek(self, n=1):
881        """Returns buffered bytes without advancing the position."""
882        if n > len(self._readbuffer) - self._offset:
883            chunk = self.read(n)
884            if len(chunk) > self._offset:
885                self._readbuffer = chunk + self._readbuffer[self._offset:]
886                self._offset = 0
887            else:
888                self._offset -= len(chunk)
889
890        # Return up to 512 bytes to reduce allocation overhead for tight loops.
891        return self._readbuffer[self._offset: self._offset + 512]
892
893    def readable(self):
894        if self.closed:
895            raise ValueError("I/O operation on closed file.")
896        return True
897
898    def read(self, n=-1):
899        """Read and return up to n bytes.
900        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
901        """
902        if self.closed:
903            raise ValueError("read from closed file.")
904        if n is None or n < 0:
905            buf = self._readbuffer[self._offset:]
906            self._readbuffer = b''
907            self._offset = 0
908            while not self._eof:
909                buf += self._read1(self.MAX_N)
910            return buf
911
912        end = n + self._offset
913        if end < len(self._readbuffer):
914            buf = self._readbuffer[self._offset:end]
915            self._offset = end
916            return buf
917
918        n = end - len(self._readbuffer)
919        buf = self._readbuffer[self._offset:]
920        self._readbuffer = b''
921        self._offset = 0
922        while n > 0 and not self._eof:
923            data = self._read1(n)
924            if n < len(data):
925                self._readbuffer = data
926                self._offset = n
927                buf += data[:n]
928                break
929            buf += data
930            n -= len(data)
931        return buf
932
933    def _update_crc(self, newdata):
934        # Update the CRC using the given data.
935        if self._expected_crc is None:
936            # No need to compute the CRC if we don't have a reference value
937            return
938        self._running_crc = crc32(newdata, self._running_crc)
939        # Check the CRC if we're at the end of the file
940        if self._eof and self._running_crc != self._expected_crc:
941            raise BadZipFile("Bad CRC-32 for file %r" % self.name)
942
943    def read1(self, n):
944        """Read up to n bytes with at most one read() system call."""
945
946        if n is None or n < 0:
947            buf = self._readbuffer[self._offset:]
948            self._readbuffer = b''
949            self._offset = 0
950            while not self._eof:
951                data = self._read1(self.MAX_N)
952                if data:
953                    buf += data
954                    break
955            return buf
956
957        end = n + self._offset
958        if end < len(self._readbuffer):
959            buf = self._readbuffer[self._offset:end]
960            self._offset = end
961            return buf
962
963        n = end - len(self._readbuffer)
964        buf = self._readbuffer[self._offset:]
965        self._readbuffer = b''
966        self._offset = 0
967        if n > 0:
968            while not self._eof:
969                data = self._read1(n)
970                if n < len(data):
971                    self._readbuffer = data
972                    self._offset = n
973                    buf += data[:n]
974                    break
975                if data:
976                    buf += data
977                    break
978        return buf
979
    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        #
        # Returns the resulting data, which may be empty.  Updates
        # self._eof and self._left, and passes the data to _update_crc().
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            # Start from input the decompressor has not consumed yet and
            # only read more from the file if that is shorter than n.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            # Stored data is returned as read; EOF once no input is left.
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            # Ask for at least MIN_READ_SIZE bytes of output per call.
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            # EOF when the decompressor reports it, or when both the file
            # input and the unconsumed tail are exhausted.
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            # Other methods: decompress without an output limit.
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never return more than the number of bytes still expected.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data
1015
1016    def _read2(self, n):
1017        if self._compress_left <= 0:
1018            return b''
1019
1020        n = max(n, self.MIN_READ_SIZE)
1021        n = min(n, self._compress_left)
1022
1023        data = self._fileobj.read(n)
1024        self._compress_left -= len(data)
1025        if not data:
1026            raise EOFError
1027
1028        if self._decrypter is not None:
1029            data = self._decrypter(data)
1030        return data
1031
    def close(self):
        """Close this file, and the underlying file object if it is owned.

        The underlying file object is closed only when self._close_fileobj
        is true.
        """
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            # Always mark this object closed, even if closing the
            # underlying file object raised.
            super().close()
1038
1039    def seekable(self):
1040        if self.closed:
1041            raise ValueError("I/O operation on closed file.")
1042        return self._seekable
1043
    def seek(self, offset, whence=0):
        """Move to a new position in the uncompressed data and return it.

        whence is 0 (from the start), 1 (from the current position) or
        2 (from the end).  The target is clamped to [0, file size].

        Forward seeks are done by reading and discarding data; backward
        seeks outside the current buffer restart from the beginning of the
        member and read forward to the target.

        Raises ValueError if the file is closed or whence is invalid, and
        io.UnsupportedOperation if the underlying stream is not seekable.
        """
        if self.closed:
            raise ValueError("seek on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        # Clamp the target to the valid range of the uncompressed data.
        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        # read_offset: distance to travel from the current position.
        # buff_offset: where the target would fall inside _readbuffer.
        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            # After the reset we are at position 0, so read up to new_pos.
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Skip forward by reading and discarding, at most MAX_SEEK_READ
        # bytes per read() call.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()
1093
1094    def tell(self):
1095        if self.closed:
1096            raise ValueError("tell on closed file.")
1097        if not self._seekable:
1098            raise io.UnsupportedOperation("underlying stream is not seekable")
1099        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
1100        return filepos
1101
1102
class _ZipWriteFile(io.BufferedIOBase):
    """Writable file object for a single member being added to an archive.

    Data passed to write() is CRC'd, optionally compressed, and written to
    the owning ZipFile's file object.  close() flushes the compressor,
    records the final sizes and CRC on the ZipInfo, writes them to the
    archive and registers the member with the ZipFile.
    """

    def __init__(self, zf, zinfo, zip64):
        """zf: owning ZipFile; zinfo: member's ZipInfo; zip64: use ZIP64 sizes."""
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0         # uncompressed bytes written so far
        self._compress_size = 0     # compressed bytes written so far
        self._crc = 0               # running CRC-32 of the uncompressed data

    @property
    def _fileobj(self):
        # Looked up on every access, so it always reflects the ZipFile's
        # current file object.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write a bytes-like object and return the number of bytes written.

        Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any object that supports the buffer protocol.  len() counts
        # items rather than bytes for objects such as array.array or a
        # multi-byte memoryview, so take the size from memoryview.nbytes.
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member: flush, write final header info, register it.

        Does nothing if already closed.  The ZipFile's _writing flag is
        cleared even if finishing the member fails.
        """
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False
1178
1179
1180
1181class ZipFile:
1182    """ Class with methods to open, read, write, close, list zip files.
1183
1184    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1185                compresslevel=None)
1186
1187    file: Either the path to the file, or a file-like object.
1188          If it is a path, the file will be opened and closed by ZipFile.
1189    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1190          or append 'a'.
1191    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1192                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
1193    allowZip64: if True ZipFile will create files with ZIP64 extensions when
1194                needed, otherwise it will raise an exception when this would
1195                be necessary.
1196    compresslevel: None (default for the given compression type) or an integer
1197                   specifying the level to pass to the compressor.
1198                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1199                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
1200                   When using ZIP_BZIP2 integers 1 through 9 are accepted.
1201
1202    """
1203
1204    fp = None                   # Set here since __del__ checks it
1205    _windows_illegal_name_trans_table = None
1206
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'.

        file may be a str or os.PathLike path, which is opened here, or an
        already open file-like object, which is used as is.  compression is
        validated with _check_compression().  compresslevel, allowZip64 and
        strict_timestamps are stored on the instance for later use.

        Raises ValueError for an unsupported mode.  If setting up the archive
        fails, the file is released through _fpclose() and the exception is
        re-raised.
        """
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # Maps the ZipFile mode to the mode passed to io.open(), and each
            # open mode to the fallback tried when opening raises OSError
            # ('r+b' -> 'w+b' -> 'wb', 'x+b' -> 'xb').  'rb' has no fallback.
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            # A file-like object was passed in; use it directly.
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # No usable tell(): wrap the file in _Tellable and treat
                    # the stream as unseekable.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                # Not reachable after the mode check at the top of __init__.
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # Release the file on any failure (the exception is re-raised);
            # self.fp is cleared first so the instance reads as closed.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise
1296
    def __enter__(self):
        """Enter a with block; return this ZipFile itself."""
        return self
1299
    def __exit__(self, type, value, traceback):
        """Leave a with block by calling close().

        The exception arguments are ignored and None is returned, so an
        exception raised inside the block is not suppressed.
        """
        self.close()
1302
1303    def __repr__(self):
1304        result = ['<%s.%s' % (self.__class__.__module__,
1305                              self.__class__.__qualname__)]
1306        if self.fp is not None:
1307            if self._filePassed:
1308                result.append(' file=%r' % self.fp)
1309            elif self.filename is not None:
1310                result.append(' filename=%r' % self.filename)
1311            result.append(' mode=%r' % self.mode)
1312        else:
1313            result.append(' [closed]')
1314        result.append('>')
1315        return ''.join(result)
1316
    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Locates the end-of-central-directory record, then parses every
        central directory entry into a ZipInfo that is appended to
        self.filelist and stored in self.NameToInfo.  Also sets
        self._comment and self.start_dir.

        Raises BadZipFile if the end record cannot be found or a central
        directory entry is truncated or has a bad signature, and
        NotImplementedError if an entry requires a newer extract version
        than MAX_EXTRACT_VERSION.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        # Read the whole central directory at once and parse it from memory;
        # from here on fp refers to the in-memory copy, not the archive.
        data = fp.read(size_cd)
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Field 5 holds the general purpose flag bits (it is unpacked
            # into x.flag_bits below).
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            # d packs year-1980, month and day; t packs hour, minute and
            # seconds/2.
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            # Shift the local header offset by the size of any data that
            # precedes the archive in the file.
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total", total)
1393
1394
1395    def namelist(self):
1396        """Return a list of file names in the archive."""
1397        return [data.filename for data in self.filelist]
1398
    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive.

        The returned object is self.filelist itself, not a copy.
        """
        return self.filelist
1403
1404    def printdir(self, file=None):
1405        """Print a table of contents for the zip file."""
1406        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
1407              file=file)
1408        for zinfo in self.filelist:
1409            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
1410            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1411                  file=file)
1412
1413    def testzip(self):
1414        """Read all the files and check the CRC."""
1415        chunk_size = 2 ** 20
1416        for zinfo in self.filelist:
1417            try:
1418                # Read by chunks, to avoid an OverflowError or a
1419                # MemoryError with very large embedded files.
1420                with self.open(zinfo.filename, "r") as f:
1421                    while f.read(chunk_size):     # Check CRC-32
1422                        pass
1423            except BadZipFile:
1424                return zinfo.filename
1425
1426    def getinfo(self, name):
1427        """Return the instance of ZipInfo given 'name'."""
1428        info = self.NameToInfo.get(name)
1429        if info is None:
1430            raise KeyError(
1431                'There is no item named %r in the archive' % name)
1432
1433        return info
1434
1435    def setpassword(self, pwd):
1436        """Set default password for encrypted files."""
1437        if pwd and not isinstance(pwd, bytes):
1438            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1439        if pwd:
1440            self.pwd = pwd
1441        else:
1442            self.pwd = None
1443
1444    @property
1445    def comment(self):
1446        """The comment text associated with the ZIP file."""
1447        return self._comment
1448
1449    @comment.setter
1450    def comment(self, comment):
1451        if not isinstance(comment, bytes):
1452            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
1453        # check for valid comment length
1454        if len(comment) > ZIP_MAX_COMMENT:
1455            import warnings
1456            warnings.warn('Archive comment is too long; truncating to %d bytes'
1457                          % ZIP_MAX_COMMENT, stacklevel=2)
1458            comment = comment[:ZIP_MAX_COMMENT]
1459        self._comment = comment
1460        self._didModify = True
1461
1462    def read(self, name, pwd=None):
1463        """Return file bytes for name."""
1464        with self.open(name, "r", pwd) as fp:
1465            return fp.read()
1466
    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.

        Raises ValueError for an invalid mode, a pwd given when writing, a
        closed archive, or a read attempted while a write handle is open;
        TypeError if pwd is not bytes; BadZipFile if the local file header
        is truncated, has a bad signature, or names a different file than
        the central directory; NotImplementedError for compressed patched
        data or strong encryption; RuntimeError if the member is encrypted
        and no password is available.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            # New member: inherit the archive's compression settings.
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        # Count one more user of self.fp; the _SharedFile is given
        # self._fpclose as its close callback.
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                # Read past the extra field; its content is not used here.
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # The local header must name the same file as the central
            # directory entry.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            if is_encrypted:
                # Fall back to the archive-wide default password.
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)
            else:
                # A password supplied for an unencrypted member is ignored.
                pwd = None

            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
        except:
            # Release the shared file handle on any failure, then re-raise.
            zef_file.close()
            raise
1564
    def _open_to_write(self, zinfo, force_zip64=False):
        """Write the local file header for zinfo and return a _ZipWriteFile.

        Resets zinfo's compress_size, CRC and flag_bits, records the header
        offset, and marks the archive as modified and as having an open
        write handle (self._writing).

        Raises ValueError if force_zip64 is set although allowZip64 was
        disabled, or if another write handle is already open.
        """
        if force_zip64 and not self._allowZip64:
            raise ValueError(
                "force_zip64 is True, but allowZip64 was False when opening "
                "the ZIP file."
            )
        if self._writing:
            raise ValueError("Can't write to the ZIP file while there is "
                             "another write handle open on it. "
                             "Close the first handle before opening another.")

        # Size and CRC are overwritten with correct data after processing the file
        zinfo.compress_size = 0
        zinfo.CRC = 0

        zinfo.flag_bits = 0x00
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02
        if not self._seekable:
            # The header cannot be rewritten later on an unseekable stream,
            # so set flag bit 3; _ZipWriteFile.close() then writes the CRC
            # and sizes after the file data.
            zinfo.flag_bits |= 0x08

        if not zinfo.external_attr:
            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

        # The new member starts where the central directory currently begins.
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()

        self._writecheck(zinfo)
        self._didModify = True

        self.fp.write(zinfo.FileHeader(zip64))

        self._writing = True
        return _ZipWriteFile(self, zinfo, zip64)
1605
1606    def extract(self, member, path=None, pwd=None):
1607        """Extract a member from the archive to the current working directory,
1608           using its full name. Its file information is extracted as accurately
1609           as possible. `member' may be a filename or a ZipInfo object. You can
1610           specify a different directory using `path'.
1611        """
1612        if path is None:
1613            path = os.getcwd()
1614        else:
1615            path = os.fspath(path)
1616
1617        return self._extract_member(member, path, pwd)
1618
1619    def extractall(self, path=None, members=None, pwd=None):
1620        """Extract all members from the archive to the current working
1621           directory. `path' specifies a different directory to extract to.
1622           `members' is optional and must be a subset of the list returned
1623           by namelist().
1624        """
1625        if members is None:
1626            members = self.namelist()
1627
1628        if path is None:
1629            path = os.getcwd()
1630        else:
1631            path = os.fspath(path)
1632
1633        for zipinfo in members:
1634            self._extract_member(zipinfo, path, pwd)
1635
1636    @classmethod
1637    def _sanitize_windows_name(cls, arcname, pathsep):
1638        """Replace bad characters and remove trailing dots from parts."""
1639        table = cls._windows_illegal_name_trans_table
1640        if not table:
1641            illegal = ':<>|"?*'
1642            table = str.maketrans(illegal, '_' * len(illegal))
1643            cls._windows_illegal_name_trans_table = table
1644        arcname = arcname.translate(table)
1645        # remove trailing dots
1646        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1647        # rejoin, removing empty parts.
1648        arcname = pathsep.join(x for x in arcname if x)
1649        return arcname
1650
1651    def _extract_member(self, member, targetpath, pwd):
1652        """Extract the ZipInfo object 'member' to a physical
1653           file on the path targetpath.
1654        """
1655        if not isinstance(member, ZipInfo):
1656            member = self.getinfo(member)
1657
1658        # build the destination pathname, replacing
1659        # forward slashes to platform specific separators.
1660        arcname = member.filename.replace('/', os.path.sep)
1661
1662        if os.path.altsep:
1663            arcname = arcname.replace(os.path.altsep, os.path.sep)
1664        # interpret absolute pathname as relative, remove drive letter or
1665        # UNC path, redundant separators, "." and ".." components.
1666        arcname = os.path.splitdrive(arcname)[1]
1667        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
1668        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1669                                   if x not in invalid_path_parts)
1670        if os.path.sep == '\\':
1671            # filter illegal characters on Windows
1672            arcname = self._sanitize_windows_name(arcname, os.path.sep)
1673
1674        targetpath = os.path.join(targetpath, arcname)
1675        targetpath = os.path.normpath(targetpath)
1676
1677        # Create all upper directories if necessary.
1678        upperdirs = os.path.dirname(targetpath)
1679        if upperdirs and not os.path.exists(upperdirs):
1680            os.makedirs(upperdirs)
1681
1682        if member.is_dir():
1683            if not os.path.isdir(targetpath):
1684                os.mkdir(targetpath)
1685            return targetpath
1686
1687        with self.open(member, pwd=pwd) as source, \
1688             open(targetpath, "wb") as target:
1689            shutil.copyfileobj(source, target)
1690
1691        return targetpath
1692
1693    def _writecheck(self, zinfo):
1694        """Check for errors before writing a file to the archive."""
1695        if zinfo.filename in self.NameToInfo:
1696            import warnings
1697            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1698        if self.mode not in ('w', 'x', 'a'):
1699            raise ValueError("write() requires mode 'w', 'x', or 'a'")
1700        if not self.fp:
1701            raise ValueError(
1702                "Attempt to write ZIP archive that was already closed")
1703        _check_compression(zinfo.compress_type)
1704        if not self._allowZip64:
1705            requires_zip64 = None
1706            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1707                requires_zip64 = "Files count"
1708            elif zinfo.file_size > ZIP64_LIMIT:
1709                requires_zip64 = "Filesize"
1710            elif zinfo.header_offset > ZIP64_LIMIT:
1711                requires_zip64 = "Zipfile size"
1712            if requires_zip64:
1713                raise LargeZipFile(requires_zip64 +
1714                                   " would require ZIP64 extensions")
1715
1716    def write(self, filename, arcname=None,
1717              compress_type=None, compresslevel=None):
1718        """Put the bytes from filename into the archive under the name
1719        arcname."""
1720        if not self.fp:
1721            raise ValueError(
1722                "Attempt to write to ZIP archive that was already closed")
1723        if self._writing:
1724            raise ValueError(
1725                "Can't write to ZIP archive while an open writing handle exists"
1726            )
1727
1728        zinfo = ZipInfo.from_file(filename, arcname,
1729                                  strict_timestamps=self._strict_timestamps)
1730
1731        if zinfo.is_dir():
1732            zinfo.compress_size = 0
1733            zinfo.CRC = 0
1734        else:
1735            if compress_type is not None:
1736                zinfo.compress_type = compress_type
1737            else:
1738                zinfo.compress_type = self.compression
1739
1740            if compresslevel is not None:
1741                zinfo._compresslevel = compresslevel
1742            else:
1743                zinfo._compresslevel = self.compresslevel
1744
1745        if zinfo.is_dir():
1746            with self._lock:
1747                if self._seekable:
1748                    self.fp.seek(self.start_dir)
1749                zinfo.header_offset = self.fp.tell()  # Start of header bytes
1750                if zinfo.compress_type == ZIP_LZMA:
1751                # Compressed data includes an end-of-stream (EOS) marker
1752                    zinfo.flag_bits |= 0x02
1753
1754                self._writecheck(zinfo)
1755                self._didModify = True
1756
1757                self.filelist.append(zinfo)
1758                self.NameToInfo[zinfo.filename] = zinfo
1759                self.fp.write(zinfo.FileHeader(False))
1760                self.start_dir = self.fp.tell()
1761        else:
1762            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1763                shutil.copyfileobj(src, dest, 1024*8)
1764
1765    def writestr(self, zinfo_or_arcname, data,
1766                 compress_type=None, compresslevel=None):
1767        """Write a file into the archive.  The contents is 'data', which
1768        may be either a 'str' or a 'bytes' instance; if it is a 'str',
1769        it is encoded as UTF-8 first.
1770        'zinfo_or_arcname' is either a ZipInfo instance or
1771        the name of the file in the archive."""
1772        if isinstance(data, str):
1773            data = data.encode("utf-8")
1774        if not isinstance(zinfo_or_arcname, ZipInfo):
1775            zinfo = ZipInfo(filename=zinfo_or_arcname,
1776                            date_time=time.localtime(time.time())[:6])
1777            zinfo.compress_type = self.compression
1778            zinfo._compresslevel = self.compresslevel
1779            if zinfo.filename[-1] == '/':
1780                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1781                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1782            else:
1783                zinfo.external_attr = 0o600 << 16     # ?rw-------
1784        else:
1785            zinfo = zinfo_or_arcname
1786
1787        if not self.fp:
1788            raise ValueError(
1789                "Attempt to write to ZIP archive that was already closed")
1790        if self._writing:
1791            raise ValueError(
1792                "Can't write to ZIP archive while an open writing handle exists."
1793            )
1794
1795        if compress_type is not None:
1796            zinfo.compress_type = compress_type
1797
1798        if compresslevel is not None:
1799            zinfo._compresslevel = compresslevel
1800
1801        zinfo.file_size = len(data)            # Uncompressed size
1802        with self._lock:
1803            with self.open(zinfo, mode='w') as dest:
1804                dest.write(data)
1805
    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        # close() returns immediately when self.fp is None, so finalizing an
        # archive that was already closed does nothing further.
        self.close()
1809
1810    def close(self):
1811        """Close the file, and for mode 'w', 'x' and 'a' write the ending
1812        records."""
1813        if self.fp is None:
1814            return
1815
1816        if self._writing:
1817            raise ValueError("Can't close the ZIP file while there is "
1818                             "an open writing handle on it. "
1819                             "Close the writing handle before closing the zip.")
1820
1821        try:
1822            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1823                with self._lock:
1824                    if self._seekable:
1825                        self.fp.seek(self.start_dir)
1826                    self._write_end_record()
1827        finally:
1828            fp = self.fp
1829            self.fp = None
1830            self._fpclose(fp)
1831
    def _write_end_record(self):
        """Write the central directory and the end-of-archive records.

        Writes one central directory entry per item in self.filelist at the
        current position of self.fp, then the end-of-archive record followed
        by the archive comment. ZIP64 end records are inserted first when
        the entry count, central directory offset or central directory size
        exceeds the classic limits; LargeZipFile is raised in that case if
        ZIP64 is not allowed.
        """
        for zinfo in self.filelist:         # write central directory
            dt = zinfo.date_time
            # Pack the date as (year - 1980) << 9 | month << 5 | day and the
            # time as hour << 11 | minute << 5 | seconds // 2.
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            # Values too large for the fixed fields; they are moved into a
            # ZIP64 extra field below, in the order they are appended here.
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                # 0xffffffff in the fixed field marks the value as moved.
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Put a ZIP64 field in front of the other extra fields,
                # after removing any existing field with header id 1.
                extra_data = _strip_extra(extra_data, (1,))
                # Layout: two little-endian 16-bit values (header id 1 and
                # payload length) followed by one 64-bit value per item.
                extra_data = struct.pack(
                    '<HH' + 'Q'*len(extra),
                    1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            # Raise the minimum version for the compression method in use.
            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            # Never lower the versions already recorded on the entry.
            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
            # Fixed-size entry first, then its variable-length parts.
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        # Position just past the last central directory entry.
        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        # Name of the first value that does not fit the classic record.
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            # NOTE(review): the literals 44, 45, 45, 0, 0 presumably are the
            # record size, the two version fields and two disk numbers of
            # the ZIP64 end record -- confirm against structEndArchive64.
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            # The locator carries pos2, the position where the ZIP64 end
            # record above was written.
            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # Clamp the values so they fit the classic record written below.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        if self.mode == "a":
            # In append mode, cut off whatever follows the new end record.
            self.fp.truncate()
        self.fp.flush()
1925
1926    def _fpclose(self, fp):
1927        assert self._fileRefCnt > 0
1928        self._fileRefCnt -= 1
1929        if not self._fileRefCnt and not self._filePassed:
1930            fp.close()
1931
1932
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages.

    Extends ZipFile with writepy(), which adds modules to the archive in
    compiled form, compiling them first when no up-to-date bytecode file
    is found.
    """

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember `optimize'.

        `optimize' selects which bytecode file writepy() stores: -1 uses
        whatever is present (legacy mode), 0, 1 or 2 select that
        optimization level. Any other value makes writepy() raise
        ValueError.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        If filterfunc(pathname) is given, it is called with every argument.
        When it is False, the file or directory is skipped.

        `basename' is the archive path prefix under which entries are
        stored. Raises RuntimeError when pathname is a single file that
        does not end with ".py".
        """
        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return
        # Only `name' is used below; `dir' is not read in this method.
        dir, name = os.path.split(pathname)
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                # initname[0:-3] drops the ".py" suffix.
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = sorted(os.listdir(pathname))
                # __init__.py was handled above; do not add it twice.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename,
                                         filterfunc=filterfunc)  # Recursive call
                    elif ext == ".py":
                        if filterfunc and not filterfunc(path):
                            if self.debug:
                                print('file %r skipped by filterfunc' % path)
                            continue
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                # Subdirectories are not visited in this branch.
                for filename in sorted(os.listdir(pathname)):
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if ext == ".py":
                        if filterfunc and not filterfunc(path):
                            if self.debug:
                                print('file %r skipped by filterfunc' % path)
                            continue
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            # pathname is a single file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  `pathname' is the module
        path without the ".py" suffix. The returned filename is the file
        to read from disk: a bytecode file, or the ".py" source when
        compilation failed. The archive name is the last component of
        pathname plus ".pyc" (or ".py" after a failed compile), prefixed
        with "basename/" when basename is not empty.  For example, given
        /python/lib/string and an up-to-date /python/lib/string.pyc,
        return (/python/lib/string.pyc, string.pyc).

        Raises ValueError when the optimize level given to the
        constructor is not -1, 0, 1 or 2.
        """
        def _compile(file, optimize=-1):
            # Compile `file'; on a compile error print the message and
            # return False instead of raising.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        # Cache file names for the three optimization levels.
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
        if self._optimize == -1:
            # legacy mode: use whatever file is present
            # A candidate is used only if it exists and is at least as
            # recent as the source; candidates are tried in this order.
            if (os.path.isfile(file_pyc) and
                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif (os.path.isfile(pycache_opt0) and
                  os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt0
                arcname = file_pyc
            elif (os.path.isfile(pycache_opt1) and
                  os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt1
                arcname = file_pyc
            elif (os.path.isfile(pycache_opt2) and
                  os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt2
                arcname = file_pyc
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    # Pick the cache file matching the interpreter's own
                    # optimization level.
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                    arcname = file_pyc
                else:
                    # Compilation failed: store the source file instead.
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_opt0
                arcname = file_pyc
            else:
                arcname = file_pyc
                if self._optimize == 1:
                    fname = pycache_opt1
                elif self._optimize == 2:
                    fname = pycache_opt2
                else:
                    msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                    raise ValueError(msg)
            # Recompile when the cache file is missing or older than the
            # source; fall back to the source if that fails.
            if not (os.path.isfile(fname) and
                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        # Keep only the final path component for the archive name.
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
2105
2106
def _parents(path):
    """
    Yield every parent of a posixpath.sep-separated path,
    nearest parent first. The path itself is not included.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    # The first element of the ancestry is the path itself; skip it.
    lineage = _ancestry(path)
    return itertools.islice(lineage, 1, None)
2124
2125
2126def _ancestry(path):
2127    """
2128    Given a path with elements separated by
2129    posixpath.sep, generate all elements of that path
2130
2131    >>> list(_ancestry('b/d'))
2132    ['b/d', 'b']
2133    >>> list(_ancestry('/b/d/'))
2134    ['/b/d', '/b']
2135    >>> list(_ancestry('b/d/f/'))
2136    ['b/d/f', 'b/d', 'b']
2137    >>> list(_ancestry('b'))
2138    ['b']
2139    >>> list(_ancestry(''))
2140    []
2141    """
2142    path = path.rstrip(posixpath.sep)
2143    while path and path != posixpath.sep:
2144        yield path
2145        path, tail = posixpath.split(path)
2146
2147
# Alias for dict.fromkeys: the resulting dict has one key per distinct item,
# in first-seen order, so iterating it yields the items without repeats.
_dedupe = dict.fromkeys
"""Deduplicate an iterable in original order"""
2150
2151
2152def _difference(minuend, subtrahend):
2153    """
2154    Return items in minuend not in subtrahend, retaining order
2155    with O(1) lookup.
2156    """
2157    return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2158
2159
class CompleteDirs(ZipFile):
    """
    A ZipFile subclass whose namelist() also reports the
    directories that are only implied by the member names.
    """

    @staticmethod
    def _implied_dirs(names):
        """
        Return, as the keys of a dict, the parent directories
        (with trailing slash) that are not already in names.
        """
        candidates = (
            parent + posixpath.sep
            for name in names
            for parent in _parents(name)
        )
        return _dedupe(_difference(candidates, names))

    def namelist(self):
        """Return the stored names followed by the implied directories."""
        stored = super().namelist()
        return stored + list(self._implied_dirs(stored))

    def _name_set(self):
        """Return namelist() as a set for membership tests."""
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        Return name with a trailing slash when only the
        slash-terminated form is a known name; otherwise
        return name unchanged.
        """
        known = self._name_set()
        as_dir = name + '/'
        if name not in known and as_dir in known:
            return as_dir
        return name

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an
        appropriate CompleteDirs subclass.

        An existing ZipFile object is returned itself after its
        __class__ has been reassigned; anything else is passed to
        the class constructor.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, ZipFile):
            return cls(source)

        # Only allow for FastLookup when supplied zipfile is read-only
        chosen = cls if 'r' in source.mode else CompleteDirs
        source.__class__ = chosen
        return source
2207
2208
class FastLookup(CompleteDirs):
    """
    CompleteDirs variant that computes the name list and the
    name set once and reuses them on every later call.
    """

    def namelist(self):
        """Return the cached name list, computing it on first use."""
        try:
            return self.__names
        except AttributeError:
            pass
        self.__names = super().namelist()
        return self.__names

    def _name_set(self):
        """Return the cached name set, computing it on first use."""
        try:
            return self.__lookup
        except AttributeError:
            pass
        self.__lookup = super()._name_set()
        return self.__lookup
2226
2227
2228class Path:
2229    """
2230    A pathlib-compatible interface for zip files.
2231
2232    Consider a zip file with this structure::
2233
2234        .
2235        ├── a.txt
2236        └── b
2237            ├── c.txt
2238            └── d
2239                └── e.txt
2240
2241    >>> data = io.BytesIO()
2242    >>> zf = ZipFile(data, 'w')
2243    >>> zf.writestr('a.txt', 'content of a')
2244    >>> zf.writestr('b/c.txt', 'content of c')
2245    >>> zf.writestr('b/d/e.txt', 'content of e')
2246    >>> zf.filename = 'mem/abcde.zip'
2247
2248    Path accepts the zipfile object itself or a filename
2249
2250    >>> root = Path(zf)
2251
2252    From there, several path operations are available.
2253
2254    Directory iteration (including the zip file itself):
2255
2256    >>> a, b = root.iterdir()
2257    >>> a
2258    Path('mem/abcde.zip', 'a.txt')
2259    >>> b
2260    Path('mem/abcde.zip', 'b/')
2261
2262    name property:
2263
2264    >>> b.name
2265    'b'
2266
2267    join with divide operator:
2268
2269    >>> c = b / 'c.txt'
2270    >>> c
2271    Path('mem/abcde.zip', 'b/c.txt')
2272    >>> c.name
2273    'c.txt'
2274
2275    Read text:
2276
2277    >>> c.read_text()
2278    'content of c'
2279
2280    existence:
2281
2282    >>> c.exists()
2283    True
2284    >>> (b / 'missing.txt').exists()
2285    False
2286
2287    Coercion to string:
2288
2289    >>> import os
2290    >>> str(c).replace(os.sep, posixpath.sep)
2291    'mem/abcde.zip/b/c.txt'
2292
2293    At the root, ``name``, ``filename``, and ``parent``
2294    resolve to the zipfile. Note these attributes are not
2295    valid and will raise a ``ValueError`` if the zipfile
2296    has no filename.
2297
2298    >>> root.name
2299    'abcde.zip'
2300    >>> str(root.filename).replace(os.sep, posixpath.sep)
2301    'mem/abcde.zip'
2302    >>> str(root.parent)
2303    'mem'
2304    """
2305
2306    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
2307
2308    def __init__(self, root, at=""):
2309        """
2310        Construct a Path from a ZipFile or filename.
2311
2312        Note: When the source is an existing ZipFile object,
2313        its type (__class__) will be mutated to a
2314        specialized type. If the caller wishes to retain the
2315        original type, the caller should either create a
2316        separate ZipFile object or pass a filename.
2317        """
2318        self.root = FastLookup.make(root)
2319        self.at = at
2320
2321    def open(self, mode='r', *args, pwd=None, **kwargs):
2322        """
2323        Open this entry as text or binary following the semantics
2324        of ``pathlib.Path.open()`` by passing arguments through
2325        to io.TextIOWrapper().
2326        """
2327        if self.is_dir():
2328            raise IsADirectoryError(self)
2329        zip_mode = mode[0]
2330        if not self.exists() and zip_mode == 'r':
2331            raise FileNotFoundError(self)
2332        stream = self.root.open(self.at, zip_mode, pwd=pwd)
2333        if 'b' in mode:
2334            if args or kwargs:
2335                raise ValueError("encoding args invalid for binary operation")
2336            return stream
2337        else:
2338            kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
2339        return io.TextIOWrapper(stream, *args, **kwargs)
2340
2341    @property
2342    def name(self):
2343        return pathlib.Path(self.at).name or self.filename.name
2344
2345    @property
2346    def filename(self):
2347        return pathlib.Path(self.root.filename).joinpath(self.at)
2348
2349    def read_text(self, *args, **kwargs):
2350        kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
2351        with self.open('r', *args, **kwargs) as strm:
2352            return strm.read()
2353
2354    def read_bytes(self):
2355        with self.open('rb') as strm:
2356            return strm.read()
2357
2358    def _is_child(self, path):
2359        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
2360
2361    def _next(self, at):
2362        return self.__class__(self.root, at)
2363
2364    def is_dir(self):
2365        return not self.at or self.at.endswith("/")
2366
2367    def is_file(self):
2368        return self.exists() and not self.is_dir()
2369
2370    def exists(self):
2371        return self.at in self.root._name_set()
2372
2373    def iterdir(self):
2374        if not self.is_dir():
2375            raise ValueError("Can't listdir a file")
2376        subs = map(self._next, self.root.namelist())
2377        return filter(self._is_child, subs)
2378
2379    def __str__(self):
2380        return posixpath.join(self.root.filename, self.at)
2381
2382    def __repr__(self):
2383        return self.__repr.format(self=self)
2384
2385    def joinpath(self, *other):
2386        next = posixpath.join(self.at, *other)
2387        return self._next(self.root.resolve_dir(next))
2388
2389    __truediv__ = joinpath
2390
2391    @property
2392    def parent(self):
2393        if not self.at:
2394            return self.filename.parent
2395        parent_at = posixpath.dirname(self.at.rstrip('/'))
2396        if parent_at:
2397            parent_at += '/'
2398        return self._next(parent_at)
2399
2400
def main(args=None):
    """Run the command-line interface of the zipfile module.

    `args' is the list of command-line arguments to parse; None lets
    argparse take them from sys.argv. Exactly one of -l/--list,
    -e/--extract, -c/--create or -t/--test is required.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-l', '--list', metavar='<zipfile>',
                         help='Show listing of a zipfile')
    actions.add_argument('-e', '--extract', nargs=2,
                         metavar=('<zipfile>', '<output_dir>'),
                         help='Extract zipfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create zipfile from sources')
    actions.add_argument('-t', '--test', metavar='<zipfile>',
                         help='Test if a zipfile is valid')
    options = parser.parse_args(args)

    def add_to_zip(archive, path, zippath):
        # Add a file, or a directory and everything below it, to the archive.
        if os.path.isfile(path):
            archive.write(path, zippath, ZIP_DEFLATED)
        elif os.path.isdir(path):
            if zippath:
                archive.write(path, zippath)
            for entry in sorted(os.listdir(path)):
                add_to_zip(archive,
                           os.path.join(path, entry),
                           os.path.join(zippath, entry))
        # anything else is ignored

    if options.test is not None:
        with ZipFile(options.test, 'r') as archive:
            corrupted = archive.testzip()
        if corrupted:
            print("The following enclosed file is corrupted: {!r}".format(corrupted))
        print("Done testing")
        return

    if options.list is not None:
        with ZipFile(options.list, 'r') as archive:
            archive.printdir()
        return

    if options.extract is not None:
        archive_name, output_dir = options.extract
        with ZipFile(archive_name, 'r') as archive:
            archive.extractall(output_dir)
        return

    if options.create is not None:
        # The first value is the archive to create, the rest are sources.
        zip_name, *sources = options.create
        with ZipFile(zip_name, 'w') as archive:
            for source in sources:
                zippath = os.path.basename(source)
                if not zippath:
                    # source ended with a separator; use the directory name
                    zippath = os.path.basename(os.path.dirname(source))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_to_zip(archive, source, zippath)
2460
2461
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()
2464