1"""
2Read and write ZIP files.
3
4XXX references to utf-8 need further investigation.
5"""
6import binascii
7import importlib.util
8import io
9import itertools
10import os
11import posixpath
12import shutil
13import stat
14import struct
15import sys
16import threading
17import time
18import contextlib
19import pathlib
20
21try:
22    import zlib # We may need its compression method
23    crc32 = zlib.crc32
24except ImportError:
25    zlib = None
26    crc32 = binascii.crc32
27
28try:
29    import bz2 # We may need its compression method
30except ImportError:
31    bz2 = None
32
33try:
34    import lzma # We may need its compression method
35except ImportError:
36    lzma = None
37
38__all__ = ["BadZipFile", "BadZipfile", "error",
39           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
40           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
41           "Path"]
42
43class BadZipFile(Exception):
44    pass
45
46
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions while
    those extensions are disabled.
    """

error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# General purpose bit flags
# Zip Appnote: 4.4.4 general purpose bit flag: (2 bytes)
_MASK_ENCRYPTED = 1 << 0
# Bits 1 and 2 have different meanings depending on the compression used.
_MASK_COMPRESS_OPTION_1 = 1 << 1
# _MASK_COMPRESS_OPTION_2 = 1 << 2
# _MASK_USE_DATA_DESCRIPTOR: If set, crc-32, compressed size and uncompressed
# size are zero in the local header and the real values are written in the data
# descriptor immediately following the compressed data.
_MASK_USE_DATA_DESCRIPTOR = 1 << 3
# Bit 4: Reserved for use with compression method 8, for enhanced deflating.
# _MASK_RESERVED_BIT_4 = 1 << 4
_MASK_COMPRESSED_PATCH = 1 << 5
_MASK_STRONG_ENCRYPTION = 1 << 6
# _MASK_UNUSED_BIT_7 = 1 << 7
# _MASK_UNUSED_BIT_8 = 1 << 8
# _MASK_UNUSED_BIT_9 = 1 << 9
# _MASK_UNUSED_BIT_10 = 1 << 10
_MASK_UTF_FILENAME = 1 << 11
# Bit 12: Reserved by PKWARE for enhanced compression.
# _MASK_RESERVED_BIT_12 = 1 << 12
# _MASK_ENCRYPTED_CENTRAL_DIR = 1 << 13
# Bit 14, 15: Reserved by PKWARE
# _MASK_RESERVED_BIT_14 = 1 << 14
# _MASK_RESERVED_BIT_15 = 1 << 15
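# Illustrative sketch: these masks are meant to be tested against a ZipInfo's
# flag_bits, e.g. `info.flag_bits & _MASK_UTF_FILENAME` is nonzero when the
# member name was stored as UTF-8 (see _encodeFilenameFlags and
# _RealGetContents below).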

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

_DD_SIGNATURE = 0x08074b50

_EXTRA_FIELD_STRUCT = struct.Struct('<HH')

def _strip_extra(extra, xids):
    # Remove Extra Fields with specified IDs.
    unpack = _EXTRA_FIELD_STRUCT.unpack
    modified = False
    buffer = []
    start = i = 0
    while i + 4 <= len(extra):
        xid, xlen = unpack(extra[i : i + 4])
        j = i + 4 + xlen
        if xid in xids:
            if i != start:
                buffer.append(extra[start : i])
            start = j
            modified = True
        i = j
    if not modified:
        return extra
    if start != len(extra):
        buffer.append(extra[start:])
    return b''.join(buffer)
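# Illustrative sketch of _strip_extra: it drops whole extra-field blocks by
# header ID and leaves the rest untouched.  For example, removing a Zip64
# block (ID 0x0001) while keeping a following Unicode-path block (0x7075,
# used here purely as sample data):
#     >>> zip64_block = struct.pack('<HH8s', 0x0001, 8, b'\x00' * 8)
#     >>> other_block = struct.pack('<HH2s', 0x7075, 2, b'ab')
#     >>> _strip_extra(zip64_block + other_block, {0x0001}) == other_block
#     True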

def _check_zipfile(fp):
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False

def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result
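# Usage sketch for is_zipfile (with a hypothetical path):
#     >>> is_zipfile('example.zip')
#     True
#     >>> with open('example.zip', 'rb') as f:
#     ...     is_zipfile(f)              # file-like objects are accepted too
#     True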

def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec


def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is a ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None


class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
        '_end_offset',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        self._end_offset = None         # Start of the next local header or central directory
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        When the optional zip64 arg is None rather than a bool, we will
        decide based upon the file_size and compress_size, if known,
        False otherwise.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
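        # Worked example (illustrative): date_time (2020, 1, 15, 10, 30, 20)
        # packs to dosdate = (2020-1980)<<9 | 1<<5 | 15 = 0x502F and
        # dostime = 10<<11 | 30<<5 | 20//2 = 0x53CA; DOS timestamps keep
        # seconds in two-second units.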
        if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            # We always explicitly pass zip64 within this module.... This
            # remains for anyone using ZipInfo.FileHeader as a public API.
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME

    def _decodeExtra(self):
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        return self.filename[-1] == '/'

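# Usage sketch for ZipInfo.from_file (with a hypothetical path to a regular file):
#     >>> zi = ZipInfo.from_file('data/report.txt', arcname='report.txt')
#     >>> zi.filename, zi.is_dir()
#     ('report.txt', False)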

# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
# internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().

_crctable = None
def _gen_crc(crc):
    for j in range(8):
        if crc & 1:
            crc = (crc >> 1) ^ 0xEDB88320
        else:
            crc >>= 1
    return crc

# ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
#
# Usage:
#     zd = _ZipDecrypter(mypwd)
#     plain_bytes = zd(cypher_bytes)

def _ZipDecrypter(pwd):
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    global _crctable
    if _crctable is None:
        _crctable = list(map(_gen_crc, range(256)))
    crctable = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for p in pwd:
        update_keys(p)

    def decrypter(data):
        """Decrypt a bytes object."""
        result = bytearray()
        append = result.append
        for c in data:
            k = key2 | 2
            c ^= ((k * (k^1)) >> 8) & 0xFF
            update_keys(c)
            append(c)
        return bytes(result)

    return decrypter


class LZMACompressor:

    def __init__(self):
        self._comp = None

    def _init(self):
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        if self._comp is None:
            return self._init() + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        if self._comp is None:
            return self._init() + self._comp.flush()
        return self._comp.flush()


class LZMADecompressor:

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        if self._decomp is None:
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result


compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}

def _check_compression(compression):
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise NotImplementedError("That compression method is not supported")


def _get_compressor(compress_type, compresslevel=None):
    if compress_type == ZIP_DEFLATED:
        if compresslevel is not None:
            return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        if compresslevel is not None:
            return bz2.BZ2Compressor(compresslevel)
        return bz2.BZ2Compressor()
    # compresslevel is ignored for ZIP_LZMA
    elif compress_type == ZIP_LZMA:
        return LZMACompressor()
    else:
        return None


def _get_decompressor(compress_type):
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))

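# Illustrative round trip (assumes the zlib module is available): the objects
# returned above produce and consume raw streams with no container headers.
#     >>> comp = _get_compressor(ZIP_DEFLATED)
#     >>> blob = comp.compress(b'hello world') + comp.flush()
#     >>> _get_decompressor(ZIP_DEFLATED).decompress(blob)
#     b'hello world'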

class _SharedFile:
    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing
        self.seekable = file.seekable

    def tell(self):
        return self._pos

    def seek(self, offset, whence=0):
        with self._lock:
            if self._writing():
                raise ValueError("Can't reposition in the ZIP file while "
                        "there is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(offset, whence)
            self._pos = self._file.tell()
            return self._pos

    def read(self, n=-1):
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(self._pos)
            data = self._file.read(n)
            self._pos = self._file.tell()
            return data

    def close(self):
        if self._file is not None:
            fileobj = self._file
            self._file = None
            self._close(fileobj)

# Provide the tell method for unseekable streams
class _Tellable:
    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        n = self.fp.write(data)
        self.offset += n
        return n

    def tell(self):
        return self.offset

    def flush(self):
        self.fp.flush()

    def close(self):
        self.fp.close()


class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)


    def _init_decrypter(self):
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if self.closed:
            raise ValueError("read from closed file.")
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def seek(self, offset, whence=0):
        if self.closed:
            raise ValueError("seek on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos

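# Usage sketch (with hypothetical names): readers returned by ZipFile.open()
# support seeking whenever the underlying file object does, e.g.
#     with ZipFile('archive.zip') as zf:
#         with zf.open('member.txt') as f:
#             f.seek(10)
#             chunk = f.read(4)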

class _ZipWriteFile(io.BufferedIOBase):
    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any data that supports the buffer protocol
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            if not self._zip64:
                if self._file_size > ZIP64_LIMIT:
                    raise RuntimeError("File size too large, try using force_zip64")
                if self._compress_size > ZIP64_LIMIT:
                    raise RuntimeError("Compressed size too large, try using force_zip64")

            # Write updated header info
            if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False



class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
                compresslevel=None)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
          or append 'a'.
    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.
    compresslevel: None (default for the given compression type) or an integer
                   specifying the level to pass to the compressor.
                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
                   When using ZIP_BZIP2 integers 1 through 9 are accepted.

    """

    fp = None                   # Set here since __del__ checks it
    _windows_illegal_name_trans_table = None

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True, metadata_encoding=None):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'."""
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps
        self.metadata_encoding = metadata_encoding

        # Check that we don't try to write with nonconforming codecs
        if self.metadata_encoding and mode != 'r':
            raise ValueError(
                "metadata_encoding is only supported for reading files")

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise

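    # Usage sketch (with a hypothetical path): ZipFile is a context manager,
    # so a typical read session looks like:
    #     with ZipFile('archive.zip') as zf:
    #         names = zf.namelist()
    #         data = zf.read(names[0])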
1343    def __enter__(self):
1344        return self
1345
1346    def __exit__(self, type, value, traceback):
1347        self.close()
1348
1349    def __repr__(self):
1350        result = ['<%s.%s' % (self.__class__.__module__,
1351                              self.__class__.__qualname__)]
1352        if self.fp is not None:
1353            if self._filePassed:
1354                result.append(' file=%r' % self.fp)
1355            elif self.filename is not None:
1356                result.append(' filename=%r' % self.filename)
1357            result.append(' mode=%r' % self.mode)
1358        else:
1359            result.append(' [closed]')
1360        result.append('>')
1361        return ''.join(result)
1362
1363    def _RealGetContents(self):
1364        """Read in the table of contents for the ZIP file."""
1365        fp = self.fp
1366        try:
1367            endrec = _EndRecData(fp)
1368        except OSError:
1369            raise BadZipFile("File is not a zip file")
1370        if not endrec:
1371            raise BadZipFile("File is not a zip file")
1372        if self.debug > 1:
1373            print(endrec)
1374        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
1375        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
1376        self._comment = endrec[_ECD_COMMENT]    # archive comment
1377
1378        # "concat" is zero, unless zip was concatenated to another file
1379        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
1380        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1381            # If Zip64 extension structures are present, account for them
1382            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1383
1384        if self.debug > 2:
1385            inferred = concat + offset_cd
1386            print("given, inferred, offset", offset_cd, inferred, concat)
1387        # self.start_dir:  Position of start of central directory
1388        self.start_dir = offset_cd + concat
1389        if self.start_dir < 0:
1390            raise BadZipFile("Bad offset for central directory")
1391        fp.seek(self.start_dir, 0)
1392        data = fp.read(size_cd)
1393        fp = io.BytesIO(data)
1394        total = 0
1395        while total < size_cd:
1396            centdir = fp.read(sizeCentralDir)
1397            if len(centdir) != sizeCentralDir:
1398                raise BadZipFile("Truncated central directory")
1399            centdir = struct.unpack(structCentralDir, centdir)
1400            if centdir[_CD_SIGNATURE] != stringCentralDir:
1401                raise BadZipFile("Bad magic number for central directory")
1402            if self.debug > 2:
1403                print(centdir)
1404            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
1405            flags = centdir[_CD_FLAG_BITS]
1406            if flags & _MASK_UTF_FILENAME:
1407                # UTF-8 file names extension
1408                filename = filename.decode('utf-8')
1409            else:
1410                # Historical ZIP filename encoding
1411                filename = filename.decode(self.metadata_encoding or 'cp437')
1412            # Create ZipInfo instance to store file information
1413            x = ZipInfo(filename)
1414            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1415            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
1416            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
1417            (x.create_version, x.create_system, x.extract_version, x.reserved,
1418             x.flag_bits, x.compress_type, t, d,
1419             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
1420            if x.extract_version > MAX_EXTRACT_VERSION:
1421                raise NotImplementedError("zip file version %.1f" %
1422                                          (x.extract_version / 10))
1423            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1424            # Convert date/time code to (year, month, day, hour, min, sec)
1425            x._raw_time = t
1426            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
1427                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
1428
1429            x._decodeExtra()
1430            x.header_offset = x.header_offset + concat
1431            self.filelist.append(x)
1432            self.NameToInfo[x.filename] = x
1433
1434            # update total bytes read from central directory
1435            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1436                     + centdir[_CD_EXTRA_FIELD_LENGTH]
1437                     + centdir[_CD_COMMENT_LENGTH])
1438
1439            if self.debug > 2:
1440                print("total", total)
1441
1442        end_offset = self.start_dir
1443        for zinfo in sorted(self.filelist,
1444                            key=lambda zinfo: zinfo.header_offset,
1445                            reverse=True):
1446            zinfo._end_offset = end_offset
1447            end_offset = zinfo.header_offset
1448
1449    def namelist(self):
1450        """Return a list of file names in the archive."""
1451        return [data.filename for data in self.filelist]
1452
1453    def infolist(self):
1454        """Return a list of class ZipInfo instances for files in the
1455        archive."""
1456        return self.filelist
1457
1458    def printdir(self, file=None):
1459        """Print a table of contents for the zip file."""
1460        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
1461              file=file)
1462        for zinfo in self.filelist:
1463            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
1464            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1465                  file=file)
1466
1467    def testzip(self):
1468        """Read all the files and check the CRC."""
1469        chunk_size = 2 ** 20
1470        for zinfo in self.filelist:
1471            try:
1472                # Read by chunks, to avoid an OverflowError or a
1473                # MemoryError with very large embedded files.
1474                with self.open(zinfo.filename, "r") as f:
1475                    while f.read(chunk_size):     # Check CRC-32
1476                        pass
1477            except BadZipFile:
1478                return zinfo.filename
1479
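    # Usage sketch (illustrative only): testzip() returns the name of the
    # first member whose CRC check fails, or None if the archive is intact.
    # 'example.zip' is a hypothetical file name.
    #
    #   with ZipFile('example.zip') as zf:
    #       bad = zf.testzip()
    #       if bad is not None:
    #           print('corrupt member:', bad)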
1480    def getinfo(self, name):
1481        """Return the instance of ZipInfo given 'name'."""
1482        info = self.NameToInfo.get(name)
1483        if info is None:
1484            raise KeyError(
1485                'There is no item named %r in the archive' % name)
1486
1487        return info
1488
1489    def setpassword(self, pwd):
1490        """Set default password for encrypted files."""
1491        if pwd and not isinstance(pwd, bytes):
1492            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1493        if pwd:
1494            self.pwd = pwd
1495        else:
1496            self.pwd = None
1497
1498    @property
1499    def comment(self):
1500        """The comment text associated with the ZIP file."""
1501        return self._comment
1502
1503    @comment.setter
1504    def comment(self, comment):
1505        if not isinstance(comment, bytes):
1506            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
1507        # check for valid comment length
1508        if len(comment) > ZIP_MAX_COMMENT:
1509            import warnings
1510            warnings.warn('Archive comment is too long; truncating to %d bytes'
1511                          % ZIP_MAX_COMMENT, stacklevel=2)
1512            comment = comment[:ZIP_MAX_COMMENT]
1513        self._comment = comment
1514        self._didModify = True
1515
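    # Usage sketch (illustrative only): the archive comment is raw bytes and
    # is truncated (with a warning) beyond ZIP_MAX_COMMENT bytes.  The path
    # below is hypothetical.
    #
    #   with ZipFile('example.zip', 'a') as zf:
    #       zf.comment = b'built by the release script'
    #       print(zf.comment)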
1516    def read(self, name, pwd=None):
1517        """Return file bytes for name."""
1518        with self.open(name, "r", pwd) as fp:
1519            return fp.read()
1520
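    # Usage sketch (illustrative only): read() returns a whole member as
    # bytes; pwd is only needed for encrypted members.  Names are
    # hypothetical.
    #
    #   with ZipFile('example.zip') as zf:
    #       data = zf.read('docs/readme.txt')
    #       secret = zf.read('hidden.txt', pwd=b'password')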
1521    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
1522        """Return file-like object for 'name'.
1523
1524        name is a string for the file name within the ZIP file, or a ZipInfo
1525        object.
1526
1527        mode should be 'r' to read a file already in the ZIP file, or 'w' to
1528        write to a file newly added to the archive.
1529
1530        pwd is the password to decrypt files (only used for reading).
1531
1532        When writing, if the file size is not known in advance but may exceed
1533        2 GiB, pass force_zip64=True to use the ZIP64 format, which can handle large
1534        files.  If the size is known in advance, it is best to pass a ZipInfo
1535        instance for name, with zinfo.file_size set.
1536        """
1537        if mode not in {"r", "w"}:
1538            raise ValueError('open() requires mode "r" or "w"')
1539        if pwd and (mode == "w"):
1540            raise ValueError("pwd is only supported for reading files")
1541        if not self.fp:
1542            raise ValueError(
1543                "Attempt to use ZIP archive that was already closed")
1544
1545        # Make sure we have an info object
1546        if isinstance(name, ZipInfo):
1547            # 'name' is already an info object
1548            zinfo = name
1549        elif mode == 'w':
1550            zinfo = ZipInfo(name)
1551            zinfo.compress_type = self.compression
1552            zinfo._compresslevel = self.compresslevel
1553        else:
1554            # Get info object for name
1555            zinfo = self.getinfo(name)
1556
1557        if mode == 'w':
1558            return self._open_to_write(zinfo, force_zip64=force_zip64)
1559
1560        if self._writing:
1561            raise ValueError("Can't read from the ZIP file while there "
1562                    "is an open writing handle on it. "
1563                    "Close the writing handle before trying to read.")
1564
1565        # Open for reading:
1566        self._fileRefCnt += 1
1567        zef_file = _SharedFile(self.fp, zinfo.header_offset,
1568                               self._fpclose, self._lock, lambda: self._writing)
1569        try:
1570            # Skip the file header:
1571            fheader = zef_file.read(sizeFileHeader)
1572            if len(fheader) != sizeFileHeader:
1573                raise BadZipFile("Truncated file header")
1574            fheader = struct.unpack(structFileHeader, fheader)
1575            if fheader[_FH_SIGNATURE] != stringFileHeader:
1576                raise BadZipFile("Bad magic number for file header")
1577
1578            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1579            if fheader[_FH_EXTRA_FIELD_LENGTH]:
1580                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
1581
1582            if zinfo.flag_bits & _MASK_COMPRESSED_PATCH:
1583                # Zip 2.7: compressed patched data
1584                raise NotImplementedError("compressed patched data (flag bit 5)")
1585
1586            if zinfo.flag_bits & _MASK_STRONG_ENCRYPTION:
1587                # strong encryption
1588                raise NotImplementedError("strong encryption (flag bit 6)")
1589
1590            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & _MASK_UTF_FILENAME:
1591                # UTF-8 filename
1592                fname_str = fname.decode("utf-8")
1593            else:
1594                fname_str = fname.decode(self.metadata_encoding or "cp437")
1595
1596            if fname_str != zinfo.orig_filename:
1597                raise BadZipFile(
1598                    'File name in directory %r and header %r differ.'
1599                    % (zinfo.orig_filename, fname))
1600
1601            if (zinfo._end_offset is not None and
1602                zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
1603                raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
1604
1605            # check for encrypted flag & handle password
1606            is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
1607            if is_encrypted:
1608                if not pwd:
1609                    pwd = self.pwd
1610                if pwd and not isinstance(pwd, bytes):
1611                    raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1612                if not pwd:
1613                    raise RuntimeError("File %r is encrypted, password "
1614                                       "required for extraction" % name)
1615            else:
1616                pwd = None
1617
1618            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
1619        except:
1620            zef_file.close()
1621            raise
1622
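    # Usage sketch (illustrative only): open() returns a file-like object for
    # a single member; mode 'r' streams an existing member and mode 'w'
    # writes a new one.  The archive and member names are hypothetical.
    #
    #   with ZipFile('new.zip', 'w') as zf:
    #       with zf.open('notes.txt', 'w') as dst:
    #           dst.write(b'hello\n')
    #
    #   with ZipFile('new.zip') as zf:
    #       with zf.open('notes.txt') as src:
    #           print(src.read())              # b'hello\n'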
1623    def _open_to_write(self, zinfo, force_zip64=False):
1624        if force_zip64 and not self._allowZip64:
1625            raise ValueError(
1626                "force_zip64 is True, but allowZip64 was False when opening "
1627                "the ZIP file."
1628            )
1629        if self._writing:
1630            raise ValueError("Can't write to the ZIP file while there is "
1631                             "another write handle open on it. "
1632                             "Close the first handle before opening another.")
1633
1634        # Size and CRC are overwritten with correct data after processing the file
1635        zinfo.compress_size = 0
1636        zinfo.CRC = 0
1637
1638        zinfo.flag_bits = 0x00
1639        if zinfo.compress_type == ZIP_LZMA:
1640            # Compressed data includes an end-of-stream (EOS) marker
1641            zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
1642        if not self._seekable:
1643            zinfo.flag_bits |= _MASK_USE_DATA_DESCRIPTOR
1644
1645        if not zinfo.external_attr:
1646            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------
1647
1648        # Compressed size can be larger than uncompressed size
1649        zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT)
1650        if not self._allowZip64 and zip64:
1651            raise LargeZipFile("Filesize would require ZIP64 extensions")
1652
1653        if self._seekable:
1654            self.fp.seek(self.start_dir)
1655        zinfo.header_offset = self.fp.tell()
1656
1657        self._writecheck(zinfo)
1658        self._didModify = True
1659
1660        self.fp.write(zinfo.FileHeader(zip64))
1661
1662        self._writing = True
1663        return _ZipWriteFile(self, zinfo, zip64)
1664
1665    def extract(self, member, path=None, pwd=None):
1666        """Extract a member from the archive to the current working directory,
1667           using its full name. Its file information is extracted as accurately
1668           as possible. `member' may be a filename or a ZipInfo object. You can
1669           specify a different directory using `path'.
1670        """
1671        if path is None:
1672            path = os.getcwd()
1673        else:
1674            path = os.fspath(path)
1675
1676        return self._extract_member(member, path, pwd)
1677
1678    def extractall(self, path=None, members=None, pwd=None):
1679        """Extract all members from the archive to the current working
1680           directory. `path' specifies a different directory to extract to.
1681           `members' is optional and must be a subset of the list returned
1682           by namelist().
1683        """
1684        if members is None:
1685            members = self.namelist()
1686
1687        if path is None:
1688            path = os.getcwd()
1689        else:
1690            path = os.fspath(path)
1691
1692        for zipinfo in members:
1693            self._extract_member(zipinfo, path, pwd)
1694
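    # Usage sketch (illustrative only): extract one member or all of them;
    # the archive, member names, and target directory are hypothetical.
    #
    #   with ZipFile('example.zip') as zf:
    #       zf.extract('docs/readme.txt', path='out')
    #       zf.extractall('out', members=['a.txt', 'b/c.txt'])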
1695    @classmethod
1696    def _sanitize_windows_name(cls, arcname, pathsep):
1697        """Replace bad characters and remove trailing dots from parts."""
1698        table = cls._windows_illegal_name_trans_table
1699        if not table:
1700            illegal = ':<>|"?*'
1701            table = str.maketrans(illegal, '_' * len(illegal))
1702            cls._windows_illegal_name_trans_table = table
1703        arcname = arcname.translate(table)
1704        # remove trailing dots
1705        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1706        # rejoin, removing empty parts.
1707        arcname = pathsep.join(x for x in arcname if x)
1708        return arcname
1709
1710    def _extract_member(self, member, targetpath, pwd):
1711        """Extract the ZipInfo object 'member' to a physical
1712           file on the path targetpath.
1713        """
1714        if not isinstance(member, ZipInfo):
1715            member = self.getinfo(member)
1716
1717        # build the destination pathname, replacing
1718        # forward slashes with platform-specific separators.
1719        arcname = member.filename.replace('/', os.path.sep)
1720
1721        if os.path.altsep:
1722            arcname = arcname.replace(os.path.altsep, os.path.sep)
1723        # interpret absolute pathname as relative, remove drive letter or
1724        # UNC path, redundant separators, "." and ".." components.
1725        arcname = os.path.splitdrive(arcname)[1]
1726        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
1727        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1728                                   if x not in invalid_path_parts)
1729        if os.path.sep == '\\':
1730            # filter illegal characters on Windows
1731            arcname = self._sanitize_windows_name(arcname, os.path.sep)
1732
1733        targetpath = os.path.join(targetpath, arcname)
1734        targetpath = os.path.normpath(targetpath)
1735
1736        # Create all upper directories if necessary.
1737        upperdirs = os.path.dirname(targetpath)
1738        if upperdirs and not os.path.exists(upperdirs):
1739            os.makedirs(upperdirs)
1740
1741        if member.is_dir():
1742            if not os.path.isdir(targetpath):
1743                os.mkdir(targetpath)
1744            return targetpath
1745
1746        with self.open(member, pwd=pwd) as source, \
1747             open(targetpath, "wb") as target:
1748            shutil.copyfileobj(source, target)
1749
1750        return targetpath
1751
1752    def _writecheck(self, zinfo):
1753        """Check for errors before writing a file to the archive."""
1754        if zinfo.filename in self.NameToInfo:
1755            import warnings
1756            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1757        if self.mode not in ('w', 'x', 'a'):
1758            raise ValueError("write() requires mode 'w', 'x', or 'a'")
1759        if not self.fp:
1760            raise ValueError(
1761                "Attempt to write ZIP archive that was already closed")
1762        _check_compression(zinfo.compress_type)
1763        if not self._allowZip64:
1764            requires_zip64 = None
1765            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1766                requires_zip64 = "Files count"
1767            elif zinfo.file_size > ZIP64_LIMIT:
1768                requires_zip64 = "Filesize"
1769            elif zinfo.header_offset > ZIP64_LIMIT:
1770                requires_zip64 = "Zipfile size"
1771            if requires_zip64:
1772                raise LargeZipFile(requires_zip64 +
1773                                   " would require ZIP64 extensions")
1774
1775    def write(self, filename, arcname=None,
1776              compress_type=None, compresslevel=None):
1777        """Put the bytes from filename into the archive under the name
1778        arcname."""
1779        if not self.fp:
1780            raise ValueError(
1781                "Attempt to write to ZIP archive that was already closed")
1782        if self._writing:
1783            raise ValueError(
1784                "Can't write to ZIP archive while an open writing handle exists"
1785            )
1786
1787        zinfo = ZipInfo.from_file(filename, arcname,
1788                                  strict_timestamps=self._strict_timestamps)
1789
1790        if zinfo.is_dir():
1791            zinfo.compress_size = 0
1792            zinfo.CRC = 0
1793            self.mkdir(zinfo)
1794        else:
1795            if compress_type is not None:
1796                zinfo.compress_type = compress_type
1797            else:
1798                zinfo.compress_type = self.compression
1799
1800            if compresslevel is not None:
1801                zinfo._compresslevel = compresslevel
1802            else:
1803                zinfo._compresslevel = self.compresslevel
1804
1805            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1806                shutil.copyfileobj(src, dest, 1024*8)
1807
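    # Usage sketch (illustrative only): add filesystem files, optionally
    # renaming them and overriding the compression settings.  File names are
    # hypothetical; ZIP_DEFLATED requires zlib.
    #
    #   with ZipFile('backup.zip', 'w') as zf:
    #       zf.write('notes.txt')                        # stored as 'notes.txt'
    #       zf.write('/tmp/report.csv', arcname='report.csv',
    #                compress_type=ZIP_DEFLATED, compresslevel=9)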
1808    def writestr(self, zinfo_or_arcname, data,
1809                 compress_type=None, compresslevel=None):
1810        """Write a file into the archive.  The content is 'data', which
1811        may be either a 'str' or a 'bytes' instance; if it is a 'str',
1812        it is encoded as UTF-8 first.
1813        'zinfo_or_arcname' is either a ZipInfo instance or
1814        the name of the file in the archive."""
1815        if isinstance(data, str):
1816            data = data.encode("utf-8")
1817        if not isinstance(zinfo_or_arcname, ZipInfo):
1818            zinfo = ZipInfo(filename=zinfo_or_arcname,
1819                            date_time=time.localtime(time.time())[:6])
1820            zinfo.compress_type = self.compression
1821            zinfo._compresslevel = self.compresslevel
1822            if zinfo.filename[-1] == '/':
1823                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1824                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1825            else:
1826                zinfo.external_attr = 0o600 << 16     # ?rw-------
1827        else:
1828            zinfo = zinfo_or_arcname
1829
1830        if not self.fp:
1831            raise ValueError(
1832                "Attempt to write to ZIP archive that was already closed")
1833        if self._writing:
1834            raise ValueError(
1835                "Can't write to ZIP archive while an open writing handle exists."
1836            )
1837
1838        if compress_type is not None:
1839            zinfo.compress_type = compress_type
1840
1841        if compresslevel is not None:
1842            zinfo._compresslevel = compresslevel
1843
1844        zinfo.file_size = len(data)            # Uncompressed size
1845        with self._lock:
1846            with self.open(zinfo, mode='w') as dest:
1847                dest.write(data)
1848
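    # Usage sketch (illustrative only): writestr() takes in-memory data; a
    # str is encoded as UTF-8, and a ZipInfo may be passed to control the
    # stored metadata.  The archive name is hypothetical.
    #
    #   with ZipFile('generated.zip', 'w') as zf:
    #       zf.writestr('hello.txt', 'hello world')
    #       info = ZipInfo('data/raw.bin', date_time=(2020, 1, 1, 0, 0, 0))
    #       zf.writestr(info, b'\x00\x01\x02', compress_type=ZIP_STORED)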
1849    def mkdir(self, zinfo_or_directory_name, mode=511):
1850        """Create a directory inside the zip archive."""
1851        if isinstance(zinfo_or_directory_name, ZipInfo):
1852            zinfo = zinfo_or_directory_name
1853            if not zinfo.is_dir():
1854                raise ValueError("The given ZipInfo does not describe a directory")
1855        elif isinstance(zinfo_or_directory_name, str):
1856            directory_name = zinfo_or_directory_name
1857            if not directory_name.endswith("/"):
1858                directory_name += "/"
1859            zinfo = ZipInfo(directory_name)
1860            zinfo.compress_size = 0
1861            zinfo.CRC = 0
1862            zinfo.external_attr = ((0o40000 | mode) & 0xFFFF) << 16
1863            zinfo.file_size = 0
1864            zinfo.external_attr |= 0x10
1865        else:
1866            raise TypeError("Expected type str or ZipInfo")
1867
1868        with self._lock:
1869            if self._seekable:
1870                self.fp.seek(self.start_dir)
1871            zinfo.header_offset = self.fp.tell()  # Start of header bytes
1872            if zinfo.compress_type == ZIP_LZMA:
1873                # Compressed data includes an end-of-stream (EOS) marker
1874                zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
1875
1876            self._writecheck(zinfo)
1877            self._didModify = True
1878
1879            self.filelist.append(zinfo)
1880            self.NameToInfo[zinfo.filename] = zinfo
1881            self.fp.write(zinfo.FileHeader(False))
1882            self.start_dir = self.fp.tell()
1883
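    # Usage sketch (illustrative only): mkdir() stores an empty directory
    # entry; the default mode 511 is 0o777.  The archive name is
    # hypothetical.
    #
    #   with ZipFile('layout.zip', 'w') as zf:
    #       zf.mkdir('assets')                 # stored as 'assets/'
    #       zf.mkdir('private', mode=0o700)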
1884    def __del__(self):
1885        """Call the "close()" method in case the user forgot."""
1886        self.close()
1887
1888    def close(self):
1889        """Close the file, and for mode 'w', 'x' and 'a' write the ending
1890        records."""
1891        if self.fp is None:
1892            return
1893
1894        if self._writing:
1895            raise ValueError("Can't close the ZIP file while there is "
1896                             "an open writing handle on it. "
1897                             "Close the writing handle before closing the zip.")
1898
1899        try:
1900            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1901                with self._lock:
1902                    if self._seekable:
1903                        self.fp.seek(self.start_dir)
1904                    self._write_end_record()
1905        finally:
1906            fp = self.fp
1907            self.fp = None
1908            self._fpclose(fp)
1909
1910    def _write_end_record(self):
1911        for zinfo in self.filelist:         # write central directory
1912            dt = zinfo.date_time
1913            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1914            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1915            extra = []
1916            if zinfo.file_size > ZIP64_LIMIT \
1917               or zinfo.compress_size > ZIP64_LIMIT:
1918                extra.append(zinfo.file_size)
1919                extra.append(zinfo.compress_size)
1920                file_size = 0xffffffff
1921                compress_size = 0xffffffff
1922            else:
1923                file_size = zinfo.file_size
1924                compress_size = zinfo.compress_size
1925
1926            if zinfo.header_offset > ZIP64_LIMIT:
1927                extra.append(zinfo.header_offset)
1928                header_offset = 0xffffffff
1929            else:
1930                header_offset = zinfo.header_offset
1931
1932            extra_data = zinfo.extra
1933            min_version = 0
1934            if extra:
1935                # Append a ZIP64 field to the extra data
1936                extra_data = _strip_extra(extra_data, (1,))
1937                extra_data = struct.pack(
1938                    '<HH' + 'Q'*len(extra),
1939                    1, 8*len(extra), *extra) + extra_data
1940
1941                min_version = ZIP64_VERSION
1942
1943            if zinfo.compress_type == ZIP_BZIP2:
1944                min_version = max(BZIP2_VERSION, min_version)
1945            elif zinfo.compress_type == ZIP_LZMA:
1946                min_version = max(LZMA_VERSION, min_version)
1947
1948            extract_version = max(min_version, zinfo.extract_version)
1949            create_version = max(min_version, zinfo.create_version)
1950            filename, flag_bits = zinfo._encodeFilenameFlags()
1951            centdir = struct.pack(structCentralDir,
1952                                  stringCentralDir, create_version,
1953                                  zinfo.create_system, extract_version, zinfo.reserved,
1954                                  flag_bits, zinfo.compress_type, dostime, dosdate,
1955                                  zinfo.CRC, compress_size, file_size,
1956                                  len(filename), len(extra_data), len(zinfo.comment),
1957                                  0, zinfo.internal_attr, zinfo.external_attr,
1958                                  header_offset)
1959            self.fp.write(centdir)
1960            self.fp.write(filename)
1961            self.fp.write(extra_data)
1962            self.fp.write(zinfo.comment)
1963
1964        pos2 = self.fp.tell()
1965        # Write end-of-zip-archive record
1966        centDirCount = len(self.filelist)
1967        centDirSize = pos2 - self.start_dir
1968        centDirOffset = self.start_dir
1969        requires_zip64 = None
1970        if centDirCount > ZIP_FILECOUNT_LIMIT:
1971            requires_zip64 = "Files count"
1972        elif centDirOffset > ZIP64_LIMIT:
1973            requires_zip64 = "Central directory offset"
1974        elif centDirSize > ZIP64_LIMIT:
1975            requires_zip64 = "Central directory size"
1976        if requires_zip64:
1977            # Need to write the ZIP64 end-of-archive records
1978            if not self._allowZip64:
1979                raise LargeZipFile(requires_zip64 +
1980                                   " would require ZIP64 extensions")
1981            zip64endrec = struct.pack(
1982                structEndArchive64, stringEndArchive64,
1983                44, 45, 45, 0, 0, centDirCount, centDirCount,
1984                centDirSize, centDirOffset)
1985            self.fp.write(zip64endrec)
1986
1987            zip64locrec = struct.pack(
1988                structEndArchive64Locator,
1989                stringEndArchive64Locator, 0, pos2, 1)
1990            self.fp.write(zip64locrec)
1991            centDirCount = min(centDirCount, 0xFFFF)
1992            centDirSize = min(centDirSize, 0xFFFFFFFF)
1993            centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1994
1995        endrec = struct.pack(structEndArchive, stringEndArchive,
1996                             0, 0, centDirCount, centDirCount,
1997                             centDirSize, centDirOffset, len(self._comment))
1998        self.fp.write(endrec)
1999        self.fp.write(self._comment)
2000        if self.mode == "a":
2001            self.fp.truncate()
2002        self.fp.flush()
2003
2004    def _fpclose(self, fp):
2005        assert self._fileRefCnt > 0
2006        self._fileRefCnt -= 1
2007        if not self._fileRefCnt and not self._filePassed:
2008            fp.close()
2009
2010
2011class PyZipFile(ZipFile):
2012    """Class to create ZIP archives with Python library files and packages."""
2013
2014    def __init__(self, file, mode="r", compression=ZIP_STORED,
2015                 allowZip64=True, optimize=-1):
2016        ZipFile.__init__(self, file, mode=mode, compression=compression,
2017                         allowZip64=allowZip64)
2018        self._optimize = optimize
2019
2020    def writepy(self, pathname, basename="", filterfunc=None):
2021        """Add all files from "pathname" to the ZIP archive.
2022
2023        If pathname is a package directory, search it and all package
2024        subdirectories recursively for *.py files and add the
2025        corresponding modules to the archive.  If pathname is a plain
2026        directory, add the *.py files it contains.  Otherwise, pathname
2027        must be a single Python *.py file, and that module is added to
2028        the archive.  Added modules are always stored as module.pyc;
2029        this method compiles module.py into module.pyc if
2030        necessary.
2031        If filterfunc is given, it is called with each candidate path;
2032        when it returns a false value, that file or directory is skipped.
2033        """
2034        pathname = os.fspath(pathname)
2035        if filterfunc and not filterfunc(pathname):
2036            if self.debug:
2037                label = 'path' if os.path.isdir(pathname) else 'file'
2038                print('%s %r skipped by filterfunc' % (label, pathname))
2039            return
2040        dir, name = os.path.split(pathname)
2041        if os.path.isdir(pathname):
2042            initname = os.path.join(pathname, "__init__.py")
2043            if os.path.isfile(initname):
2044                # This is a package directory, add it
2045                if basename:
2046                    basename = "%s/%s" % (basename, name)
2047                else:
2048                    basename = name
2049                if self.debug:
2050                    print("Adding package in", pathname, "as", basename)
2051                fname, arcname = self._get_codename(initname[0:-3], basename)
2052                if self.debug:
2053                    print("Adding", arcname)
2054                self.write(fname, arcname)
2055                dirlist = sorted(os.listdir(pathname))
2056                dirlist.remove("__init__.py")
2057                # Add all *.py files and package subdirectories
2058                for filename in dirlist:
2059                    path = os.path.join(pathname, filename)
2060                    root, ext = os.path.splitext(filename)
2061                    if os.path.isdir(path):
2062                        if os.path.isfile(os.path.join(path, "__init__.py")):
2063                            # This is a package directory, add it
2064                            self.writepy(path, basename,
2065                                         filterfunc=filterfunc)  # Recursive call
2066                    elif ext == ".py":
2067                        if filterfunc and not filterfunc(path):
2068                            if self.debug:
2069                                print('file %r skipped by filterfunc' % path)
2070                            continue
2071                        fname, arcname = self._get_codename(path[0:-3],
2072                                                            basename)
2073                        if self.debug:
2074                            print("Adding", arcname)
2075                        self.write(fname, arcname)
2076            else:
2077                # This is NOT a package directory, add its files at top level
2078                if self.debug:
2079                    print("Adding files from directory", pathname)
2080                for filename in sorted(os.listdir(pathname)):
2081                    path = os.path.join(pathname, filename)
2082                    root, ext = os.path.splitext(filename)
2083                    if ext == ".py":
2084                        if filterfunc and not filterfunc(path):
2085                            if self.debug:
2086                                print('file %r skipped by filterfunc' % path)
2087                            continue
2088                        fname, arcname = self._get_codename(path[0:-3],
2089                                                            basename)
2090                        if self.debug:
2091                            print("Adding", arcname)
2092                        self.write(fname, arcname)
2093        else:
2094            if pathname[-3:] != ".py":
2095                raise RuntimeError(
2096                    'Files added with writepy() must end with ".py"')
2097            fname, arcname = self._get_codename(pathname[0:-3], basename)
2098            if self.debug:
2099                print("Adding file", arcname)
2100            self.write(fname, arcname)
2101
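    # Usage sketch (illustrative only): bundle a package directory and a
    # single module as .pyc files.  The paths and the filter are
    # hypothetical.
    #
    #   with PyZipFile('bundle.zip', 'w', optimize=0) as pzf:
    #       pzf.writepy('mypkg')                       # package directory
    #       pzf.writepy('extra_module.py')
    #       pzf.writepy('scripts', filterfunc=lambda p: 'test' not in p)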
2102    def _get_codename(self, pathname, basename):
2103        """Return (filename, archivename) for the path.
2104
2105        Given a module name path, return the correct file path and
2106        archive name, compiling if necessary.  For example, given
2107        /python/lib/string, return (/python/lib/string.pyc, string).
2108        """
2109        def _compile(file, optimize=-1):
2110            import py_compile
2111            if self.debug:
2112                print("Compiling", file)
2113            try:
2114                py_compile.compile(file, doraise=True, optimize=optimize)
2115            except py_compile.PyCompileError as err:
2116                print(err.msg)
2117                return False
2118            return True
2119
2120        file_py  = pathname + ".py"
2121        file_pyc = pathname + ".pyc"
2122        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2123        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2124        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
2125        if self._optimize == -1:
2126            # legacy mode: use whatever file is present
2127            if (os.path.isfile(file_pyc) and
2128                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2129                # Use .pyc file.
2130                arcname = fname = file_pyc
2131            elif (os.path.isfile(pycache_opt0) and
2132                  os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
2133                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2134                # file name in the archive.
2135                fname = pycache_opt0
2136                arcname = file_pyc
2137            elif (os.path.isfile(pycache_opt1) and
2138                  os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2139                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2140                # file name in the archive.
2141                fname = pycache_opt1
2142                arcname = file_pyc
2143            elif (os.path.isfile(pycache_opt2) and
2144                  os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2145                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2146                # file name in the archive.
2147                fname = pycache_opt2
2148                arcname = file_pyc
2149            else:
2150                # Compile py into PEP 3147 pyc file.
2151                if _compile(file_py):
2152                    if sys.flags.optimize == 0:
2153                        fname = pycache_opt0
2154                    elif sys.flags.optimize == 1:
2155                        fname = pycache_opt1
2156                    else:
2157                        fname = pycache_opt2
2158                    arcname = file_pyc
2159                else:
2160                    fname = arcname = file_py
2161        else:
2162            # new mode: use given optimization level
2163            if self._optimize == 0:
2164                fname = pycache_opt0
2165                arcname = file_pyc
2166            else:
2167                arcname = file_pyc
2168                if self._optimize == 1:
2169                    fname = pycache_opt1
2170                elif self._optimize == 2:
2171                    fname = pycache_opt2
2172                else:
2173                    msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2174                    raise ValueError(msg)
2175            if not (os.path.isfile(fname) and
2176                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2177                if not _compile(file_py, optimize=self._optimize):
2178                    fname = arcname = file_py
2179        archivename = os.path.split(arcname)[1]
2180        if basename:
2181            archivename = "%s/%s" % (basename, archivename)
2182        return (fname, archivename)
2183
2184
2185def _parents(path):
2186    """
2187    Given a path with elements separated by
2188    posixpath.sep, generate all parents of that path.
2189
2190    >>> list(_parents('b/d'))
2191    ['b']
2192    >>> list(_parents('/b/d/'))
2193    ['/b']
2194    >>> list(_parents('b/d/f/'))
2195    ['b/d', 'b']
2196    >>> list(_parents('b'))
2197    []
2198    >>> list(_parents(''))
2199    []
2200    """
2201    return itertools.islice(_ancestry(path), 1, None)
2202
2203
2204def _ancestry(path):
2205    """
2206    Given a path with elements separated by
2207    posixpath.sep, generate all elements of that path
2208
2209    >>> list(_ancestry('b/d'))
2210    ['b/d', 'b']
2211    >>> list(_ancestry('/b/d/'))
2212    ['/b/d', '/b']
2213    >>> list(_ancestry('b/d/f/'))
2214    ['b/d/f', 'b/d', 'b']
2215    >>> list(_ancestry('b'))
2216    ['b']
2217    >>> list(_ancestry(''))
2218    []
2219    """
2220    path = path.rstrip(posixpath.sep)
2221    while path and path != posixpath.sep:
2222        yield path
2223        path, tail = posixpath.split(path)
2224
2225
2226_dedupe = dict.fromkeys
2227"""Deduplicate an iterable in original order"""
2228
2229
2230def _difference(minuend, subtrahend):
2231    """
2232    Return items in minuend not in subtrahend, retaining order
2233    with O(1) lookup.
2234    """
2235    return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2236
2237
2238class CompleteDirs(ZipFile):
2239    """
2240    A ZipFile subclass that ensures that implied directories
2241    are always included in the namelist.
2242    """
2243
2244    @staticmethod
2245    def _implied_dirs(names):
2246        parents = itertools.chain.from_iterable(map(_parents, names))
2247        as_dirs = (p + posixpath.sep for p in parents)
2248        return _dedupe(_difference(as_dirs, names))
2249
2250    def namelist(self):
2251        names = super(CompleteDirs, self).namelist()
2252        return names + list(self._implied_dirs(names))
2253
2254    def _name_set(self):
2255        return set(self.namelist())
2256
2257    def resolve_dir(self, name):
2258        """
2259        If the name represents a directory, return that name
2260        as a directory (with the trailing slash).
2261        """
2262        names = self._name_set()
2263        dirname = name + '/'
2264        dir_match = name not in names and dirname in names
2265        return dirname if dir_match else name
2266
2267    def getinfo(self, name):
2268        """
2269        Supplement getinfo for implied dirs.
2270        """
2271        try:
2272            return super().getinfo(name)
2273        except KeyError:
2274            if not name.endswith('/') or name not in self._name_set():
2275                raise
2276            return ZipInfo(filename=name)
2277
2278    @classmethod
2279    def make(cls, source):
2280        """
2281        Given a source (filename or zipfile), return an
2282        appropriate CompleteDirs subclass.
2283        """
2284        if isinstance(source, CompleteDirs):
2285            return source
2286
2287        if not isinstance(source, ZipFile):
2288            return cls(source)
2289
2290        # Only allow FastLookup when the supplied zipfile is read-only
2291        if 'r' not in source.mode:
2292            cls = CompleteDirs
2293
2294        source.__class__ = cls
2295        return source
2296
2297
2298class FastLookup(CompleteDirs):
2299    """
2300    ZipFile subclass to ensure implicit
2301    dirs exist and are resolved rapidly.
2302    """
2303
2304    def namelist(self):
2305        with contextlib.suppress(AttributeError):
2306            return self.__names
2307        self.__names = super(FastLookup, self).namelist()
2308        return self.__names
2309
2310    def _name_set(self):
2311        with contextlib.suppress(AttributeError):
2312            return self.__lookup
2313        self.__lookup = super(FastLookup, self)._name_set()
2314        return self.__lookup
2315
2316
2317def _extract_text_encoding(encoding=None, *args, **kwargs):
2318    # stacklevel=3 so that the caller of the caller sees any warning.
2319    return io.text_encoding(encoding, 3), args, kwargs
2320
2321
2322class Path:
2323    """
2324    A pathlib-compatible interface for zip files.
2325
2326    Consider a zip file with this structure::
2327
2328        .
2329        ├── a.txt
2330        └── b
2331            ├── c.txt
2332            └── d
2333                └── e.txt
2334
2335    >>> data = io.BytesIO()
2336    >>> zf = ZipFile(data, 'w')
2337    >>> zf.writestr('a.txt', 'content of a')
2338    >>> zf.writestr('b/c.txt', 'content of c')
2339    >>> zf.writestr('b/d/e.txt', 'content of e')
2340    >>> zf.filename = 'mem/abcde.zip'
2341
2342    Path accepts the zipfile object itself or a filename
2343
2344    >>> root = Path(zf)
2345
2346    From there, several path operations are available.
2347
2348    Directory iteration (including the zip file itself):
2349
2350    >>> a, b = root.iterdir()
2351    >>> a
2352    Path('mem/abcde.zip', 'a.txt')
2353    >>> b
2354    Path('mem/abcde.zip', 'b/')
2355
2356    name property:
2357
2358    >>> b.name
2359    'b'
2360
2361    join with divide operator:
2362
2363    >>> c = b / 'c.txt'
2364    >>> c
2365    Path('mem/abcde.zip', 'b/c.txt')
2366    >>> c.name
2367    'c.txt'
2368
2369    Read text:
2370
2371    >>> c.read_text()
2372    'content of c'
2373
2374    existence:
2375
2376    >>> c.exists()
2377    True
2378    >>> (b / 'missing.txt').exists()
2379    False
2380
2381    Coercion to string:
2382
2383    >>> import os
2384    >>> str(c).replace(os.sep, posixpath.sep)
2385    'mem/abcde.zip/b/c.txt'
2386
2387    At the root, ``name``, ``filename``, and ``parent``
2388    resolve to the zipfile. Note these attributes are not
2389    valid and will raise a ``ValueError`` if the zipfile
2390    has no filename.
2391
2392    >>> root.name
2393    'abcde.zip'
2394    >>> str(root.filename).replace(os.sep, posixpath.sep)
2395    'mem/abcde.zip'
2396    >>> str(root.parent)
2397    'mem'
2398    """
2399
2400    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
2401
2402    def __init__(self, root, at=""):
2403        """
2404        Construct a Path from a ZipFile or filename.
2405
2406        Note: When the source is an existing ZipFile object,
2407        its type (__class__) will be mutated to a
2408        specialized type. If the caller wishes to retain the
2409        original type, the caller should either create a
2410        separate ZipFile object or pass a filename.
2411        """
2412        self.root = FastLookup.make(root)
2413        self.at = at
2414
2415    def open(self, mode='r', *args, pwd=None, **kwargs):
2416        """
2417        Open this entry as text or binary following the semantics
2418        of ``pathlib.Path.open()`` by passing arguments through
2419        to io.TextIOWrapper().
2420        """
2421        if self.is_dir():
2422            raise IsADirectoryError(self)
2423        zip_mode = mode[0]
2424        if not self.exists() and zip_mode == 'r':
2425            raise FileNotFoundError(self)
2426        stream = self.root.open(self.at, zip_mode, pwd=pwd)
2427        if 'b' in mode:
2428            if args or kwargs:
2429                raise ValueError("encoding args invalid for binary operation")
2430            return stream
2431        # Text mode:
2432        encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
2433        return io.TextIOWrapper(stream, encoding, *args, **kwargs)
2434
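    # Usage sketch (illustrative only): Path.open() follows pathlib
    # semantics, returning text by default and raw bytes with 'rb'.  The
    # archive and member names are hypothetical.
    #
    #   root = Path('example.zip')
    #   with (root / 'b' / 'c.txt').open(encoding='utf-8') as f:
    #       print(f.read())
    #   with (root / 'b' / 'c.txt').open('rb') as f:
    #       raw = f.read()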
2435    @property
2436    def name(self):
2437        return pathlib.Path(self.at).name or self.filename.name
2438
2439    @property
2440    def suffix(self):
2441        return pathlib.Path(self.at).suffix or self.filename.suffix
2442
2443    @property
2444    def suffixes(self):
2445        return pathlib.Path(self.at).suffixes or self.filename.suffixes
2446
2447    @property
2448    def stem(self):
2449        return pathlib.Path(self.at).stem or self.filename.stem
2450
2451    @property
2452    def filename(self):
2453        return pathlib.Path(self.root.filename).joinpath(self.at)
2454
2455    def read_text(self, *args, **kwargs):
2456        encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
2457        with self.open('r', encoding, *args, **kwargs) as strm:
2458            return strm.read()
2459
2460    def read_bytes(self):
2461        with self.open('rb') as strm:
2462            return strm.read()
2463
2464    def _is_child(self, path):
2465        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
2466
2467    def _next(self, at):
2468        return self.__class__(self.root, at)
2469
2470    def is_dir(self):
2471        return not self.at or self.at.endswith("/")
2472
2473    def is_file(self):
2474        return self.exists() and not self.is_dir()
2475
2476    def exists(self):
2477        return self.at in self.root._name_set()
2478
2479    def iterdir(self):
2480        if not self.is_dir():
2481            raise ValueError("Can't listdir a file")
2482        subs = map(self._next, self.root.namelist())
2483        return filter(self._is_child, subs)
2484
2485    def __str__(self):
2486        return posixpath.join(self.root.filename, self.at)
2487
2488    def __repr__(self):
2489        return self.__repr.format(self=self)
2490
2491    def joinpath(self, *other):
2492        next = posixpath.join(self.at, *other)
2493        return self._next(self.root.resolve_dir(next))
2494
2495    __truediv__ = joinpath
2496
2497    @property
2498    def parent(self):
2499        if not self.at:
2500            return self.filename.parent
2501        parent_at = posixpath.dirname(self.at.rstrip('/'))
2502        if parent_at:
2503            parent_at += '/'
2504        return self._next(parent_at)
2505
2506
2507def main(args=None):
2508    import argparse
2509
2510    description = 'A simple command-line interface for the zipfile module.'
2511    parser = argparse.ArgumentParser(description=description)
2512    group = parser.add_mutually_exclusive_group(required=True)
2513    group.add_argument('-l', '--list', metavar='<zipfile>',
2514                       help='Show listing of a zipfile')
2515    group.add_argument('-e', '--extract', nargs=2,
2516                       metavar=('<zipfile>', '<output_dir>'),
2517                       help='Extract zipfile into target dir')
2518    group.add_argument('-c', '--create', nargs='+',
2519                       metavar=('<name>', '<file>'),
2520                       help='Create zipfile from sources')
2521    group.add_argument('-t', '--test', metavar='<zipfile>',
2522                       help='Test if a zipfile is valid')
2523    parser.add_argument('--metadata-encoding', metavar='<encoding>',
2524                        help='Specify encoding of member names for -l, -e and -t')
2525    args = parser.parse_args(args)
2526
2527    encoding = args.metadata_encoding
2528
2529    if args.test is not None:
2530        src = args.test
2531        with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
2532            badfile = zf.testzip()
2533        if badfile:
2534            print("The following enclosed file is corrupted: {!r}".format(badfile))
2535        print("Done testing")
2536
2537    elif args.list is not None:
2538        src = args.list
2539        with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
2540            zf.printdir()
2541
2542    elif args.extract is not None:
2543        src, curdir = args.extract
2544        with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
2545            zf.extractall(curdir)
2546
2547    elif args.create is not None:
2548        if encoding:
2549            print("Non-conforming encodings not supported with -c.",
2550                  file=sys.stderr)
2551            sys.exit(1)
2552
2553        zip_name = args.create.pop(0)
2554        files = args.create
2555
2556        def addToZip(zf, path, zippath):
2557            if os.path.isfile(path):
2558                zf.write(path, zippath, ZIP_DEFLATED)
2559            elif os.path.isdir(path):
2560                if zippath:
2561                    zf.write(path, zippath)
2562                for nm in sorted(os.listdir(path)):
2563                    addToZip(zf,
2564                             os.path.join(path, nm), os.path.join(zippath, nm))
2565            # else: ignore
2566
2567        with ZipFile(zip_name, 'w') as zf:
2568            for path in files:
2569                zippath = os.path.basename(path)
2570                if not zippath:
2571                    zippath = os.path.basename(os.path.dirname(path))
2572                if zippath in ('', os.curdir, os.pardir):
2573                    zippath = ''
2574                addToZip(zf, path, zippath)
2575
2576
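# Command-line usage sketch (illustrative only; archive and file names are
# hypothetical):
#
#   python -m zipfile -l archive.zip               # list contents
#   python -m zipfile -t archive.zip               # CRC-test all members
#   python -m zipfile -e archive.zip out_dir       # extract into out_dir
#   python -m zipfile -c archive.zip file1.txt dir1
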
2577if __name__ == "__main__":
2578    main()
2579