1""" 2Read and write ZIP files. 3 4XXX references to utf-8 need further investigation. 5""" 6import binascii 7import importlib.util 8import io 9import itertools 10import os 11import posixpath 12import shutil 13import stat 14import struct 15import sys 16import threading 17import time 18import contextlib 19import pathlib 20 21try: 22 import zlib # We may need its compression method 23 crc32 = zlib.crc32 24except ImportError: 25 zlib = None 26 crc32 = binascii.crc32 27 28try: 29 import bz2 # We may need its compression method 30except ImportError: 31 bz2 = None 32 33try: 34 import lzma # We may need its compression method 35except ImportError: 36 lzma = None 37 38__all__ = ["BadZipFile", "BadZipfile", "error", 39 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", 40 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", 41 "Path"] 42 43class BadZipFile(Exception): 44 pass 45 46 47class LargeZipFile(Exception): 48 """ 49 Raised when writing a zipfile, the zipfile requires ZIP64 extensions 50 and those extensions are disabled. 51 """ 52 53error = BadZipfile = BadZipFile # Pre-3.2 compatibility names 54 55 56ZIP64_LIMIT = (1 << 31) - 1 57ZIP_FILECOUNT_LIMIT = (1 << 16) - 1 58ZIP_MAX_COMMENT = (1 << 16) - 1 59 60# constants for Zip file compression methods 61ZIP_STORED = 0 62ZIP_DEFLATED = 8 63ZIP_BZIP2 = 12 64ZIP_LZMA = 14 65# Other ZIP compression methods not supported 66 67DEFAULT_VERSION = 20 68ZIP64_VERSION = 45 69BZIP2_VERSION = 46 70LZMA_VERSION = 63 71# we recognize (but not necessarily support) all features up to that version 72MAX_EXTRACT_VERSION = 63 73 74# Below are some formats and associated data for reading/writing headers using 75# the struct module. The names and structures of headers/records are those used 76# in the PKWARE description of the ZIP file format: 77# http://www.pkware.com/documents/casestudies/APPNOTE.TXT 78# (URL valid as of January 2008) 79 80# The "end of central directory" structure, magic number, size, and indices 81# (section V.I in the format document) 82structEndArchive = b"<4s4H2LH" 83stringEndArchive = b"PK\005\006" 84sizeEndCentDir = struct.calcsize(structEndArchive) 85 86_ECD_SIGNATURE = 0 87_ECD_DISK_NUMBER = 1 88_ECD_DISK_START = 2 89_ECD_ENTRIES_THIS_DISK = 3 90_ECD_ENTRIES_TOTAL = 4 91_ECD_SIZE = 5 92_ECD_OFFSET = 6 93_ECD_COMMENT_SIZE = 7 94# These last two indices are not part of the structure as defined in the 95# spec, but they are used internally by this module as a convenience 96_ECD_COMMENT = 8 97_ECD_LOCATION = 9 98 99# The "central directory" structure, magic number, size, and indices 100# of entries in the structure (section V.F in the format document) 101structCentralDir = "<4s4B4HL2L5H2L" 102stringCentralDir = b"PK\001\002" 103sizeCentralDir = struct.calcsize(structCentralDir) 104 105# indexes of entries in the central directory structure 106_CD_SIGNATURE = 0 107_CD_CREATE_VERSION = 1 108_CD_CREATE_SYSTEM = 2 109_CD_EXTRACT_VERSION = 3 110_CD_EXTRACT_SYSTEM = 4 111_CD_FLAG_BITS = 5 112_CD_COMPRESS_TYPE = 6 113_CD_TIME = 7 114_CD_DATE = 8 115_CD_CRC = 9 116_CD_COMPRESSED_SIZE = 10 117_CD_UNCOMPRESSED_SIZE = 11 118_CD_FILENAME_LENGTH = 12 119_CD_EXTRA_FIELD_LENGTH = 13 120_CD_COMMENT_LENGTH = 14 121_CD_DISK_NUMBER_START = 15 122_CD_INTERNAL_FILE_ATTRIBUTES = 16 123_CD_EXTERNAL_FILE_ATTRIBUTES = 17 124_CD_LOCAL_HEADER_OFFSET = 18 125 126# General purpose bit flags 127# Zip Appnote: 4.4.4 general purpose bit flag: (2 bytes) 128_MASK_ENCRYPTED = 1 << 0 129# Bits 1 and 2 have different meanings depending on the compression used. 130_MASK_COMPRESS_OPTION_1 = 1 << 1 131# _MASK_COMPRESS_OPTION_2 = 1 << 2 132# _MASK_USE_DATA_DESCRIPTOR: If set, crc-32, compressed size and uncompressed 133# size are zero in the local header and the real values are written in the data 134# descriptor immediately following the compressed data. 135_MASK_USE_DATA_DESCRIPTOR = 1 << 3 136# Bit 4: Reserved for use with compression method 8, for enhanced deflating. 137# _MASK_RESERVED_BIT_4 = 1 << 4 138_MASK_COMPRESSED_PATCH = 1 << 5 139_MASK_STRONG_ENCRYPTION = 1 << 6 140# _MASK_UNUSED_BIT_7 = 1 << 7 141# _MASK_UNUSED_BIT_8 = 1 << 8 142# _MASK_UNUSED_BIT_9 = 1 << 9 143# _MASK_UNUSED_BIT_10 = 1 << 10 144_MASK_UTF_FILENAME = 1 << 11 145# Bit 12: Reserved by PKWARE for enhanced compression. 146# _MASK_RESERVED_BIT_12 = 1 << 12 147# _MASK_ENCRYPTED_CENTRAL_DIR = 1 << 13 148# Bit 14, 15: Reserved by PKWARE 149# _MASK_RESERVED_BIT_14 = 1 << 14 150# _MASK_RESERVED_BIT_15 = 1 << 15 151 152# The "local file header" structure, magic number, size, and indices 153# (section V.A in the format document) 154structFileHeader = "<4s2B4HL2L2H" 155stringFileHeader = b"PK\003\004" 156sizeFileHeader = struct.calcsize(structFileHeader) 157 158_FH_SIGNATURE = 0 159_FH_EXTRACT_VERSION = 1 160_FH_EXTRACT_SYSTEM = 2 161_FH_GENERAL_PURPOSE_FLAG_BITS = 3 162_FH_COMPRESSION_METHOD = 4 163_FH_LAST_MOD_TIME = 5 164_FH_LAST_MOD_DATE = 6 165_FH_CRC = 7 166_FH_COMPRESSED_SIZE = 8 167_FH_UNCOMPRESSED_SIZE = 9 168_FH_FILENAME_LENGTH = 10 169_FH_EXTRA_FIELD_LENGTH = 11 170 171# The "Zip64 end of central directory locator" structure, magic number, and size 172structEndArchive64Locator = "<4sLQL" 173stringEndArchive64Locator = b"PK\x06\x07" 174sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) 175 176# The "Zip64 end of central directory" record, magic number, size, and indices 177# (section V.G in the format document) 178structEndArchive64 = "<4sQ2H2L4Q" 179stringEndArchive64 = b"PK\x06\x06" 180sizeEndCentDir64 = struct.calcsize(structEndArchive64) 181 182_CD64_SIGNATURE = 0 183_CD64_DIRECTORY_RECSIZE = 1 184_CD64_CREATE_VERSION = 2 185_CD64_EXTRACT_VERSION = 3 186_CD64_DISK_NUMBER = 4 187_CD64_DISK_NUMBER_START = 5 188_CD64_NUMBER_ENTRIES_THIS_DISK = 6 189_CD64_NUMBER_ENTRIES_TOTAL = 7 190_CD64_DIRECTORY_SIZE = 8 191_CD64_OFFSET_START_CENTDIR = 9 192 193_DD_SIGNATURE = 0x08074b50 194 195_EXTRA_FIELD_STRUCT = struct.Struct('<HH') 196 197def _strip_extra(extra, xids): 198 # Remove Extra Fields with specified IDs. 199 unpack = _EXTRA_FIELD_STRUCT.unpack 200 modified = False 201 buffer = [] 202 start = i = 0 203 while i + 4 <= len(extra): 204 xid, xlen = unpack(extra[i : i + 4]) 205 j = i + 4 + xlen 206 if xid in xids: 207 if i != start: 208 buffer.append(extra[start : i]) 209 start = j 210 modified = True 211 i = j 212 if not modified: 213 return extra 214 if start != len(extra): 215 buffer.append(extra[start:]) 216 return b''.join(buffer) 217 218def _check_zipfile(fp): 219 try: 220 if _EndRecData(fp): 221 return True # file has correct magic number 222 except OSError: 223 pass 224 return False 225 226def is_zipfile(filename): 227 """Quickly see if a file is a ZIP file by checking the magic number. 228 229 The filename argument may be a file or file-like object too. 230 """ 231 result = False 232 try: 233 if hasattr(filename, "read"): 234 result = _check_zipfile(fp=filename) 235 else: 236 with open(filename, "rb") as fp: 237 result = _check_zipfile(fp) 238 except OSError: 239 pass 240 return result 241 242def _EndRecData64(fpin, offset, endrec): 243 """ 244 Read the ZIP64 end-of-archive records and use that to update endrec 245 """ 246 try: 247 fpin.seek(offset - sizeEndCentDir64Locator, 2) 248 except OSError: 249 # If the seek fails, the file is not large enough to contain a ZIP64 250 # end-of-archive record, so just return the end record we were given. 251 return endrec 252 253 data = fpin.read(sizeEndCentDir64Locator) 254 if len(data) != sizeEndCentDir64Locator: 255 return endrec 256 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) 257 if sig != stringEndArchive64Locator: 258 return endrec 259 260 if diskno != 0 or disks > 1: 261 raise BadZipFile("zipfiles that span multiple disks are not supported") 262 263 # Assume no 'zip64 extensible data' 264 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 265 data = fpin.read(sizeEndCentDir64) 266 if len(data) != sizeEndCentDir64: 267 return endrec 268 sig, sz, create_version, read_version, disk_num, disk_dir, \ 269 dircount, dircount2, dirsize, diroffset = \ 270 struct.unpack(structEndArchive64, data) 271 if sig != stringEndArchive64: 272 return endrec 273 274 # Update the original endrec using data from the ZIP64 record 275 endrec[_ECD_SIGNATURE] = sig 276 endrec[_ECD_DISK_NUMBER] = disk_num 277 endrec[_ECD_DISK_START] = disk_dir 278 endrec[_ECD_ENTRIES_THIS_DISK] = dircount 279 endrec[_ECD_ENTRIES_TOTAL] = dircount2 280 endrec[_ECD_SIZE] = dirsize 281 endrec[_ECD_OFFSET] = diroffset 282 return endrec 283 284 285def _EndRecData(fpin): 286 """Return data from the "End of Central Directory" record, or None. 287 288 The data is a list of the nine items in the ZIP "End of central dir" 289 record followed by a tenth item, the file seek offset of this record.""" 290 291 # Determine file size 292 fpin.seek(0, 2) 293 filesize = fpin.tell() 294 295 # Check to see if this is ZIP file with no archive comment (the 296 # "end of central directory" structure should be the last item in the 297 # file if this is the case). 298 try: 299 fpin.seek(-sizeEndCentDir, 2) 300 except OSError: 301 return None 302 data = fpin.read() 303 if (len(data) == sizeEndCentDir and 304 data[0:4] == stringEndArchive and 305 data[-2:] == b"\000\000"): 306 # the signature is correct and there's no comment, unpack structure 307 endrec = struct.unpack(structEndArchive, data) 308 endrec=list(endrec) 309 310 # Append a blank comment and record start offset 311 endrec.append(b"") 312 endrec.append(filesize - sizeEndCentDir) 313 314 # Try to read the "Zip64 end of central directory" structure 315 return _EndRecData64(fpin, -sizeEndCentDir, endrec) 316 317 # Either this is not a ZIP file, or it is a ZIP file with an archive 318 # comment. Search the end of the file for the "end of central directory" 319 # record signature. The comment is the last item in the ZIP file and may be 320 # up to 64K long. It is assumed that the "end of central directory" magic 321 # number does not appear in the comment. 322 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) 323 fpin.seek(maxCommentStart, 0) 324 data = fpin.read() 325 start = data.rfind(stringEndArchive) 326 if start >= 0: 327 # found the magic number; attempt to unpack and interpret 328 recData = data[start:start+sizeEndCentDir] 329 if len(recData) != sizeEndCentDir: 330 # Zip file is corrupted. 331 return None 332 endrec = list(struct.unpack(structEndArchive, recData)) 333 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file 334 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize] 335 endrec.append(comment) 336 endrec.append(maxCommentStart + start) 337 338 # Try to read the "Zip64 end of central directory" structure 339 return _EndRecData64(fpin, maxCommentStart + start - filesize, 340 endrec) 341 342 # Unable to find a valid end of central directory structure 343 return None 344 345 346class ZipInfo (object): 347 """Class with attributes describing each file in the ZIP archive.""" 348 349 __slots__ = ( 350 'orig_filename', 351 'filename', 352 'date_time', 353 'compress_type', 354 '_compresslevel', 355 'comment', 356 'extra', 357 'create_system', 358 'create_version', 359 'extract_version', 360 'reserved', 361 'flag_bits', 362 'volume', 363 'internal_attr', 364 'external_attr', 365 'header_offset', 366 'CRC', 367 'compress_size', 368 'file_size', 369 '_raw_time', 370 '_end_offset', 371 ) 372 373 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): 374 self.orig_filename = filename # Original file name in archive 375 376 # Terminate the file name at the first null byte. Null bytes in file 377 # names are used as tricks by viruses in archives. 378 null_byte = filename.find(chr(0)) 379 if null_byte >= 0: 380 filename = filename[0:null_byte] 381 # This is used to ensure paths in generated ZIP files always use 382 # forward slashes as the directory separator, as required by the 383 # ZIP format specification. 384 if os.sep != "/" and os.sep in filename: 385 filename = filename.replace(os.sep, "/") 386 387 self.filename = filename # Normalized file name 388 self.date_time = date_time # year, month, day, hour, min, sec 389 390 if date_time[0] < 1980: 391 raise ValueError('ZIP does not support timestamps before 1980') 392 393 # Standard values: 394 self.compress_type = ZIP_STORED # Type of compression for the file 395 self._compresslevel = None # Level for the compressor 396 self.comment = b"" # Comment for each file 397 self.extra = b"" # ZIP extra data 398 if sys.platform == 'win32': 399 self.create_system = 0 # System which created ZIP archive 400 else: 401 # Assume everything else is unix-y 402 self.create_system = 3 # System which created ZIP archive 403 self.create_version = DEFAULT_VERSION # Version which created ZIP archive 404 self.extract_version = DEFAULT_VERSION # Version needed to extract archive 405 self.reserved = 0 # Must be zero 406 self.flag_bits = 0 # ZIP flag bits 407 self.volume = 0 # Volume number of file header 408 self.internal_attr = 0 # Internal attributes 409 self.external_attr = 0 # External file attributes 410 self.compress_size = 0 # Size of the compressed file 411 self.file_size = 0 # Size of the uncompressed file 412 self._end_offset = None # Start of the next local header or central directory 413 # Other attributes are set by class ZipFile: 414 # header_offset Byte offset to the file header 415 # CRC CRC-32 of the uncompressed file 416 417 def __repr__(self): 418 result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)] 419 if self.compress_type != ZIP_STORED: 420 result.append(' compress_type=%s' % 421 compressor_names.get(self.compress_type, 422 self.compress_type)) 423 hi = self.external_attr >> 16 424 lo = self.external_attr & 0xFFFF 425 if hi: 426 result.append(' filemode=%r' % stat.filemode(hi)) 427 if lo: 428 result.append(' external_attr=%#x' % lo) 429 isdir = self.is_dir() 430 if not isdir or self.file_size: 431 result.append(' file_size=%r' % self.file_size) 432 if ((not isdir or self.compress_size) and 433 (self.compress_type != ZIP_STORED or 434 self.file_size != self.compress_size)): 435 result.append(' compress_size=%r' % self.compress_size) 436 result.append('>') 437 return ''.join(result) 438 439 def FileHeader(self, zip64=None): 440 """Return the per-file header as a bytes object. 441 442 When the optional zip64 arg is None rather than a bool, we will 443 decide based upon the file_size and compress_size, if known, 444 False otherwise. 445 """ 446 dt = self.date_time 447 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 448 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 449 if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 450 # Set these to zero because we write them after the file data 451 CRC = compress_size = file_size = 0 452 else: 453 CRC = self.CRC 454 compress_size = self.compress_size 455 file_size = self.file_size 456 457 extra = self.extra 458 459 min_version = 0 460 if zip64 is None: 461 # We always explicitly pass zip64 within this module.... This 462 # remains for anyone using ZipInfo.FileHeader as a public API. 463 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT 464 if zip64: 465 fmt = '<HHQQ' 466 extra = extra + struct.pack(fmt, 467 1, struct.calcsize(fmt)-4, file_size, compress_size) 468 file_size = 0xffffffff 469 compress_size = 0xffffffff 470 min_version = ZIP64_VERSION 471 472 if self.compress_type == ZIP_BZIP2: 473 min_version = max(BZIP2_VERSION, min_version) 474 elif self.compress_type == ZIP_LZMA: 475 min_version = max(LZMA_VERSION, min_version) 476 477 self.extract_version = max(min_version, self.extract_version) 478 self.create_version = max(min_version, self.create_version) 479 filename, flag_bits = self._encodeFilenameFlags() 480 header = struct.pack(structFileHeader, stringFileHeader, 481 self.extract_version, self.reserved, flag_bits, 482 self.compress_type, dostime, dosdate, CRC, 483 compress_size, file_size, 484 len(filename), len(extra)) 485 return header + filename + extra 486 487 def _encodeFilenameFlags(self): 488 try: 489 return self.filename.encode('ascii'), self.flag_bits 490 except UnicodeEncodeError: 491 return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME 492 493 def _decodeExtra(self): 494 # Try to decode the extra field. 495 extra = self.extra 496 unpack = struct.unpack 497 while len(extra) >= 4: 498 tp, ln = unpack('<HH', extra[:4]) 499 if ln+4 > len(extra): 500 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) 501 if tp == 0x0001: 502 data = extra[4:ln+4] 503 # ZIP64 extension (large files and/or large archives) 504 try: 505 if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF): 506 field = "File size" 507 self.file_size, = unpack('<Q', data[:8]) 508 data = data[8:] 509 if self.compress_size == 0xFFFF_FFFF: 510 field = "Compress size" 511 self.compress_size, = unpack('<Q', data[:8]) 512 data = data[8:] 513 if self.header_offset == 0xFFFF_FFFF: 514 field = "Header offset" 515 self.header_offset, = unpack('<Q', data[:8]) 516 except struct.error: 517 raise BadZipFile(f"Corrupt zip64 extra field. " 518 f"{field} not found.") from None 519 520 extra = extra[ln+4:] 521 522 @classmethod 523 def from_file(cls, filename, arcname=None, *, strict_timestamps=True): 524 """Construct an appropriate ZipInfo for a file on the filesystem. 525 526 filename should be the path to a file or directory on the filesystem. 527 528 arcname is the name which it will have within the archive (by default, 529 this will be the same as filename, but without a drive letter and with 530 leading path separators removed). 531 """ 532 if isinstance(filename, os.PathLike): 533 filename = os.fspath(filename) 534 st = os.stat(filename) 535 isdir = stat.S_ISDIR(st.st_mode) 536 mtime = time.localtime(st.st_mtime) 537 date_time = mtime[0:6] 538 if not strict_timestamps and date_time[0] < 1980: 539 date_time = (1980, 1, 1, 0, 0, 0) 540 elif not strict_timestamps and date_time[0] > 2107: 541 date_time = (2107, 12, 31, 23, 59, 59) 542 # Create ZipInfo instance to store file information 543 if arcname is None: 544 arcname = filename 545 arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) 546 while arcname[0] in (os.sep, os.altsep): 547 arcname = arcname[1:] 548 if isdir: 549 arcname += '/' 550 zinfo = cls(arcname, date_time) 551 zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes 552 if isdir: 553 zinfo.file_size = 0 554 zinfo.external_attr |= 0x10 # MS-DOS directory flag 555 else: 556 zinfo.file_size = st.st_size 557 558 return zinfo 559 560 def is_dir(self): 561 """Return True if this archive member is a directory.""" 562 return self.filename[-1] == '/' 563 564 565# ZIP encryption uses the CRC32 one-byte primitive for scrambling some 566# internal keys. We noticed that a direct implementation is faster than 567# relying on binascii.crc32(). 568 569_crctable = None 570def _gen_crc(crc): 571 for j in range(8): 572 if crc & 1: 573 crc = (crc >> 1) ^ 0xEDB88320 574 else: 575 crc >>= 1 576 return crc 577 578# ZIP supports a password-based form of encryption. Even though known 579# plaintext attacks have been found against it, it is still useful 580# to be able to get data out of such a file. 581# 582# Usage: 583# zd = _ZipDecrypter(mypwd) 584# plain_bytes = zd(cypher_bytes) 585 586def _ZipDecrypter(pwd): 587 key0 = 305419896 588 key1 = 591751049 589 key2 = 878082192 590 591 global _crctable 592 if _crctable is None: 593 _crctable = list(map(_gen_crc, range(256))) 594 crctable = _crctable 595 596 def crc32(ch, crc): 597 """Compute the CRC32 primitive on one byte.""" 598 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] 599 600 def update_keys(c): 601 nonlocal key0, key1, key2 602 key0 = crc32(c, key0) 603 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF 604 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF 605 key2 = crc32(key1 >> 24, key2) 606 607 for p in pwd: 608 update_keys(p) 609 610 def decrypter(data): 611 """Decrypt a bytes object.""" 612 result = bytearray() 613 append = result.append 614 for c in data: 615 k = key2 | 2 616 c ^= ((k * (k^1)) >> 8) & 0xFF 617 update_keys(c) 618 append(c) 619 return bytes(result) 620 621 return decrypter 622 623 624class LZMACompressor: 625 626 def __init__(self): 627 self._comp = None 628 629 def _init(self): 630 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1}) 631 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[ 632 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props) 633 ]) 634 return struct.pack('<BBH', 9, 4, len(props)) + props 635 636 def compress(self, data): 637 if self._comp is None: 638 return self._init() + self._comp.compress(data) 639 return self._comp.compress(data) 640 641 def flush(self): 642 if self._comp is None: 643 return self._init() + self._comp.flush() 644 return self._comp.flush() 645 646 647class LZMADecompressor: 648 649 def __init__(self): 650 self._decomp = None 651 self._unconsumed = b'' 652 self.eof = False 653 654 def decompress(self, data): 655 if self._decomp is None: 656 self._unconsumed += data 657 if len(self._unconsumed) <= 4: 658 return b'' 659 psize, = struct.unpack('<H', self._unconsumed[2:4]) 660 if len(self._unconsumed) <= 4 + psize: 661 return b'' 662 663 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[ 664 lzma._decode_filter_properties(lzma.FILTER_LZMA1, 665 self._unconsumed[4:4 + psize]) 666 ]) 667 data = self._unconsumed[4 + psize:] 668 del self._unconsumed 669 670 result = self._decomp.decompress(data) 671 self.eof = self._decomp.eof 672 return result 673 674 675compressor_names = { 676 0: 'store', 677 1: 'shrink', 678 2: 'reduce', 679 3: 'reduce', 680 4: 'reduce', 681 5: 'reduce', 682 6: 'implode', 683 7: 'tokenize', 684 8: 'deflate', 685 9: 'deflate64', 686 10: 'implode', 687 12: 'bzip2', 688 14: 'lzma', 689 18: 'terse', 690 19: 'lz77', 691 97: 'wavpack', 692 98: 'ppmd', 693} 694 695def _check_compression(compression): 696 if compression == ZIP_STORED: 697 pass 698 elif compression == ZIP_DEFLATED: 699 if not zlib: 700 raise RuntimeError( 701 "Compression requires the (missing) zlib module") 702 elif compression == ZIP_BZIP2: 703 if not bz2: 704 raise RuntimeError( 705 "Compression requires the (missing) bz2 module") 706 elif compression == ZIP_LZMA: 707 if not lzma: 708 raise RuntimeError( 709 "Compression requires the (missing) lzma module") 710 else: 711 raise NotImplementedError("That compression method is not supported") 712 713 714def _get_compressor(compress_type, compresslevel=None): 715 if compress_type == ZIP_DEFLATED: 716 if compresslevel is not None: 717 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15) 718 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) 719 elif compress_type == ZIP_BZIP2: 720 if compresslevel is not None: 721 return bz2.BZ2Compressor(compresslevel) 722 return bz2.BZ2Compressor() 723 # compresslevel is ignored for ZIP_LZMA 724 elif compress_type == ZIP_LZMA: 725 return LZMACompressor() 726 else: 727 return None 728 729 730def _get_decompressor(compress_type): 731 _check_compression(compress_type) 732 if compress_type == ZIP_STORED: 733 return None 734 elif compress_type == ZIP_DEFLATED: 735 return zlib.decompressobj(-15) 736 elif compress_type == ZIP_BZIP2: 737 return bz2.BZ2Decompressor() 738 elif compress_type == ZIP_LZMA: 739 return LZMADecompressor() 740 else: 741 descr = compressor_names.get(compress_type) 742 if descr: 743 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr)) 744 else: 745 raise NotImplementedError("compression type %d" % (compress_type,)) 746 747 748class _SharedFile: 749 def __init__(self, file, pos, close, lock, writing): 750 self._file = file 751 self._pos = pos 752 self._close = close 753 self._lock = lock 754 self._writing = writing 755 self.seekable = file.seekable 756 757 def tell(self): 758 return self._pos 759 760 def seek(self, offset, whence=0): 761 with self._lock: 762 if self._writing(): 763 raise ValueError("Can't reposition in the ZIP file while " 764 "there is an open writing handle on it. " 765 "Close the writing handle before trying to read.") 766 self._file.seek(offset, whence) 767 self._pos = self._file.tell() 768 return self._pos 769 770 def read(self, n=-1): 771 with self._lock: 772 if self._writing(): 773 raise ValueError("Can't read from the ZIP file while there " 774 "is an open writing handle on it. " 775 "Close the writing handle before trying to read.") 776 self._file.seek(self._pos) 777 data = self._file.read(n) 778 self._pos = self._file.tell() 779 return data 780 781 def close(self): 782 if self._file is not None: 783 fileobj = self._file 784 self._file = None 785 self._close(fileobj) 786 787# Provide the tell method for unseekable stream 788class _Tellable: 789 def __init__(self, fp): 790 self.fp = fp 791 self.offset = 0 792 793 def write(self, data): 794 n = self.fp.write(data) 795 self.offset += n 796 return n 797 798 def tell(self): 799 return self.offset 800 801 def flush(self): 802 self.fp.flush() 803 804 def close(self): 805 self.fp.close() 806 807 808class ZipExtFile(io.BufferedIOBase): 809 """File-like object for reading an archive member. 810 Is returned by ZipFile.open(). 811 """ 812 813 # Max size supported by decompressor. 814 MAX_N = 1 << 31 - 1 815 816 # Read from compressed files in 4k blocks. 817 MIN_READ_SIZE = 4096 818 819 # Chunk size to read during seek 820 MAX_SEEK_READ = 1 << 24 821 822 def __init__(self, fileobj, mode, zipinfo, pwd=None, 823 close_fileobj=False): 824 self._fileobj = fileobj 825 self._pwd = pwd 826 self._close_fileobj = close_fileobj 827 828 self._compress_type = zipinfo.compress_type 829 self._compress_left = zipinfo.compress_size 830 self._left = zipinfo.file_size 831 832 self._decompressor = _get_decompressor(self._compress_type) 833 834 self._eof = False 835 self._readbuffer = b'' 836 self._offset = 0 837 838 self.newlines = None 839 840 self.mode = mode 841 self.name = zipinfo.filename 842 843 if hasattr(zipinfo, 'CRC'): 844 self._expected_crc = zipinfo.CRC 845 self._running_crc = crc32(b'') 846 else: 847 self._expected_crc = None 848 849 self._seekable = False 850 try: 851 if fileobj.seekable(): 852 self._orig_compress_start = fileobj.tell() 853 self._orig_compress_size = zipinfo.compress_size 854 self._orig_file_size = zipinfo.file_size 855 self._orig_start_crc = self._running_crc 856 self._seekable = True 857 except AttributeError: 858 pass 859 860 self._decrypter = None 861 if pwd: 862 if zipinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 863 # compare against the file type from extended local headers 864 check_byte = (zipinfo._raw_time >> 8) & 0xff 865 else: 866 # compare against the CRC otherwise 867 check_byte = (zipinfo.CRC >> 24) & 0xff 868 h = self._init_decrypter() 869 if h != check_byte: 870 raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename) 871 872 873 def _init_decrypter(self): 874 self._decrypter = _ZipDecrypter(self._pwd) 875 # The first 12 bytes in the cypher stream is an encryption header 876 # used to strengthen the algorithm. The first 11 bytes are 877 # completely random, while the 12th contains the MSB of the CRC, 878 # or the MSB of the file time depending on the header type 879 # and is used to check the correctness of the password. 880 header = self._fileobj.read(12) 881 self._compress_left -= 12 882 return self._decrypter(header)[11] 883 884 def __repr__(self): 885 result = ['<%s.%s' % (self.__class__.__module__, 886 self.__class__.__qualname__)] 887 if not self.closed: 888 result.append(' name=%r mode=%r' % (self.name, self.mode)) 889 if self._compress_type != ZIP_STORED: 890 result.append(' compress_type=%s' % 891 compressor_names.get(self._compress_type, 892 self._compress_type)) 893 else: 894 result.append(' [closed]') 895 result.append('>') 896 return ''.join(result) 897 898 def readline(self, limit=-1): 899 """Read and return a line from the stream. 900 901 If limit is specified, at most limit bytes will be read. 902 """ 903 904 if limit < 0: 905 # Shortcut common case - newline found in buffer. 906 i = self._readbuffer.find(b'\n', self._offset) + 1 907 if i > 0: 908 line = self._readbuffer[self._offset: i] 909 self._offset = i 910 return line 911 912 return io.BufferedIOBase.readline(self, limit) 913 914 def peek(self, n=1): 915 """Returns buffered bytes without advancing the position.""" 916 if n > len(self._readbuffer) - self._offset: 917 chunk = self.read(n) 918 if len(chunk) > self._offset: 919 self._readbuffer = chunk + self._readbuffer[self._offset:] 920 self._offset = 0 921 else: 922 self._offset -= len(chunk) 923 924 # Return up to 512 bytes to reduce allocation overhead for tight loops. 925 return self._readbuffer[self._offset: self._offset + 512] 926 927 def readable(self): 928 if self.closed: 929 raise ValueError("I/O operation on closed file.") 930 return True 931 932 def read(self, n=-1): 933 """Read and return up to n bytes. 934 If the argument is omitted, None, or negative, data is read and returned until EOF is reached. 935 """ 936 if self.closed: 937 raise ValueError("read from closed file.") 938 if n is None or n < 0: 939 buf = self._readbuffer[self._offset:] 940 self._readbuffer = b'' 941 self._offset = 0 942 while not self._eof: 943 buf += self._read1(self.MAX_N) 944 return buf 945 946 end = n + self._offset 947 if end < len(self._readbuffer): 948 buf = self._readbuffer[self._offset:end] 949 self._offset = end 950 return buf 951 952 n = end - len(self._readbuffer) 953 buf = self._readbuffer[self._offset:] 954 self._readbuffer = b'' 955 self._offset = 0 956 while n > 0 and not self._eof: 957 data = self._read1(n) 958 if n < len(data): 959 self._readbuffer = data 960 self._offset = n 961 buf += data[:n] 962 break 963 buf += data 964 n -= len(data) 965 return buf 966 967 def _update_crc(self, newdata): 968 # Update the CRC using the given data. 969 if self._expected_crc is None: 970 # No need to compute the CRC if we don't have a reference value 971 return 972 self._running_crc = crc32(newdata, self._running_crc) 973 # Check the CRC if we're at the end of the file 974 if self._eof and self._running_crc != self._expected_crc: 975 raise BadZipFile("Bad CRC-32 for file %r" % self.name) 976 977 def read1(self, n): 978 """Read up to n bytes with at most one read() system call.""" 979 980 if n is None or n < 0: 981 buf = self._readbuffer[self._offset:] 982 self._readbuffer = b'' 983 self._offset = 0 984 while not self._eof: 985 data = self._read1(self.MAX_N) 986 if data: 987 buf += data 988 break 989 return buf 990 991 end = n + self._offset 992 if end < len(self._readbuffer): 993 buf = self._readbuffer[self._offset:end] 994 self._offset = end 995 return buf 996 997 n = end - len(self._readbuffer) 998 buf = self._readbuffer[self._offset:] 999 self._readbuffer = b'' 1000 self._offset = 0 1001 if n > 0: 1002 while not self._eof: 1003 data = self._read1(n) 1004 if n < len(data): 1005 self._readbuffer = data 1006 self._offset = n 1007 buf += data[:n] 1008 break 1009 if data: 1010 buf += data 1011 break 1012 return buf 1013 1014 def _read1(self, n): 1015 # Read up to n compressed bytes with at most one read() system call, 1016 # decrypt and decompress them. 1017 if self._eof or n <= 0: 1018 return b'' 1019 1020 # Read from file. 1021 if self._compress_type == ZIP_DEFLATED: 1022 ## Handle unconsumed data. 1023 data = self._decompressor.unconsumed_tail 1024 if n > len(data): 1025 data += self._read2(n - len(data)) 1026 else: 1027 data = self._read2(n) 1028 1029 if self._compress_type == ZIP_STORED: 1030 self._eof = self._compress_left <= 0 1031 elif self._compress_type == ZIP_DEFLATED: 1032 n = max(n, self.MIN_READ_SIZE) 1033 data = self._decompressor.decompress(data, n) 1034 self._eof = (self._decompressor.eof or 1035 self._compress_left <= 0 and 1036 not self._decompressor.unconsumed_tail) 1037 if self._eof: 1038 data += self._decompressor.flush() 1039 else: 1040 data = self._decompressor.decompress(data) 1041 self._eof = self._decompressor.eof or self._compress_left <= 0 1042 1043 data = data[:self._left] 1044 self._left -= len(data) 1045 if self._left <= 0: 1046 self._eof = True 1047 self._update_crc(data) 1048 return data 1049 1050 def _read2(self, n): 1051 if self._compress_left <= 0: 1052 return b'' 1053 1054 n = max(n, self.MIN_READ_SIZE) 1055 n = min(n, self._compress_left) 1056 1057 data = self._fileobj.read(n) 1058 self._compress_left -= len(data) 1059 if not data: 1060 raise EOFError 1061 1062 if self._decrypter is not None: 1063 data = self._decrypter(data) 1064 return data 1065 1066 def close(self): 1067 try: 1068 if self._close_fileobj: 1069 self._fileobj.close() 1070 finally: 1071 super().close() 1072 1073 def seekable(self): 1074 if self.closed: 1075 raise ValueError("I/O operation on closed file.") 1076 return self._seekable 1077 1078 def seek(self, offset, whence=0): 1079 if self.closed: 1080 raise ValueError("seek on closed file.") 1081 if not self._seekable: 1082 raise io.UnsupportedOperation("underlying stream is not seekable") 1083 curr_pos = self.tell() 1084 if whence == 0: # Seek from start of file 1085 new_pos = offset 1086 elif whence == 1: # Seek from current position 1087 new_pos = curr_pos + offset 1088 elif whence == 2: # Seek from EOF 1089 new_pos = self._orig_file_size + offset 1090 else: 1091 raise ValueError("whence must be os.SEEK_SET (0), " 1092 "os.SEEK_CUR (1), or os.SEEK_END (2)") 1093 1094 if new_pos > self._orig_file_size: 1095 new_pos = self._orig_file_size 1096 1097 if new_pos < 0: 1098 new_pos = 0 1099 1100 read_offset = new_pos - curr_pos 1101 buff_offset = read_offset + self._offset 1102 1103 if buff_offset >= 0 and buff_offset < len(self._readbuffer): 1104 # Just move the _offset index if the new position is in the _readbuffer 1105 self._offset = buff_offset 1106 read_offset = 0 1107 elif read_offset < 0: 1108 # Position is before the current position. Reset the ZipExtFile 1109 self._fileobj.seek(self._orig_compress_start) 1110 self._running_crc = self._orig_start_crc 1111 self._compress_left = self._orig_compress_size 1112 self._left = self._orig_file_size 1113 self._readbuffer = b'' 1114 self._offset = 0 1115 self._decompressor = _get_decompressor(self._compress_type) 1116 self._eof = False 1117 read_offset = new_pos 1118 if self._decrypter is not None: 1119 self._init_decrypter() 1120 1121 while read_offset > 0: 1122 read_len = min(self.MAX_SEEK_READ, read_offset) 1123 self.read(read_len) 1124 read_offset -= read_len 1125 1126 return self.tell() 1127 1128 def tell(self): 1129 if self.closed: 1130 raise ValueError("tell on closed file.") 1131 if not self._seekable: 1132 raise io.UnsupportedOperation("underlying stream is not seekable") 1133 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset 1134 return filepos 1135 1136 1137class _ZipWriteFile(io.BufferedIOBase): 1138 def __init__(self, zf, zinfo, zip64): 1139 self._zinfo = zinfo 1140 self._zip64 = zip64 1141 self._zipfile = zf 1142 self._compressor = _get_compressor(zinfo.compress_type, 1143 zinfo._compresslevel) 1144 self._file_size = 0 1145 self._compress_size = 0 1146 self._crc = 0 1147 1148 @property 1149 def _fileobj(self): 1150 return self._zipfile.fp 1151 1152 def writable(self): 1153 return True 1154 1155 def write(self, data): 1156 if self.closed: 1157 raise ValueError('I/O operation on closed file.') 1158 1159 # Accept any data that supports the buffer protocol 1160 if isinstance(data, (bytes, bytearray)): 1161 nbytes = len(data) 1162 else: 1163 data = memoryview(data) 1164 nbytes = data.nbytes 1165 self._file_size += nbytes 1166 1167 self._crc = crc32(data, self._crc) 1168 if self._compressor: 1169 data = self._compressor.compress(data) 1170 self._compress_size += len(data) 1171 self._fileobj.write(data) 1172 return nbytes 1173 1174 def close(self): 1175 if self.closed: 1176 return 1177 try: 1178 super().close() 1179 # Flush any data from the compressor, and update header info 1180 if self._compressor: 1181 buf = self._compressor.flush() 1182 self._compress_size += len(buf) 1183 self._fileobj.write(buf) 1184 self._zinfo.compress_size = self._compress_size 1185 else: 1186 self._zinfo.compress_size = self._file_size 1187 self._zinfo.CRC = self._crc 1188 self._zinfo.file_size = self._file_size 1189 1190 if not self._zip64: 1191 if self._file_size > ZIP64_LIMIT: 1192 raise RuntimeError("File size too large, try using force_zip64") 1193 if self._compress_size > ZIP64_LIMIT: 1194 raise RuntimeError("Compressed size too large, try using force_zip64") 1195 1196 # Write updated header info 1197 if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 1198 # Write CRC and file sizes after the file data 1199 fmt = '<LLQQ' if self._zip64 else '<LLLL' 1200 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC, 1201 self._zinfo.compress_size, self._zinfo.file_size)) 1202 self._zipfile.start_dir = self._fileobj.tell() 1203 else: 1204 # Seek backwards and write file header (which will now include 1205 # correct CRC and file sizes) 1206 1207 # Preserve current position in file 1208 self._zipfile.start_dir = self._fileobj.tell() 1209 self._fileobj.seek(self._zinfo.header_offset) 1210 self._fileobj.write(self._zinfo.FileHeader(self._zip64)) 1211 self._fileobj.seek(self._zipfile.start_dir) 1212 1213 # Successfully written: Add file to our caches 1214 self._zipfile.filelist.append(self._zinfo) 1215 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo 1216 finally: 1217 self._zipfile._writing = False 1218 1219 1220 1221class ZipFile: 1222 """ Class with methods to open, read, write, close, list zip files. 1223 1224 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True, 1225 compresslevel=None) 1226 1227 file: Either the path to the file, or a file-like object. 1228 If it is a path, the file will be opened and closed by ZipFile. 1229 mode: The mode can be either read 'r', write 'w', exclusive create 'x', 1230 or append 'a'. 1231 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), 1232 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). 1233 allowZip64: if True ZipFile will create files with ZIP64 extensions when 1234 needed, otherwise it will raise an exception when this would 1235 be necessary. 1236 compresslevel: None (default for the given compression type) or an integer 1237 specifying the level to pass to the compressor. 1238 When using ZIP_STORED or ZIP_LZMA this keyword has no effect. 1239 When using ZIP_DEFLATED integers 0 through 9 are accepted. 1240 When using ZIP_BZIP2 integers 1 through 9 are accepted. 1241 1242 """ 1243 1244 fp = None # Set here since __del__ checks it 1245 _windows_illegal_name_trans_table = None 1246 1247 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, 1248 compresslevel=None, *, strict_timestamps=True, metadata_encoding=None): 1249 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x', 1250 or append 'a'.""" 1251 if mode not in ('r', 'w', 'x', 'a'): 1252 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'") 1253 1254 _check_compression(compression) 1255 1256 self._allowZip64 = allowZip64 1257 self._didModify = False 1258 self.debug = 0 # Level of printing: 0 through 3 1259 self.NameToInfo = {} # Find file info given name 1260 self.filelist = [] # List of ZipInfo instances for archive 1261 self.compression = compression # Method of compression 1262 self.compresslevel = compresslevel 1263 self.mode = mode 1264 self.pwd = None 1265 self._comment = b'' 1266 self._strict_timestamps = strict_timestamps 1267 self.metadata_encoding = metadata_encoding 1268 1269 # Check that we don't try to write with nonconforming codecs 1270 if self.metadata_encoding and mode != 'r': 1271 raise ValueError( 1272 "metadata_encoding is only supported for reading files") 1273 1274 # Check if we were passed a file-like object 1275 if isinstance(file, os.PathLike): 1276 file = os.fspath(file) 1277 if isinstance(file, str): 1278 # No, it's a filename 1279 self._filePassed = 0 1280 self.filename = file 1281 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b', 1282 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'} 1283 filemode = modeDict[mode] 1284 while True: 1285 try: 1286 self.fp = io.open(file, filemode) 1287 except OSError: 1288 if filemode in modeDict: 1289 filemode = modeDict[filemode] 1290 continue 1291 raise 1292 break 1293 else: 1294 self._filePassed = 1 1295 self.fp = file 1296 self.filename = getattr(file, 'name', None) 1297 self._fileRefCnt = 1 1298 self._lock = threading.RLock() 1299 self._seekable = True 1300 self._writing = False 1301 1302 try: 1303 if mode == 'r': 1304 self._RealGetContents() 1305 elif mode in ('w', 'x'): 1306 # set the modified flag so central directory gets written 1307 # even if no files are added to the archive 1308 self._didModify = True 1309 try: 1310 self.start_dir = self.fp.tell() 1311 except (AttributeError, OSError): 1312 self.fp = _Tellable(self.fp) 1313 self.start_dir = 0 1314 self._seekable = False 1315 else: 1316 # Some file-like objects can provide tell() but not seek() 1317 try: 1318 self.fp.seek(self.start_dir) 1319 except (AttributeError, OSError): 1320 self._seekable = False 1321 elif mode == 'a': 1322 try: 1323 # See if file is a zip file 1324 self._RealGetContents() 1325 # seek to start of directory and overwrite 1326 self.fp.seek(self.start_dir) 1327 except BadZipFile: 1328 # file is not a zip file, just append 1329 self.fp.seek(0, 2) 1330 1331 # set the modified flag so central directory gets written 1332 # even if no files are added to the archive 1333 self._didModify = True 1334 self.start_dir = self.fp.tell() 1335 else: 1336 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'") 1337 except: 1338 fp = self.fp 1339 self.fp = None 1340 self._fpclose(fp) 1341 raise 1342 1343 def __enter__(self): 1344 return self 1345 1346 def __exit__(self, type, value, traceback): 1347 self.close() 1348 1349 def __repr__(self): 1350 result = ['<%s.%s' % (self.__class__.__module__, 1351 self.__class__.__qualname__)] 1352 if self.fp is not None: 1353 if self._filePassed: 1354 result.append(' file=%r' % self.fp) 1355 elif self.filename is not None: 1356 result.append(' filename=%r' % self.filename) 1357 result.append(' mode=%r' % self.mode) 1358 else: 1359 result.append(' [closed]') 1360 result.append('>') 1361 return ''.join(result) 1362 1363 def _RealGetContents(self): 1364 """Read in the table of contents for the ZIP file.""" 1365 fp = self.fp 1366 try: 1367 endrec = _EndRecData(fp) 1368 except OSError: 1369 raise BadZipFile("File is not a zip file") 1370 if not endrec: 1371 raise BadZipFile("File is not a zip file") 1372 if self.debug > 1: 1373 print(endrec) 1374 size_cd = endrec[_ECD_SIZE] # bytes in central directory 1375 offset_cd = endrec[_ECD_OFFSET] # offset of central directory 1376 self._comment = endrec[_ECD_COMMENT] # archive comment 1377 1378 # "concat" is zero, unless zip was concatenated to another file 1379 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd 1380 if endrec[_ECD_SIGNATURE] == stringEndArchive64: 1381 # If Zip64 extension structures are present, account for them 1382 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) 1383 1384 if self.debug > 2: 1385 inferred = concat + offset_cd 1386 print("given, inferred, offset", offset_cd, inferred, concat) 1387 # self.start_dir: Position of start of central directory 1388 self.start_dir = offset_cd + concat 1389 if self.start_dir < 0: 1390 raise BadZipFile("Bad offset for central directory") 1391 fp.seek(self.start_dir, 0) 1392 data = fp.read(size_cd) 1393 fp = io.BytesIO(data) 1394 total = 0 1395 while total < size_cd: 1396 centdir = fp.read(sizeCentralDir) 1397 if len(centdir) != sizeCentralDir: 1398 raise BadZipFile("Truncated central directory") 1399 centdir = struct.unpack(structCentralDir, centdir) 1400 if centdir[_CD_SIGNATURE] != stringCentralDir: 1401 raise BadZipFile("Bad magic number for central directory") 1402 if self.debug > 2: 1403 print(centdir) 1404 filename = fp.read(centdir[_CD_FILENAME_LENGTH]) 1405 flags = centdir[_CD_FLAG_BITS] 1406 if flags & _MASK_UTF_FILENAME: 1407 # UTF-8 file names extension 1408 filename = filename.decode('utf-8') 1409 else: 1410 # Historical ZIP filename encoding 1411 filename = filename.decode(self.metadata_encoding or 'cp437') 1412 # Create ZipInfo instance to store file information 1413 x = ZipInfo(filename) 1414 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) 1415 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) 1416 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] 1417 (x.create_version, x.create_system, x.extract_version, x.reserved, 1418 x.flag_bits, x.compress_type, t, d, 1419 x.CRC, x.compress_size, x.file_size) = centdir[1:12] 1420 if x.extract_version > MAX_EXTRACT_VERSION: 1421 raise NotImplementedError("zip file version %.1f" % 1422 (x.extract_version / 10)) 1423 x.volume, x.internal_attr, x.external_attr = centdir[15:18] 1424 # Convert date/time code to (year, month, day, hour, min, sec) 1425 x._raw_time = t 1426 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, 1427 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) 1428 1429 x._decodeExtra() 1430 x.header_offset = x.header_offset + concat 1431 self.filelist.append(x) 1432 self.NameToInfo[x.filename] = x 1433 1434 # update total bytes read from central directory 1435 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] 1436 + centdir[_CD_EXTRA_FIELD_LENGTH] 1437 + centdir[_CD_COMMENT_LENGTH]) 1438 1439 if self.debug > 2: 1440 print("total", total) 1441 1442 end_offset = self.start_dir 1443 for zinfo in sorted(self.filelist, 1444 key=lambda zinfo: zinfo.header_offset, 1445 reverse=True): 1446 zinfo._end_offset = end_offset 1447 end_offset = zinfo.header_offset 1448 1449 def namelist(self): 1450 """Return a list of file names in the archive.""" 1451 return [data.filename for data in self.filelist] 1452 1453 def infolist(self): 1454 """Return a list of class ZipInfo instances for files in the 1455 archive.""" 1456 return self.filelist 1457 1458 def printdir(self, file=None): 1459 """Print a table of contents for the zip file.""" 1460 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"), 1461 file=file) 1462 for zinfo in self.filelist: 1463 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 1464 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size), 1465 file=file) 1466 1467 def testzip(self): 1468 """Read all the files and check the CRC.""" 1469 chunk_size = 2 ** 20 1470 for zinfo in self.filelist: 1471 try: 1472 # Read by chunks, to avoid an OverflowError or a 1473 # MemoryError with very large embedded files. 1474 with self.open(zinfo.filename, "r") as f: 1475 while f.read(chunk_size): # Check CRC-32 1476 pass 1477 except BadZipFile: 1478 return zinfo.filename 1479 1480 def getinfo(self, name): 1481 """Return the instance of ZipInfo given 'name'.""" 1482 info = self.NameToInfo.get(name) 1483 if info is None: 1484 raise KeyError( 1485 'There is no item named %r in the archive' % name) 1486 1487 return info 1488 1489 def setpassword(self, pwd): 1490 """Set default password for encrypted files.""" 1491 if pwd and not isinstance(pwd, bytes): 1492 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1493 if pwd: 1494 self.pwd = pwd 1495 else: 1496 self.pwd = None 1497 1498 @property 1499 def comment(self): 1500 """The comment text associated with the ZIP file.""" 1501 return self._comment 1502 1503 @comment.setter 1504 def comment(self, comment): 1505 if not isinstance(comment, bytes): 1506 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__) 1507 # check for valid comment length 1508 if len(comment) > ZIP_MAX_COMMENT: 1509 import warnings 1510 warnings.warn('Archive comment is too long; truncating to %d bytes' 1511 % ZIP_MAX_COMMENT, stacklevel=2) 1512 comment = comment[:ZIP_MAX_COMMENT] 1513 self._comment = comment 1514 self._didModify = True 1515 1516 def read(self, name, pwd=None): 1517 """Return file bytes for name.""" 1518 with self.open(name, "r", pwd) as fp: 1519 return fp.read() 1520 1521 def open(self, name, mode="r", pwd=None, *, force_zip64=False): 1522 """Return file-like object for 'name'. 1523 1524 name is a string for the file name within the ZIP file, or a ZipInfo 1525 object. 1526 1527 mode should be 'r' to read a file already in the ZIP file, or 'w' to 1528 write to a file newly added to the archive. 1529 1530 pwd is the password to decrypt files (only used for reading). 1531 1532 When writing, if the file size is not known in advance but may exceed 1533 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large 1534 files. If the size is known in advance, it is best to pass a ZipInfo 1535 instance for name, with zinfo.file_size set. 1536 """ 1537 if mode not in {"r", "w"}: 1538 raise ValueError('open() requires mode "r" or "w"') 1539 if pwd and (mode == "w"): 1540 raise ValueError("pwd is only supported for reading files") 1541 if not self.fp: 1542 raise ValueError( 1543 "Attempt to use ZIP archive that was already closed") 1544 1545 # Make sure we have an info object 1546 if isinstance(name, ZipInfo): 1547 # 'name' is already an info object 1548 zinfo = name 1549 elif mode == 'w': 1550 zinfo = ZipInfo(name) 1551 zinfo.compress_type = self.compression 1552 zinfo._compresslevel = self.compresslevel 1553 else: 1554 # Get info object for name 1555 zinfo = self.getinfo(name) 1556 1557 if mode == 'w': 1558 return self._open_to_write(zinfo, force_zip64=force_zip64) 1559 1560 if self._writing: 1561 raise ValueError("Can't read from the ZIP file while there " 1562 "is an open writing handle on it. " 1563 "Close the writing handle before trying to read.") 1564 1565 # Open for reading: 1566 self._fileRefCnt += 1 1567 zef_file = _SharedFile(self.fp, zinfo.header_offset, 1568 self._fpclose, self._lock, lambda: self._writing) 1569 try: 1570 # Skip the file header: 1571 fheader = zef_file.read(sizeFileHeader) 1572 if len(fheader) != sizeFileHeader: 1573 raise BadZipFile("Truncated file header") 1574 fheader = struct.unpack(structFileHeader, fheader) 1575 if fheader[_FH_SIGNATURE] != stringFileHeader: 1576 raise BadZipFile("Bad magic number for file header") 1577 1578 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 1579 if fheader[_FH_EXTRA_FIELD_LENGTH]: 1580 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) 1581 1582 if zinfo.flag_bits & _MASK_COMPRESSED_PATCH: 1583 # Zip 2.7: compressed patched data 1584 raise NotImplementedError("compressed patched data (flag bit 5)") 1585 1586 if zinfo.flag_bits & _MASK_STRONG_ENCRYPTION: 1587 # strong encryption 1588 raise NotImplementedError("strong encryption (flag bit 6)") 1589 1590 if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & _MASK_UTF_FILENAME: 1591 # UTF-8 filename 1592 fname_str = fname.decode("utf-8") 1593 else: 1594 fname_str = fname.decode(self.metadata_encoding or "cp437") 1595 1596 if fname_str != zinfo.orig_filename: 1597 raise BadZipFile( 1598 'File name in directory %r and header %r differ.' 1599 % (zinfo.orig_filename, fname)) 1600 1601 if (zinfo._end_offset is not None and 1602 zef_file.tell() + zinfo.compress_size > zinfo._end_offset): 1603 raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)") 1604 1605 # check for encrypted flag & handle password 1606 is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED 1607 if is_encrypted: 1608 if not pwd: 1609 pwd = self.pwd 1610 if pwd and not isinstance(pwd, bytes): 1611 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1612 if not pwd: 1613 raise RuntimeError("File %r is encrypted, password " 1614 "required for extraction" % name) 1615 else: 1616 pwd = None 1617 1618 return ZipExtFile(zef_file, mode, zinfo, pwd, True) 1619 except: 1620 zef_file.close() 1621 raise 1622 1623 def _open_to_write(self, zinfo, force_zip64=False): 1624 if force_zip64 and not self._allowZip64: 1625 raise ValueError( 1626 "force_zip64 is True, but allowZip64 was False when opening " 1627 "the ZIP file." 1628 ) 1629 if self._writing: 1630 raise ValueError("Can't write to the ZIP file while there is " 1631 "another write handle open on it. " 1632 "Close the first handle before opening another.") 1633 1634 # Size and CRC are overwritten with correct data after processing the file 1635 zinfo.compress_size = 0 1636 zinfo.CRC = 0 1637 1638 zinfo.flag_bits = 0x00 1639 if zinfo.compress_type == ZIP_LZMA: 1640 # Compressed data includes an end-of-stream (EOS) marker 1641 zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 1642 if not self._seekable: 1643 zinfo.flag_bits |= _MASK_USE_DATA_DESCRIPTOR 1644 1645 if not zinfo.external_attr: 1646 zinfo.external_attr = 0o600 << 16 # permissions: ?rw------- 1647 1648 # Compressed size can be larger than uncompressed size 1649 zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT) 1650 if not self._allowZip64 and zip64: 1651 raise LargeZipFile("Filesize would require ZIP64 extensions") 1652 1653 if self._seekable: 1654 self.fp.seek(self.start_dir) 1655 zinfo.header_offset = self.fp.tell() 1656 1657 self._writecheck(zinfo) 1658 self._didModify = True 1659 1660 self.fp.write(zinfo.FileHeader(zip64)) 1661 1662 self._writing = True 1663 return _ZipWriteFile(self, zinfo, zip64) 1664 1665 def extract(self, member, path=None, pwd=None): 1666 """Extract a member from the archive to the current working directory, 1667 using its full name. Its file information is extracted as accurately 1668 as possible. `member' may be a filename or a ZipInfo object. You can 1669 specify a different directory using `path'. 1670 """ 1671 if path is None: 1672 path = os.getcwd() 1673 else: 1674 path = os.fspath(path) 1675 1676 return self._extract_member(member, path, pwd) 1677 1678 def extractall(self, path=None, members=None, pwd=None): 1679 """Extract all members from the archive to the current working 1680 directory. `path' specifies a different directory to extract to. 1681 `members' is optional and must be a subset of the list returned 1682 by namelist(). 1683 """ 1684 if members is None: 1685 members = self.namelist() 1686 1687 if path is None: 1688 path = os.getcwd() 1689 else: 1690 path = os.fspath(path) 1691 1692 for zipinfo in members: 1693 self._extract_member(zipinfo, path, pwd) 1694 1695 @classmethod 1696 def _sanitize_windows_name(cls, arcname, pathsep): 1697 """Replace bad characters and remove trailing dots from parts.""" 1698 table = cls._windows_illegal_name_trans_table 1699 if not table: 1700 illegal = ':<>|"?*' 1701 table = str.maketrans(illegal, '_' * len(illegal)) 1702 cls._windows_illegal_name_trans_table = table 1703 arcname = arcname.translate(table) 1704 # remove trailing dots 1705 arcname = (x.rstrip('.') for x in arcname.split(pathsep)) 1706 # rejoin, removing empty parts. 1707 arcname = pathsep.join(x for x in arcname if x) 1708 return arcname 1709 1710 def _extract_member(self, member, targetpath, pwd): 1711 """Extract the ZipInfo object 'member' to a physical 1712 file on the path targetpath. 1713 """ 1714 if not isinstance(member, ZipInfo): 1715 member = self.getinfo(member) 1716 1717 # build the destination pathname, replacing 1718 # forward slashes to platform specific separators. 1719 arcname = member.filename.replace('/', os.path.sep) 1720 1721 if os.path.altsep: 1722 arcname = arcname.replace(os.path.altsep, os.path.sep) 1723 # interpret absolute pathname as relative, remove drive letter or 1724 # UNC path, redundant separators, "." and ".." components. 1725 arcname = os.path.splitdrive(arcname)[1] 1726 invalid_path_parts = ('', os.path.curdir, os.path.pardir) 1727 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) 1728 if x not in invalid_path_parts) 1729 if os.path.sep == '\\': 1730 # filter illegal characters on Windows 1731 arcname = self._sanitize_windows_name(arcname, os.path.sep) 1732 1733 targetpath = os.path.join(targetpath, arcname) 1734 targetpath = os.path.normpath(targetpath) 1735 1736 # Create all upper directories if necessary. 1737 upperdirs = os.path.dirname(targetpath) 1738 if upperdirs and not os.path.exists(upperdirs): 1739 os.makedirs(upperdirs) 1740 1741 if member.is_dir(): 1742 if not os.path.isdir(targetpath): 1743 os.mkdir(targetpath) 1744 return targetpath 1745 1746 with self.open(member, pwd=pwd) as source, \ 1747 open(targetpath, "wb") as target: 1748 shutil.copyfileobj(source, target) 1749 1750 return targetpath 1751 1752 def _writecheck(self, zinfo): 1753 """Check for errors before writing a file to the archive.""" 1754 if zinfo.filename in self.NameToInfo: 1755 import warnings 1756 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3) 1757 if self.mode not in ('w', 'x', 'a'): 1758 raise ValueError("write() requires mode 'w', 'x', or 'a'") 1759 if not self.fp: 1760 raise ValueError( 1761 "Attempt to write ZIP archive that was already closed") 1762 _check_compression(zinfo.compress_type) 1763 if not self._allowZip64: 1764 requires_zip64 = None 1765 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT: 1766 requires_zip64 = "Files count" 1767 elif zinfo.file_size > ZIP64_LIMIT: 1768 requires_zip64 = "Filesize" 1769 elif zinfo.header_offset > ZIP64_LIMIT: 1770 requires_zip64 = "Zipfile size" 1771 if requires_zip64: 1772 raise LargeZipFile(requires_zip64 + 1773 " would require ZIP64 extensions") 1774 1775 def write(self, filename, arcname=None, 1776 compress_type=None, compresslevel=None): 1777 """Put the bytes from filename into the archive under the name 1778 arcname.""" 1779 if not self.fp: 1780 raise ValueError( 1781 "Attempt to write to ZIP archive that was already closed") 1782 if self._writing: 1783 raise ValueError( 1784 "Can't write to ZIP archive while an open writing handle exists" 1785 ) 1786 1787 zinfo = ZipInfo.from_file(filename, arcname, 1788 strict_timestamps=self._strict_timestamps) 1789 1790 if zinfo.is_dir(): 1791 zinfo.compress_size = 0 1792 zinfo.CRC = 0 1793 self.mkdir(zinfo) 1794 else: 1795 if compress_type is not None: 1796 zinfo.compress_type = compress_type 1797 else: 1798 zinfo.compress_type = self.compression 1799 1800 if compresslevel is not None: 1801 zinfo._compresslevel = compresslevel 1802 else: 1803 zinfo._compresslevel = self.compresslevel 1804 1805 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: 1806 shutil.copyfileobj(src, dest, 1024*8) 1807 1808 def writestr(self, zinfo_or_arcname, data, 1809 compress_type=None, compresslevel=None): 1810 """Write a file into the archive. The contents is 'data', which 1811 may be either a 'str' or a 'bytes' instance; if it is a 'str', 1812 it is encoded as UTF-8 first. 1813 'zinfo_or_arcname' is either a ZipInfo instance or 1814 the name of the file in the archive.""" 1815 if isinstance(data, str): 1816 data = data.encode("utf-8") 1817 if not isinstance(zinfo_or_arcname, ZipInfo): 1818 zinfo = ZipInfo(filename=zinfo_or_arcname, 1819 date_time=time.localtime(time.time())[:6]) 1820 zinfo.compress_type = self.compression 1821 zinfo._compresslevel = self.compresslevel 1822 if zinfo.filename[-1] == '/': 1823 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x 1824 zinfo.external_attr |= 0x10 # MS-DOS directory flag 1825 else: 1826 zinfo.external_attr = 0o600 << 16 # ?rw------- 1827 else: 1828 zinfo = zinfo_or_arcname 1829 1830 if not self.fp: 1831 raise ValueError( 1832 "Attempt to write to ZIP archive that was already closed") 1833 if self._writing: 1834 raise ValueError( 1835 "Can't write to ZIP archive while an open writing handle exists." 1836 ) 1837 1838 if compress_type is not None: 1839 zinfo.compress_type = compress_type 1840 1841 if compresslevel is not None: 1842 zinfo._compresslevel = compresslevel 1843 1844 zinfo.file_size = len(data) # Uncompressed size 1845 with self._lock: 1846 with self.open(zinfo, mode='w') as dest: 1847 dest.write(data) 1848 1849 def mkdir(self, zinfo_or_directory_name, mode=511): 1850 """Creates a directory inside the zip archive.""" 1851 if isinstance(zinfo_or_directory_name, ZipInfo): 1852 zinfo = zinfo_or_directory_name 1853 if not zinfo.is_dir(): 1854 raise ValueError("The given ZipInfo does not describe a directory") 1855 elif isinstance(zinfo_or_directory_name, str): 1856 directory_name = zinfo_or_directory_name 1857 if not directory_name.endswith("/"): 1858 directory_name += "/" 1859 zinfo = ZipInfo(directory_name) 1860 zinfo.compress_size = 0 1861 zinfo.CRC = 0 1862 zinfo.external_attr = ((0o40000 | mode) & 0xFFFF) << 16 1863 zinfo.file_size = 0 1864 zinfo.external_attr |= 0x10 1865 else: 1866 raise TypeError("Expected type str or ZipInfo") 1867 1868 with self._lock: 1869 if self._seekable: 1870 self.fp.seek(self.start_dir) 1871 zinfo.header_offset = self.fp.tell() # Start of header bytes 1872 if zinfo.compress_type == ZIP_LZMA: 1873 # Compressed data includes an end-of-stream (EOS) marker 1874 zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 1875 1876 self._writecheck(zinfo) 1877 self._didModify = True 1878 1879 self.filelist.append(zinfo) 1880 self.NameToInfo[zinfo.filename] = zinfo 1881 self.fp.write(zinfo.FileHeader(False)) 1882 self.start_dir = self.fp.tell() 1883 1884 def __del__(self): 1885 """Call the "close()" method in case the user forgot.""" 1886 self.close() 1887 1888 def close(self): 1889 """Close the file, and for mode 'w', 'x' and 'a' write the ending 1890 records.""" 1891 if self.fp is None: 1892 return 1893 1894 if self._writing: 1895 raise ValueError("Can't close the ZIP file while there is " 1896 "an open writing handle on it. " 1897 "Close the writing handle before closing the zip.") 1898 1899 try: 1900 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records 1901 with self._lock: 1902 if self._seekable: 1903 self.fp.seek(self.start_dir) 1904 self._write_end_record() 1905 finally: 1906 fp = self.fp 1907 self.fp = None 1908 self._fpclose(fp) 1909 1910 def _write_end_record(self): 1911 for zinfo in self.filelist: # write central directory 1912 dt = zinfo.date_time 1913 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 1914 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 1915 extra = [] 1916 if zinfo.file_size > ZIP64_LIMIT \ 1917 or zinfo.compress_size > ZIP64_LIMIT: 1918 extra.append(zinfo.file_size) 1919 extra.append(zinfo.compress_size) 1920 file_size = 0xffffffff 1921 compress_size = 0xffffffff 1922 else: 1923 file_size = zinfo.file_size 1924 compress_size = zinfo.compress_size 1925 1926 if zinfo.header_offset > ZIP64_LIMIT: 1927 extra.append(zinfo.header_offset) 1928 header_offset = 0xffffffff 1929 else: 1930 header_offset = zinfo.header_offset 1931 1932 extra_data = zinfo.extra 1933 min_version = 0 1934 if extra: 1935 # Append a ZIP64 field to the extra's 1936 extra_data = _strip_extra(extra_data, (1,)) 1937 extra_data = struct.pack( 1938 '<HH' + 'Q'*len(extra), 1939 1, 8*len(extra), *extra) + extra_data 1940 1941 min_version = ZIP64_VERSION 1942 1943 if zinfo.compress_type == ZIP_BZIP2: 1944 min_version = max(BZIP2_VERSION, min_version) 1945 elif zinfo.compress_type == ZIP_LZMA: 1946 min_version = max(LZMA_VERSION, min_version) 1947 1948 extract_version = max(min_version, zinfo.extract_version) 1949 create_version = max(min_version, zinfo.create_version) 1950 filename, flag_bits = zinfo._encodeFilenameFlags() 1951 centdir = struct.pack(structCentralDir, 1952 stringCentralDir, create_version, 1953 zinfo.create_system, extract_version, zinfo.reserved, 1954 flag_bits, zinfo.compress_type, dostime, dosdate, 1955 zinfo.CRC, compress_size, file_size, 1956 len(filename), len(extra_data), len(zinfo.comment), 1957 0, zinfo.internal_attr, zinfo.external_attr, 1958 header_offset) 1959 self.fp.write(centdir) 1960 self.fp.write(filename) 1961 self.fp.write(extra_data) 1962 self.fp.write(zinfo.comment) 1963 1964 pos2 = self.fp.tell() 1965 # Write end-of-zip-archive record 1966 centDirCount = len(self.filelist) 1967 centDirSize = pos2 - self.start_dir 1968 centDirOffset = self.start_dir 1969 requires_zip64 = None 1970 if centDirCount > ZIP_FILECOUNT_LIMIT: 1971 requires_zip64 = "Files count" 1972 elif centDirOffset > ZIP64_LIMIT: 1973 requires_zip64 = "Central directory offset" 1974 elif centDirSize > ZIP64_LIMIT: 1975 requires_zip64 = "Central directory size" 1976 if requires_zip64: 1977 # Need to write the ZIP64 end-of-archive records 1978 if not self._allowZip64: 1979 raise LargeZipFile(requires_zip64 + 1980 " would require ZIP64 extensions") 1981 zip64endrec = struct.pack( 1982 structEndArchive64, stringEndArchive64, 1983 44, 45, 45, 0, 0, centDirCount, centDirCount, 1984 centDirSize, centDirOffset) 1985 self.fp.write(zip64endrec) 1986 1987 zip64locrec = struct.pack( 1988 structEndArchive64Locator, 1989 stringEndArchive64Locator, 0, pos2, 1) 1990 self.fp.write(zip64locrec) 1991 centDirCount = min(centDirCount, 0xFFFF) 1992 centDirSize = min(centDirSize, 0xFFFFFFFF) 1993 centDirOffset = min(centDirOffset, 0xFFFFFFFF) 1994 1995 endrec = struct.pack(structEndArchive, stringEndArchive, 1996 0, 0, centDirCount, centDirCount, 1997 centDirSize, centDirOffset, len(self._comment)) 1998 self.fp.write(endrec) 1999 self.fp.write(self._comment) 2000 if self.mode == "a": 2001 self.fp.truncate() 2002 self.fp.flush() 2003 2004 def _fpclose(self, fp): 2005 assert self._fileRefCnt > 0 2006 self._fileRefCnt -= 1 2007 if not self._fileRefCnt and not self._filePassed: 2008 fp.close() 2009 2010 2011class PyZipFile(ZipFile): 2012 """Class to create ZIP archives with Python library files and packages.""" 2013 2014 def __init__(self, file, mode="r", compression=ZIP_STORED, 2015 allowZip64=True, optimize=-1): 2016 ZipFile.__init__(self, file, mode=mode, compression=compression, 2017 allowZip64=allowZip64) 2018 self._optimize = optimize 2019 2020 def writepy(self, pathname, basename="", filterfunc=None): 2021 """Add all files from "pathname" to the ZIP archive. 2022 2023 If pathname is a package directory, search the directory and 2024 all package subdirectories recursively for all *.py and enter 2025 the modules into the archive. If pathname is a plain 2026 directory, listdir *.py and enter all modules. Else, pathname 2027 must be a Python *.py file and the module will be put into the 2028 archive. Added modules are always module.pyc. 2029 This method will compile the module.py into module.pyc if 2030 necessary. 2031 If filterfunc(pathname) is given, it is called with every argument. 2032 When it is False, the file or directory is skipped. 2033 """ 2034 pathname = os.fspath(pathname) 2035 if filterfunc and not filterfunc(pathname): 2036 if self.debug: 2037 label = 'path' if os.path.isdir(pathname) else 'file' 2038 print('%s %r skipped by filterfunc' % (label, pathname)) 2039 return 2040 dir, name = os.path.split(pathname) 2041 if os.path.isdir(pathname): 2042 initname = os.path.join(pathname, "__init__.py") 2043 if os.path.isfile(initname): 2044 # This is a package directory, add it 2045 if basename: 2046 basename = "%s/%s" % (basename, name) 2047 else: 2048 basename = name 2049 if self.debug: 2050 print("Adding package in", pathname, "as", basename) 2051 fname, arcname = self._get_codename(initname[0:-3], basename) 2052 if self.debug: 2053 print("Adding", arcname) 2054 self.write(fname, arcname) 2055 dirlist = sorted(os.listdir(pathname)) 2056 dirlist.remove("__init__.py") 2057 # Add all *.py files and package subdirectories 2058 for filename in dirlist: 2059 path = os.path.join(pathname, filename) 2060 root, ext = os.path.splitext(filename) 2061 if os.path.isdir(path): 2062 if os.path.isfile(os.path.join(path, "__init__.py")): 2063 # This is a package directory, add it 2064 self.writepy(path, basename, 2065 filterfunc=filterfunc) # Recursive call 2066 elif ext == ".py": 2067 if filterfunc and not filterfunc(path): 2068 if self.debug: 2069 print('file %r skipped by filterfunc' % path) 2070 continue 2071 fname, arcname = self._get_codename(path[0:-3], 2072 basename) 2073 if self.debug: 2074 print("Adding", arcname) 2075 self.write(fname, arcname) 2076 else: 2077 # This is NOT a package directory, add its files at top level 2078 if self.debug: 2079 print("Adding files from directory", pathname) 2080 for filename in sorted(os.listdir(pathname)): 2081 path = os.path.join(pathname, filename) 2082 root, ext = os.path.splitext(filename) 2083 if ext == ".py": 2084 if filterfunc and not filterfunc(path): 2085 if self.debug: 2086 print('file %r skipped by filterfunc' % path) 2087 continue 2088 fname, arcname = self._get_codename(path[0:-3], 2089 basename) 2090 if self.debug: 2091 print("Adding", arcname) 2092 self.write(fname, arcname) 2093 else: 2094 if pathname[-3:] != ".py": 2095 raise RuntimeError( 2096 'Files added with writepy() must end with ".py"') 2097 fname, arcname = self._get_codename(pathname[0:-3], basename) 2098 if self.debug: 2099 print("Adding file", arcname) 2100 self.write(fname, arcname) 2101 2102 def _get_codename(self, pathname, basename): 2103 """Return (filename, archivename) for the path. 2104 2105 Given a module name path, return the correct file path and 2106 archive name, compiling if necessary. For example, given 2107 /python/lib/string, return (/python/lib/string.pyc, string). 2108 """ 2109 def _compile(file, optimize=-1): 2110 import py_compile 2111 if self.debug: 2112 print("Compiling", file) 2113 try: 2114 py_compile.compile(file, doraise=True, optimize=optimize) 2115 except py_compile.PyCompileError as err: 2116 print(err.msg) 2117 return False 2118 return True 2119 2120 file_py = pathname + ".py" 2121 file_pyc = pathname + ".pyc" 2122 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='') 2123 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1) 2124 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2) 2125 if self._optimize == -1: 2126 # legacy mode: use whatever file is present 2127 if (os.path.isfile(file_pyc) and 2128 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime): 2129 # Use .pyc file. 2130 arcname = fname = file_pyc 2131 elif (os.path.isfile(pycache_opt0) and 2132 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime): 2133 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2134 # file name in the archive. 2135 fname = pycache_opt0 2136 arcname = file_pyc 2137 elif (os.path.isfile(pycache_opt1) and 2138 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime): 2139 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2140 # file name in the archive. 2141 fname = pycache_opt1 2142 arcname = file_pyc 2143 elif (os.path.isfile(pycache_opt2) and 2144 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime): 2145 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2146 # file name in the archive. 2147 fname = pycache_opt2 2148 arcname = file_pyc 2149 else: 2150 # Compile py into PEP 3147 pyc file. 2151 if _compile(file_py): 2152 if sys.flags.optimize == 0: 2153 fname = pycache_opt0 2154 elif sys.flags.optimize == 1: 2155 fname = pycache_opt1 2156 else: 2157 fname = pycache_opt2 2158 arcname = file_pyc 2159 else: 2160 fname = arcname = file_py 2161 else: 2162 # new mode: use given optimization level 2163 if self._optimize == 0: 2164 fname = pycache_opt0 2165 arcname = file_pyc 2166 else: 2167 arcname = file_pyc 2168 if self._optimize == 1: 2169 fname = pycache_opt1 2170 elif self._optimize == 2: 2171 fname = pycache_opt2 2172 else: 2173 msg = "invalid value for 'optimize': {!r}".format(self._optimize) 2174 raise ValueError(msg) 2175 if not (os.path.isfile(fname) and 2176 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime): 2177 if not _compile(file_py, optimize=self._optimize): 2178 fname = arcname = file_py 2179 archivename = os.path.split(arcname)[1] 2180 if basename: 2181 archivename = "%s/%s" % (basename, archivename) 2182 return (fname, archivename) 2183 2184 2185def _parents(path): 2186 """ 2187 Given a path with elements separated by 2188 posixpath.sep, generate all parents of that path. 2189 2190 >>> list(_parents('b/d')) 2191 ['b'] 2192 >>> list(_parents('/b/d/')) 2193 ['/b'] 2194 >>> list(_parents('b/d/f/')) 2195 ['b/d', 'b'] 2196 >>> list(_parents('b')) 2197 [] 2198 >>> list(_parents('')) 2199 [] 2200 """ 2201 return itertools.islice(_ancestry(path), 1, None) 2202 2203 2204def _ancestry(path): 2205 """ 2206 Given a path with elements separated by 2207 posixpath.sep, generate all elements of that path 2208 2209 >>> list(_ancestry('b/d')) 2210 ['b/d', 'b'] 2211 >>> list(_ancestry('/b/d/')) 2212 ['/b/d', '/b'] 2213 >>> list(_ancestry('b/d/f/')) 2214 ['b/d/f', 'b/d', 'b'] 2215 >>> list(_ancestry('b')) 2216 ['b'] 2217 >>> list(_ancestry('')) 2218 [] 2219 """ 2220 path = path.rstrip(posixpath.sep) 2221 while path and path != posixpath.sep: 2222 yield path 2223 path, tail = posixpath.split(path) 2224 2225 2226_dedupe = dict.fromkeys 2227"""Deduplicate an iterable in original order""" 2228 2229 2230def _difference(minuend, subtrahend): 2231 """ 2232 Return items in minuend not in subtrahend, retaining order 2233 with O(1) lookup. 2234 """ 2235 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 2236 2237 2238class CompleteDirs(ZipFile): 2239 """ 2240 A ZipFile subclass that ensures that implied directories 2241 are always included in the namelist. 2242 """ 2243 2244 @staticmethod 2245 def _implied_dirs(names): 2246 parents = itertools.chain.from_iterable(map(_parents, names)) 2247 as_dirs = (p + posixpath.sep for p in parents) 2248 return _dedupe(_difference(as_dirs, names)) 2249 2250 def namelist(self): 2251 names = super(CompleteDirs, self).namelist() 2252 return names + list(self._implied_dirs(names)) 2253 2254 def _name_set(self): 2255 return set(self.namelist()) 2256 2257 def resolve_dir(self, name): 2258 """ 2259 If the name represents a directory, return that name 2260 as a directory (with the trailing slash). 2261 """ 2262 names = self._name_set() 2263 dirname = name + '/' 2264 dir_match = name not in names and dirname in names 2265 return dirname if dir_match else name 2266 2267 def getinfo(self, name): 2268 """ 2269 Supplement getinfo for implied dirs. 2270 """ 2271 try: 2272 return super().getinfo(name) 2273 except KeyError: 2274 if not name.endswith('/') or name not in self._name_set(): 2275 raise 2276 return ZipInfo(filename=name) 2277 2278 @classmethod 2279 def make(cls, source): 2280 """ 2281 Given a source (filename or zipfile), return an 2282 appropriate CompleteDirs subclass. 2283 """ 2284 if isinstance(source, CompleteDirs): 2285 return source 2286 2287 if not isinstance(source, ZipFile): 2288 return cls(source) 2289 2290 # Only allow for FastLookup when supplied zipfile is read-only 2291 if 'r' not in source.mode: 2292 cls = CompleteDirs 2293 2294 source.__class__ = cls 2295 return source 2296 2297 2298class FastLookup(CompleteDirs): 2299 """ 2300 ZipFile subclass to ensure implicit 2301 dirs exist and are resolved rapidly. 2302 """ 2303 2304 def namelist(self): 2305 with contextlib.suppress(AttributeError): 2306 return self.__names 2307 self.__names = super(FastLookup, self).namelist() 2308 return self.__names 2309 2310 def _name_set(self): 2311 with contextlib.suppress(AttributeError): 2312 return self.__lookup 2313 self.__lookup = super(FastLookup, self)._name_set() 2314 return self.__lookup 2315 2316 2317def _extract_text_encoding(encoding=None, *args, **kwargs): 2318 # stacklevel=3 so that the caller of the caller see any warning. 2319 return io.text_encoding(encoding, 3), args, kwargs 2320 2321 2322class Path: 2323 """ 2324 A pathlib-compatible interface for zip files. 2325 2326 Consider a zip file with this structure:: 2327 2328 . 2329 ├── a.txt 2330 └── b 2331 ├── c.txt 2332 └── d 2333 └── e.txt 2334 2335 >>> data = io.BytesIO() 2336 >>> zf = ZipFile(data, 'w') 2337 >>> zf.writestr('a.txt', 'content of a') 2338 >>> zf.writestr('b/c.txt', 'content of c') 2339 >>> zf.writestr('b/d/e.txt', 'content of e') 2340 >>> zf.filename = 'mem/abcde.zip' 2341 2342 Path accepts the zipfile object itself or a filename 2343 2344 >>> root = Path(zf) 2345 2346 From there, several path operations are available. 2347 2348 Directory iteration (including the zip file itself): 2349 2350 >>> a, b = root.iterdir() 2351 >>> a 2352 Path('mem/abcde.zip', 'a.txt') 2353 >>> b 2354 Path('mem/abcde.zip', 'b/') 2355 2356 name property: 2357 2358 >>> b.name 2359 'b' 2360 2361 join with divide operator: 2362 2363 >>> c = b / 'c.txt' 2364 >>> c 2365 Path('mem/abcde.zip', 'b/c.txt') 2366 >>> c.name 2367 'c.txt' 2368 2369 Read text: 2370 2371 >>> c.read_text() 2372 'content of c' 2373 2374 existence: 2375 2376 >>> c.exists() 2377 True 2378 >>> (b / 'missing.txt').exists() 2379 False 2380 2381 Coercion to string: 2382 2383 >>> import os 2384 >>> str(c).replace(os.sep, posixpath.sep) 2385 'mem/abcde.zip/b/c.txt' 2386 2387 At the root, ``name``, ``filename``, and ``parent`` 2388 resolve to the zipfile. Note these attributes are not 2389 valid and will raise a ``ValueError`` if the zipfile 2390 has no filename. 2391 2392 >>> root.name 2393 'abcde.zip' 2394 >>> str(root.filename).replace(os.sep, posixpath.sep) 2395 'mem/abcde.zip' 2396 >>> str(root.parent) 2397 'mem' 2398 """ 2399 2400 __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" 2401 2402 def __init__(self, root, at=""): 2403 """ 2404 Construct a Path from a ZipFile or filename. 2405 2406 Note: When the source is an existing ZipFile object, 2407 its type (__class__) will be mutated to a 2408 specialized type. If the caller wishes to retain the 2409 original type, the caller should either create a 2410 separate ZipFile object or pass a filename. 2411 """ 2412 self.root = FastLookup.make(root) 2413 self.at = at 2414 2415 def open(self, mode='r', *args, pwd=None, **kwargs): 2416 """ 2417 Open this entry as text or binary following the semantics 2418 of ``pathlib.Path.open()`` by passing arguments through 2419 to io.TextIOWrapper(). 2420 """ 2421 if self.is_dir(): 2422 raise IsADirectoryError(self) 2423 zip_mode = mode[0] 2424 if not self.exists() and zip_mode == 'r': 2425 raise FileNotFoundError(self) 2426 stream = self.root.open(self.at, zip_mode, pwd=pwd) 2427 if 'b' in mode: 2428 if args or kwargs: 2429 raise ValueError("encoding args invalid for binary operation") 2430 return stream 2431 # Text mode: 2432 encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) 2433 return io.TextIOWrapper(stream, encoding, *args, **kwargs) 2434 2435 @property 2436 def name(self): 2437 return pathlib.Path(self.at).name or self.filename.name 2438 2439 @property 2440 def suffix(self): 2441 return pathlib.Path(self.at).suffix or self.filename.suffix 2442 2443 @property 2444 def suffixes(self): 2445 return pathlib.Path(self.at).suffixes or self.filename.suffixes 2446 2447 @property 2448 def stem(self): 2449 return pathlib.Path(self.at).stem or self.filename.stem 2450 2451 @property 2452 def filename(self): 2453 return pathlib.Path(self.root.filename).joinpath(self.at) 2454 2455 def read_text(self, *args, **kwargs): 2456 encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) 2457 with self.open('r', encoding, *args, **kwargs) as strm: 2458 return strm.read() 2459 2460 def read_bytes(self): 2461 with self.open('rb') as strm: 2462 return strm.read() 2463 2464 def _is_child(self, path): 2465 return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") 2466 2467 def _next(self, at): 2468 return self.__class__(self.root, at) 2469 2470 def is_dir(self): 2471 return not self.at or self.at.endswith("/") 2472 2473 def is_file(self): 2474 return self.exists() and not self.is_dir() 2475 2476 def exists(self): 2477 return self.at in self.root._name_set() 2478 2479 def iterdir(self): 2480 if not self.is_dir(): 2481 raise ValueError("Can't listdir a file") 2482 subs = map(self._next, self.root.namelist()) 2483 return filter(self._is_child, subs) 2484 2485 def __str__(self): 2486 return posixpath.join(self.root.filename, self.at) 2487 2488 def __repr__(self): 2489 return self.__repr.format(self=self) 2490 2491 def joinpath(self, *other): 2492 next = posixpath.join(self.at, *other) 2493 return self._next(self.root.resolve_dir(next)) 2494 2495 __truediv__ = joinpath 2496 2497 @property 2498 def parent(self): 2499 if not self.at: 2500 return self.filename.parent 2501 parent_at = posixpath.dirname(self.at.rstrip('/')) 2502 if parent_at: 2503 parent_at += '/' 2504 return self._next(parent_at) 2505 2506 2507def main(args=None): 2508 import argparse 2509 2510 description = 'A simple command-line interface for zipfile module.' 2511 parser = argparse.ArgumentParser(description=description) 2512 group = parser.add_mutually_exclusive_group(required=True) 2513 group.add_argument('-l', '--list', metavar='<zipfile>', 2514 help='Show listing of a zipfile') 2515 group.add_argument('-e', '--extract', nargs=2, 2516 metavar=('<zipfile>', '<output_dir>'), 2517 help='Extract zipfile into target dir') 2518 group.add_argument('-c', '--create', nargs='+', 2519 metavar=('<name>', '<file>'), 2520 help='Create zipfile from sources') 2521 group.add_argument('-t', '--test', metavar='<zipfile>', 2522 help='Test if a zipfile is valid') 2523 parser.add_argument('--metadata-encoding', metavar='<encoding>', 2524 help='Specify encoding of member names for -l, -e and -t') 2525 args = parser.parse_args(args) 2526 2527 encoding = args.metadata_encoding 2528 2529 if args.test is not None: 2530 src = args.test 2531 with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 2532 badfile = zf.testzip() 2533 if badfile: 2534 print("The following enclosed file is corrupted: {!r}".format(badfile)) 2535 print("Done testing") 2536 2537 elif args.list is not None: 2538 src = args.list 2539 with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 2540 zf.printdir() 2541 2542 elif args.extract is not None: 2543 src, curdir = args.extract 2544 with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 2545 zf.extractall(curdir) 2546 2547 elif args.create is not None: 2548 if encoding: 2549 print("Non-conforming encodings not supported with -c.", 2550 file=sys.stderr) 2551 sys.exit(1) 2552 2553 zip_name = args.create.pop(0) 2554 files = args.create 2555 2556 def addToZip(zf, path, zippath): 2557 if os.path.isfile(path): 2558 zf.write(path, zippath, ZIP_DEFLATED) 2559 elif os.path.isdir(path): 2560 if zippath: 2561 zf.write(path, zippath) 2562 for nm in sorted(os.listdir(path)): 2563 addToZip(zf, 2564 os.path.join(path, nm), os.path.join(zippath, nm)) 2565 # else: ignore 2566 2567 with ZipFile(zip_name, 'w') as zf: 2568 for path in files: 2569 zippath = os.path.basename(path) 2570 if not zippath: 2571 zippath = os.path.basename(os.path.dirname(path)) 2572 if zippath in ('', os.curdir, os.pardir): 2573 zippath = '' 2574 addToZip(zf, path, zippath) 2575 2576 2577if __name__ == "__main__": 2578 main() 2579