1""" 2Read and write ZIP files. 3 4XXX references to utf-8 need further investigation. 5""" 6import binascii 7import importlib.util 8import io 9import itertools 10import os 11import posixpath 12import shutil 13import stat 14import struct 15import sys 16import threading 17import time 18import contextlib 19import pathlib 20 21try: 22 import zlib # We may need its compression method 23 crc32 = zlib.crc32 24except ImportError: 25 zlib = None 26 crc32 = binascii.crc32 27 28try: 29 import bz2 # We may need its compression method 30except ImportError: 31 bz2 = None 32 33try: 34 import lzma # We may need its compression method 35except ImportError: 36 lzma = None 37 38__all__ = ["BadZipFile", "BadZipfile", "error", 39 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", 40 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", 41 "Path"] 42 43class BadZipFile(Exception): 44 pass 45 46 47class LargeZipFile(Exception): 48 """ 49 Raised when writing a zipfile, the zipfile requires ZIP64 extensions 50 and those extensions are disabled. 51 """ 52 53error = BadZipfile = BadZipFile # Pre-3.2 compatibility names 54 55 56ZIP64_LIMIT = (1 << 31) - 1 57ZIP_FILECOUNT_LIMIT = (1 << 16) - 1 58ZIP_MAX_COMMENT = (1 << 16) - 1 59 60# constants for Zip file compression methods 61ZIP_STORED = 0 62ZIP_DEFLATED = 8 63ZIP_BZIP2 = 12 64ZIP_LZMA = 14 65# Other ZIP compression methods not supported 66 67DEFAULT_VERSION = 20 68ZIP64_VERSION = 45 69BZIP2_VERSION = 46 70LZMA_VERSION = 63 71# we recognize (but not necessarily support) all features up to that version 72MAX_EXTRACT_VERSION = 63 73 74# Below are some formats and associated data for reading/writing headers using 75# the struct module. The names and structures of headers/records are those used 76# in the PKWARE description of the ZIP file format: 77# http://www.pkware.com/documents/casestudies/APPNOTE.TXT 78# (URL valid as of January 2008) 79 80# The "end of central directory" structure, magic number, size, and indices 81# (section V.I in the format document) 82structEndArchive = b"<4s4H2LH" 83stringEndArchive = b"PK\005\006" 84sizeEndCentDir = struct.calcsize(structEndArchive) 85 86_ECD_SIGNATURE = 0 87_ECD_DISK_NUMBER = 1 88_ECD_DISK_START = 2 89_ECD_ENTRIES_THIS_DISK = 3 90_ECD_ENTRIES_TOTAL = 4 91_ECD_SIZE = 5 92_ECD_OFFSET = 6 93_ECD_COMMENT_SIZE = 7 94# These last two indices are not part of the structure as defined in the 95# spec, but they are used internally by this module as a convenience 96_ECD_COMMENT = 8 97_ECD_LOCATION = 9 98 99# The "central directory" structure, magic number, size, and indices 100# of entries in the structure (section V.F in the format document) 101structCentralDir = "<4s4B4HL2L5H2L" 102stringCentralDir = b"PK\001\002" 103sizeCentralDir = struct.calcsize(structCentralDir) 104 105# indexes of entries in the central directory structure 106_CD_SIGNATURE = 0 107_CD_CREATE_VERSION = 1 108_CD_CREATE_SYSTEM = 2 109_CD_EXTRACT_VERSION = 3 110_CD_EXTRACT_SYSTEM = 4 111_CD_FLAG_BITS = 5 112_CD_COMPRESS_TYPE = 6 113_CD_TIME = 7 114_CD_DATE = 8 115_CD_CRC = 9 116_CD_COMPRESSED_SIZE = 10 117_CD_UNCOMPRESSED_SIZE = 11 118_CD_FILENAME_LENGTH = 12 119_CD_EXTRA_FIELD_LENGTH = 13 120_CD_COMMENT_LENGTH = 14 121_CD_DISK_NUMBER_START = 15 122_CD_INTERNAL_FILE_ATTRIBUTES = 16 123_CD_EXTERNAL_FILE_ATTRIBUTES = 17 124_CD_LOCAL_HEADER_OFFSET = 18 125 126# The "local file header" structure, magic number, size, and indices 127# (section V.A in the format document) 128structFileHeader = "<4s2B4HL2L2H" 129stringFileHeader = b"PK\003\004" 130sizeFileHeader = struct.calcsize(structFileHeader) 131 132_FH_SIGNATURE = 0 133_FH_EXTRACT_VERSION = 1 134_FH_EXTRACT_SYSTEM = 2 135_FH_GENERAL_PURPOSE_FLAG_BITS = 3 136_FH_COMPRESSION_METHOD = 4 137_FH_LAST_MOD_TIME = 5 138_FH_LAST_MOD_DATE = 6 139_FH_CRC = 7 140_FH_COMPRESSED_SIZE = 8 141_FH_UNCOMPRESSED_SIZE = 9 142_FH_FILENAME_LENGTH = 10 143_FH_EXTRA_FIELD_LENGTH = 11 144 145# The "Zip64 end of central directory locator" structure, magic number, and size 146structEndArchive64Locator = "<4sLQL" 147stringEndArchive64Locator = b"PK\x06\x07" 148sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) 149 150# The "Zip64 end of central directory" record, magic number, size, and indices 151# (section V.G in the format document) 152structEndArchive64 = "<4sQ2H2L4Q" 153stringEndArchive64 = b"PK\x06\x06" 154sizeEndCentDir64 = struct.calcsize(structEndArchive64) 155 156_CD64_SIGNATURE = 0 157_CD64_DIRECTORY_RECSIZE = 1 158_CD64_CREATE_VERSION = 2 159_CD64_EXTRACT_VERSION = 3 160_CD64_DISK_NUMBER = 4 161_CD64_DISK_NUMBER_START = 5 162_CD64_NUMBER_ENTRIES_THIS_DISK = 6 163_CD64_NUMBER_ENTRIES_TOTAL = 7 164_CD64_DIRECTORY_SIZE = 8 165_CD64_OFFSET_START_CENTDIR = 9 166 167_DD_SIGNATURE = 0x08074b50 168 169_EXTRA_FIELD_STRUCT = struct.Struct('<HH') 170 171def _strip_extra(extra, xids): 172 # Remove Extra Fields with specified IDs. 173 unpack = _EXTRA_FIELD_STRUCT.unpack 174 modified = False 175 buffer = [] 176 start = i = 0 177 while i + 4 <= len(extra): 178 xid, xlen = unpack(extra[i : i + 4]) 179 j = i + 4 + xlen 180 if xid in xids: 181 if i != start: 182 buffer.append(extra[start : i]) 183 start = j 184 modified = True 185 i = j 186 if not modified: 187 return extra 188 return b''.join(buffer) 189 190def _check_zipfile(fp): 191 try: 192 if _EndRecData(fp): 193 return True # file has correct magic number 194 except OSError: 195 pass 196 return False 197 198def is_zipfile(filename): 199 """Quickly see if a file is a ZIP file by checking the magic number. 200 201 The filename argument may be a file or file-like object too. 202 """ 203 result = False 204 try: 205 if hasattr(filename, "read"): 206 result = _check_zipfile(fp=filename) 207 else: 208 with open(filename, "rb") as fp: 209 result = _check_zipfile(fp) 210 except OSError: 211 pass 212 return result 213 214def _EndRecData64(fpin, offset, endrec): 215 """ 216 Read the ZIP64 end-of-archive records and use that to update endrec 217 """ 218 try: 219 fpin.seek(offset - sizeEndCentDir64Locator, 2) 220 except OSError: 221 # If the seek fails, the file is not large enough to contain a ZIP64 222 # end-of-archive record, so just return the end record we were given. 223 return endrec 224 225 data = fpin.read(sizeEndCentDir64Locator) 226 if len(data) != sizeEndCentDir64Locator: 227 return endrec 228 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) 229 if sig != stringEndArchive64Locator: 230 return endrec 231 232 if diskno != 0 or disks > 1: 233 raise BadZipFile("zipfiles that span multiple disks are not supported") 234 235 # Assume no 'zip64 extensible data' 236 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 237 data = fpin.read(sizeEndCentDir64) 238 if len(data) != sizeEndCentDir64: 239 return endrec 240 sig, sz, create_version, read_version, disk_num, disk_dir, \ 241 dircount, dircount2, dirsize, diroffset = \ 242 struct.unpack(structEndArchive64, data) 243 if sig != stringEndArchive64: 244 return endrec 245 246 # Update the original endrec using data from the ZIP64 record 247 endrec[_ECD_SIGNATURE] = sig 248 endrec[_ECD_DISK_NUMBER] = disk_num 249 endrec[_ECD_DISK_START] = disk_dir 250 endrec[_ECD_ENTRIES_THIS_DISK] = dircount 251 endrec[_ECD_ENTRIES_TOTAL] = dircount2 252 endrec[_ECD_SIZE] = dirsize 253 endrec[_ECD_OFFSET] = diroffset 254 return endrec 255 256 257def _EndRecData(fpin): 258 """Return data from the "End of Central Directory" record, or None. 259 260 The data is a list of the nine items in the ZIP "End of central dir" 261 record followed by a tenth item, the file seek offset of this record.""" 262 263 # Determine file size 264 fpin.seek(0, 2) 265 filesize = fpin.tell() 266 267 # Check to see if this is ZIP file with no archive comment (the 268 # "end of central directory" structure should be the last item in the 269 # file if this is the case). 270 try: 271 fpin.seek(-sizeEndCentDir, 2) 272 except OSError: 273 return None 274 data = fpin.read() 275 if (len(data) == sizeEndCentDir and 276 data[0:4] == stringEndArchive and 277 data[-2:] == b"\000\000"): 278 # the signature is correct and there's no comment, unpack structure 279 endrec = struct.unpack(structEndArchive, data) 280 endrec=list(endrec) 281 282 # Append a blank comment and record start offset 283 endrec.append(b"") 284 endrec.append(filesize - sizeEndCentDir) 285 286 # Try to read the "Zip64 end of central directory" structure 287 return _EndRecData64(fpin, -sizeEndCentDir, endrec) 288 289 # Either this is not a ZIP file, or it is a ZIP file with an archive 290 # comment. Search the end of the file for the "end of central directory" 291 # record signature. The comment is the last item in the ZIP file and may be 292 # up to 64K long. It is assumed that the "end of central directory" magic 293 # number does not appear in the comment. 294 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) 295 fpin.seek(maxCommentStart, 0) 296 data = fpin.read() 297 start = data.rfind(stringEndArchive) 298 if start >= 0: 299 # found the magic number; attempt to unpack and interpret 300 recData = data[start:start+sizeEndCentDir] 301 if len(recData) != sizeEndCentDir: 302 # Zip file is corrupted. 303 return None 304 endrec = list(struct.unpack(structEndArchive, recData)) 305 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file 306 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize] 307 endrec.append(comment) 308 endrec.append(maxCommentStart + start) 309 310 # Try to read the "Zip64 end of central directory" structure 311 return _EndRecData64(fpin, maxCommentStart + start - filesize, 312 endrec) 313 314 # Unable to find a valid end of central directory structure 315 return None 316 317 318class ZipInfo (object): 319 """Class with attributes describing each file in the ZIP archive.""" 320 321 __slots__ = ( 322 'orig_filename', 323 'filename', 324 'date_time', 325 'compress_type', 326 '_compresslevel', 327 'comment', 328 'extra', 329 'create_system', 330 'create_version', 331 'extract_version', 332 'reserved', 333 'flag_bits', 334 'volume', 335 'internal_attr', 336 'external_attr', 337 'header_offset', 338 'CRC', 339 'compress_size', 340 'file_size', 341 '_raw_time', 342 '_end_offset', 343 ) 344 345 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): 346 self.orig_filename = filename # Original file name in archive 347 348 # Terminate the file name at the first null byte. Null bytes in file 349 # names are used as tricks by viruses in archives. 350 null_byte = filename.find(chr(0)) 351 if null_byte >= 0: 352 filename = filename[0:null_byte] 353 # This is used to ensure paths in generated ZIP files always use 354 # forward slashes as the directory separator, as required by the 355 # ZIP format specification. 356 if os.sep != "/" and os.sep in filename: 357 filename = filename.replace(os.sep, "/") 358 359 self.filename = filename # Normalized file name 360 self.date_time = date_time # year, month, day, hour, min, sec 361 362 if date_time[0] < 1980: 363 raise ValueError('ZIP does not support timestamps before 1980') 364 365 # Standard values: 366 self.compress_type = ZIP_STORED # Type of compression for the file 367 self._compresslevel = None # Level for the compressor 368 self.comment = b"" # Comment for each file 369 self.extra = b"" # ZIP extra data 370 if sys.platform == 'win32': 371 self.create_system = 0 # System which created ZIP archive 372 else: 373 # Assume everything else is unix-y 374 self.create_system = 3 # System which created ZIP archive 375 self.create_version = DEFAULT_VERSION # Version which created ZIP archive 376 self.extract_version = DEFAULT_VERSION # Version needed to extract archive 377 self.reserved = 0 # Must be zero 378 self.flag_bits = 0 # ZIP flag bits 379 self.volume = 0 # Volume number of file header 380 self.internal_attr = 0 # Internal attributes 381 self.external_attr = 0 # External file attributes 382 self.compress_size = 0 # Size of the compressed file 383 self.file_size = 0 # Size of the uncompressed file 384 self._end_offset = None # Start of the next local header or central directory 385 # Other attributes are set by class ZipFile: 386 # header_offset Byte offset to the file header 387 # CRC CRC-32 of the uncompressed file 388 389 def __repr__(self): 390 result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)] 391 if self.compress_type != ZIP_STORED: 392 result.append(' compress_type=%s' % 393 compressor_names.get(self.compress_type, 394 self.compress_type)) 395 hi = self.external_attr >> 16 396 lo = self.external_attr & 0xFFFF 397 if hi: 398 result.append(' filemode=%r' % stat.filemode(hi)) 399 if lo: 400 result.append(' external_attr=%#x' % lo) 401 isdir = self.is_dir() 402 if not isdir or self.file_size: 403 result.append(' file_size=%r' % self.file_size) 404 if ((not isdir or self.compress_size) and 405 (self.compress_type != ZIP_STORED or 406 self.file_size != self.compress_size)): 407 result.append(' compress_size=%r' % self.compress_size) 408 result.append('>') 409 return ''.join(result) 410 411 def FileHeader(self, zip64=None): 412 """Return the per-file header as a bytes object.""" 413 dt = self.date_time 414 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 415 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 416 if self.flag_bits & 0x08: 417 # Set these to zero because we write them after the file data 418 CRC = compress_size = file_size = 0 419 else: 420 CRC = self.CRC 421 compress_size = self.compress_size 422 file_size = self.file_size 423 424 extra = self.extra 425 426 min_version = 0 427 if zip64 is None: 428 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT 429 if zip64: 430 fmt = '<HHQQ' 431 extra = extra + struct.pack(fmt, 432 1, struct.calcsize(fmt)-4, file_size, compress_size) 433 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: 434 if not zip64: 435 raise LargeZipFile("Filesize would require ZIP64 extensions") 436 # File is larger than what fits into a 4 byte integer, 437 # fall back to the ZIP64 extension 438 file_size = 0xffffffff 439 compress_size = 0xffffffff 440 min_version = ZIP64_VERSION 441 442 if self.compress_type == ZIP_BZIP2: 443 min_version = max(BZIP2_VERSION, min_version) 444 elif self.compress_type == ZIP_LZMA: 445 min_version = max(LZMA_VERSION, min_version) 446 447 self.extract_version = max(min_version, self.extract_version) 448 self.create_version = max(min_version, self.create_version) 449 filename, flag_bits = self._encodeFilenameFlags() 450 header = struct.pack(structFileHeader, stringFileHeader, 451 self.extract_version, self.reserved, flag_bits, 452 self.compress_type, dostime, dosdate, CRC, 453 compress_size, file_size, 454 len(filename), len(extra)) 455 return header + filename + extra 456 457 def _encodeFilenameFlags(self): 458 try: 459 return self.filename.encode('ascii'), self.flag_bits 460 except UnicodeEncodeError: 461 return self.filename.encode('utf-8'), self.flag_bits | 0x800 462 463 def _decodeExtra(self): 464 # Try to decode the extra field. 465 extra = self.extra 466 unpack = struct.unpack 467 while len(extra) >= 4: 468 tp, ln = unpack('<HH', extra[:4]) 469 if ln+4 > len(extra): 470 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) 471 if tp == 0x0001: 472 data = extra[4:ln+4] 473 # ZIP64 extension (large files and/or large archives) 474 try: 475 if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF): 476 field = "File size" 477 self.file_size, = unpack('<Q', data[:8]) 478 data = data[8:] 479 if self.compress_size == 0xFFFF_FFFF: 480 field = "Compress size" 481 self.compress_size, = unpack('<Q', data[:8]) 482 data = data[8:] 483 if self.header_offset == 0xFFFF_FFFF: 484 field = "Header offset" 485 self.header_offset, = unpack('<Q', data[:8]) 486 except struct.error: 487 raise BadZipFile(f"Corrupt zip64 extra field. " 488 f"{field} not found.") from None 489 490 extra = extra[ln+4:] 491 492 @classmethod 493 def from_file(cls, filename, arcname=None, *, strict_timestamps=True): 494 """Construct an appropriate ZipInfo for a file on the filesystem. 495 496 filename should be the path to a file or directory on the filesystem. 497 498 arcname is the name which it will have within the archive (by default, 499 this will be the same as filename, but without a drive letter and with 500 leading path separators removed). 501 """ 502 if isinstance(filename, os.PathLike): 503 filename = os.fspath(filename) 504 st = os.stat(filename) 505 isdir = stat.S_ISDIR(st.st_mode) 506 mtime = time.localtime(st.st_mtime) 507 date_time = mtime[0:6] 508 if not strict_timestamps and date_time[0] < 1980: 509 date_time = (1980, 1, 1, 0, 0, 0) 510 elif not strict_timestamps and date_time[0] > 2107: 511 date_time = (2107, 12, 31, 23, 59, 59) 512 # Create ZipInfo instance to store file information 513 if arcname is None: 514 arcname = filename 515 arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) 516 while arcname[0] in (os.sep, os.altsep): 517 arcname = arcname[1:] 518 if isdir: 519 arcname += '/' 520 zinfo = cls(arcname, date_time) 521 zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes 522 if isdir: 523 zinfo.file_size = 0 524 zinfo.external_attr |= 0x10 # MS-DOS directory flag 525 else: 526 zinfo.file_size = st.st_size 527 528 return zinfo 529 530 def is_dir(self): 531 """Return True if this archive member is a directory.""" 532 return self.filename[-1] == '/' 533 534 535# ZIP encryption uses the CRC32 one-byte primitive for scrambling some 536# internal keys. We noticed that a direct implementation is faster than 537# relying on binascii.crc32(). 538 539_crctable = None 540def _gen_crc(crc): 541 for j in range(8): 542 if crc & 1: 543 crc = (crc >> 1) ^ 0xEDB88320 544 else: 545 crc >>= 1 546 return crc 547 548# ZIP supports a password-based form of encryption. Even though known 549# plaintext attacks have been found against it, it is still useful 550# to be able to get data out of such a file. 551# 552# Usage: 553# zd = _ZipDecrypter(mypwd) 554# plain_bytes = zd(cypher_bytes) 555 556def _ZipDecrypter(pwd): 557 key0 = 305419896 558 key1 = 591751049 559 key2 = 878082192 560 561 global _crctable 562 if _crctable is None: 563 _crctable = list(map(_gen_crc, range(256))) 564 crctable = _crctable 565 566 def crc32(ch, crc): 567 """Compute the CRC32 primitive on one byte.""" 568 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] 569 570 def update_keys(c): 571 nonlocal key0, key1, key2 572 key0 = crc32(c, key0) 573 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF 574 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF 575 key2 = crc32(key1 >> 24, key2) 576 577 for p in pwd: 578 update_keys(p) 579 580 def decrypter(data): 581 """Decrypt a bytes object.""" 582 result = bytearray() 583 append = result.append 584 for c in data: 585 k = key2 | 2 586 c ^= ((k * (k^1)) >> 8) & 0xFF 587 update_keys(c) 588 append(c) 589 return bytes(result) 590 591 return decrypter 592 593 594class LZMACompressor: 595 596 def __init__(self): 597 self._comp = None 598 599 def _init(self): 600 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1}) 601 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[ 602 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props) 603 ]) 604 return struct.pack('<BBH', 9, 4, len(props)) + props 605 606 def compress(self, data): 607 if self._comp is None: 608 return self._init() + self._comp.compress(data) 609 return self._comp.compress(data) 610 611 def flush(self): 612 if self._comp is None: 613 return self._init() + self._comp.flush() 614 return self._comp.flush() 615 616 617class LZMADecompressor: 618 619 def __init__(self): 620 self._decomp = None 621 self._unconsumed = b'' 622 self.eof = False 623 624 def decompress(self, data): 625 if self._decomp is None: 626 self._unconsumed += data 627 if len(self._unconsumed) <= 4: 628 return b'' 629 psize, = struct.unpack('<H', self._unconsumed[2:4]) 630 if len(self._unconsumed) <= 4 + psize: 631 return b'' 632 633 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[ 634 lzma._decode_filter_properties(lzma.FILTER_LZMA1, 635 self._unconsumed[4:4 + psize]) 636 ]) 637 data = self._unconsumed[4 + psize:] 638 del self._unconsumed 639 640 result = self._decomp.decompress(data) 641 self.eof = self._decomp.eof 642 return result 643 644 645compressor_names = { 646 0: 'store', 647 1: 'shrink', 648 2: 'reduce', 649 3: 'reduce', 650 4: 'reduce', 651 5: 'reduce', 652 6: 'implode', 653 7: 'tokenize', 654 8: 'deflate', 655 9: 'deflate64', 656 10: 'implode', 657 12: 'bzip2', 658 14: 'lzma', 659 18: 'terse', 660 19: 'lz77', 661 97: 'wavpack', 662 98: 'ppmd', 663} 664 665def _check_compression(compression): 666 if compression == ZIP_STORED: 667 pass 668 elif compression == ZIP_DEFLATED: 669 if not zlib: 670 raise RuntimeError( 671 "Compression requires the (missing) zlib module") 672 elif compression == ZIP_BZIP2: 673 if not bz2: 674 raise RuntimeError( 675 "Compression requires the (missing) bz2 module") 676 elif compression == ZIP_LZMA: 677 if not lzma: 678 raise RuntimeError( 679 "Compression requires the (missing) lzma module") 680 else: 681 raise NotImplementedError("That compression method is not supported") 682 683 684def _get_compressor(compress_type, compresslevel=None): 685 if compress_type == ZIP_DEFLATED: 686 if compresslevel is not None: 687 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15) 688 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) 689 elif compress_type == ZIP_BZIP2: 690 if compresslevel is not None: 691 return bz2.BZ2Compressor(compresslevel) 692 return bz2.BZ2Compressor() 693 # compresslevel is ignored for ZIP_LZMA 694 elif compress_type == ZIP_LZMA: 695 return LZMACompressor() 696 else: 697 return None 698 699 700def _get_decompressor(compress_type): 701 _check_compression(compress_type) 702 if compress_type == ZIP_STORED: 703 return None 704 elif compress_type == ZIP_DEFLATED: 705 return zlib.decompressobj(-15) 706 elif compress_type == ZIP_BZIP2: 707 return bz2.BZ2Decompressor() 708 elif compress_type == ZIP_LZMA: 709 return LZMADecompressor() 710 else: 711 descr = compressor_names.get(compress_type) 712 if descr: 713 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr)) 714 else: 715 raise NotImplementedError("compression type %d" % (compress_type,)) 716 717 718class _SharedFile: 719 def __init__(self, file, pos, close, lock, writing): 720 self._file = file 721 self._pos = pos 722 self._close = close 723 self._lock = lock 724 self._writing = writing 725 self.seekable = file.seekable 726 self.tell = file.tell 727 728 def seek(self, offset, whence=0): 729 with self._lock: 730 if self._writing(): 731 raise ValueError("Can't reposition in the ZIP file while " 732 "there is an open writing handle on it. " 733 "Close the writing handle before trying to read.") 734 self._file.seek(offset, whence) 735 self._pos = self._file.tell() 736 return self._pos 737 738 def read(self, n=-1): 739 with self._lock: 740 if self._writing(): 741 raise ValueError("Can't read from the ZIP file while there " 742 "is an open writing handle on it. " 743 "Close the writing handle before trying to read.") 744 self._file.seek(self._pos) 745 data = self._file.read(n) 746 self._pos = self._file.tell() 747 return data 748 749 def close(self): 750 if self._file is not None: 751 fileobj = self._file 752 self._file = None 753 self._close(fileobj) 754 755# Provide the tell method for unseekable stream 756class _Tellable: 757 def __init__(self, fp): 758 self.fp = fp 759 self.offset = 0 760 761 def write(self, data): 762 n = self.fp.write(data) 763 self.offset += n 764 return n 765 766 def tell(self): 767 return self.offset 768 769 def flush(self): 770 self.fp.flush() 771 772 def close(self): 773 self.fp.close() 774 775 776class ZipExtFile(io.BufferedIOBase): 777 """File-like object for reading an archive member. 778 Is returned by ZipFile.open(). 779 """ 780 781 # Max size supported by decompressor. 782 MAX_N = 1 << 31 - 1 783 784 # Read from compressed files in 4k blocks. 785 MIN_READ_SIZE = 4096 786 787 # Chunk size to read during seek 788 MAX_SEEK_READ = 1 << 24 789 790 def __init__(self, fileobj, mode, zipinfo, pwd=None, 791 close_fileobj=False): 792 self._fileobj = fileobj 793 self._pwd = pwd 794 self._close_fileobj = close_fileobj 795 796 self._compress_type = zipinfo.compress_type 797 self._compress_left = zipinfo.compress_size 798 self._left = zipinfo.file_size 799 800 self._decompressor = _get_decompressor(self._compress_type) 801 802 self._eof = False 803 self._readbuffer = b'' 804 self._offset = 0 805 806 self.newlines = None 807 808 self.mode = mode 809 self.name = zipinfo.filename 810 811 if hasattr(zipinfo, 'CRC'): 812 self._expected_crc = zipinfo.CRC 813 self._running_crc = crc32(b'') 814 else: 815 self._expected_crc = None 816 817 self._seekable = False 818 try: 819 if fileobj.seekable(): 820 self._orig_compress_start = fileobj.tell() 821 self._orig_compress_size = zipinfo.compress_size 822 self._orig_file_size = zipinfo.file_size 823 self._orig_start_crc = self._running_crc 824 self._seekable = True 825 except AttributeError: 826 pass 827 828 self._decrypter = None 829 if pwd: 830 if zipinfo.flag_bits & 0x8: 831 # compare against the file type from extended local headers 832 check_byte = (zipinfo._raw_time >> 8) & 0xff 833 else: 834 # compare against the CRC otherwise 835 check_byte = (zipinfo.CRC >> 24) & 0xff 836 h = self._init_decrypter() 837 if h != check_byte: 838 raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename) 839 840 841 def _init_decrypter(self): 842 self._decrypter = _ZipDecrypter(self._pwd) 843 # The first 12 bytes in the cypher stream is an encryption header 844 # used to strengthen the algorithm. The first 11 bytes are 845 # completely random, while the 12th contains the MSB of the CRC, 846 # or the MSB of the file time depending on the header type 847 # and is used to check the correctness of the password. 848 header = self._fileobj.read(12) 849 self._compress_left -= 12 850 return self._decrypter(header)[11] 851 852 def __repr__(self): 853 result = ['<%s.%s' % (self.__class__.__module__, 854 self.__class__.__qualname__)] 855 if not self.closed: 856 result.append(' name=%r mode=%r' % (self.name, self.mode)) 857 if self._compress_type != ZIP_STORED: 858 result.append(' compress_type=%s' % 859 compressor_names.get(self._compress_type, 860 self._compress_type)) 861 else: 862 result.append(' [closed]') 863 result.append('>') 864 return ''.join(result) 865 866 def readline(self, limit=-1): 867 """Read and return a line from the stream. 868 869 If limit is specified, at most limit bytes will be read. 870 """ 871 872 if limit < 0: 873 # Shortcut common case - newline found in buffer. 874 i = self._readbuffer.find(b'\n', self._offset) + 1 875 if i > 0: 876 line = self._readbuffer[self._offset: i] 877 self._offset = i 878 return line 879 880 return io.BufferedIOBase.readline(self, limit) 881 882 def peek(self, n=1): 883 """Returns buffered bytes without advancing the position.""" 884 if n > len(self._readbuffer) - self._offset: 885 chunk = self.read(n) 886 if len(chunk) > self._offset: 887 self._readbuffer = chunk + self._readbuffer[self._offset:] 888 self._offset = 0 889 else: 890 self._offset -= len(chunk) 891 892 # Return up to 512 bytes to reduce allocation overhead for tight loops. 893 return self._readbuffer[self._offset: self._offset + 512] 894 895 def readable(self): 896 if self.closed: 897 raise ValueError("I/O operation on closed file.") 898 return True 899 900 def read(self, n=-1): 901 """Read and return up to n bytes. 902 If the argument is omitted, None, or negative, data is read and returned until EOF is reached. 903 """ 904 if self.closed: 905 raise ValueError("read from closed file.") 906 if n is None or n < 0: 907 buf = self._readbuffer[self._offset:] 908 self._readbuffer = b'' 909 self._offset = 0 910 while not self._eof: 911 buf += self._read1(self.MAX_N) 912 return buf 913 914 end = n + self._offset 915 if end < len(self._readbuffer): 916 buf = self._readbuffer[self._offset:end] 917 self._offset = end 918 return buf 919 920 n = end - len(self._readbuffer) 921 buf = self._readbuffer[self._offset:] 922 self._readbuffer = b'' 923 self._offset = 0 924 while n > 0 and not self._eof: 925 data = self._read1(n) 926 if n < len(data): 927 self._readbuffer = data 928 self._offset = n 929 buf += data[:n] 930 break 931 buf += data 932 n -= len(data) 933 return buf 934 935 def _update_crc(self, newdata): 936 # Update the CRC using the given data. 937 if self._expected_crc is None: 938 # No need to compute the CRC if we don't have a reference value 939 return 940 self._running_crc = crc32(newdata, self._running_crc) 941 # Check the CRC if we're at the end of the file 942 if self._eof and self._running_crc != self._expected_crc: 943 raise BadZipFile("Bad CRC-32 for file %r" % self.name) 944 945 def read1(self, n): 946 """Read up to n bytes with at most one read() system call.""" 947 948 if n is None or n < 0: 949 buf = self._readbuffer[self._offset:] 950 self._readbuffer = b'' 951 self._offset = 0 952 while not self._eof: 953 data = self._read1(self.MAX_N) 954 if data: 955 buf += data 956 break 957 return buf 958 959 end = n + self._offset 960 if end < len(self._readbuffer): 961 buf = self._readbuffer[self._offset:end] 962 self._offset = end 963 return buf 964 965 n = end - len(self._readbuffer) 966 buf = self._readbuffer[self._offset:] 967 self._readbuffer = b'' 968 self._offset = 0 969 if n > 0: 970 while not self._eof: 971 data = self._read1(n) 972 if n < len(data): 973 self._readbuffer = data 974 self._offset = n 975 buf += data[:n] 976 break 977 if data: 978 buf += data 979 break 980 return buf 981 982 def _read1(self, n): 983 # Read up to n compressed bytes with at most one read() system call, 984 # decrypt and decompress them. 985 if self._eof or n <= 0: 986 return b'' 987 988 # Read from file. 989 if self._compress_type == ZIP_DEFLATED: 990 ## Handle unconsumed data. 991 data = self._decompressor.unconsumed_tail 992 if n > len(data): 993 data += self._read2(n - len(data)) 994 else: 995 data = self._read2(n) 996 997 if self._compress_type == ZIP_STORED: 998 self._eof = self._compress_left <= 0 999 elif self._compress_type == ZIP_DEFLATED: 1000 n = max(n, self.MIN_READ_SIZE) 1001 data = self._decompressor.decompress(data, n) 1002 self._eof = (self._decompressor.eof or 1003 self._compress_left <= 0 and 1004 not self._decompressor.unconsumed_tail) 1005 if self._eof: 1006 data += self._decompressor.flush() 1007 else: 1008 data = self._decompressor.decompress(data) 1009 self._eof = self._decompressor.eof or self._compress_left <= 0 1010 1011 data = data[:self._left] 1012 self._left -= len(data) 1013 if self._left <= 0: 1014 self._eof = True 1015 self._update_crc(data) 1016 return data 1017 1018 def _read2(self, n): 1019 if self._compress_left <= 0: 1020 return b'' 1021 1022 n = max(n, self.MIN_READ_SIZE) 1023 n = min(n, self._compress_left) 1024 1025 data = self._fileobj.read(n) 1026 self._compress_left -= len(data) 1027 if not data: 1028 raise EOFError 1029 1030 if self._decrypter is not None: 1031 data = self._decrypter(data) 1032 return data 1033 1034 def close(self): 1035 try: 1036 if self._close_fileobj: 1037 self._fileobj.close() 1038 finally: 1039 super().close() 1040 1041 def seekable(self): 1042 if self.closed: 1043 raise ValueError("I/O operation on closed file.") 1044 return self._seekable 1045 1046 def seek(self, offset, whence=0): 1047 if self.closed: 1048 raise ValueError("seek on closed file.") 1049 if not self._seekable: 1050 raise io.UnsupportedOperation("underlying stream is not seekable") 1051 curr_pos = self.tell() 1052 if whence == 0: # Seek from start of file 1053 new_pos = offset 1054 elif whence == 1: # Seek from current position 1055 new_pos = curr_pos + offset 1056 elif whence == 2: # Seek from EOF 1057 new_pos = self._orig_file_size + offset 1058 else: 1059 raise ValueError("whence must be os.SEEK_SET (0), " 1060 "os.SEEK_CUR (1), or os.SEEK_END (2)") 1061 1062 if new_pos > self._orig_file_size: 1063 new_pos = self._orig_file_size 1064 1065 if new_pos < 0: 1066 new_pos = 0 1067 1068 read_offset = new_pos - curr_pos 1069 buff_offset = read_offset + self._offset 1070 1071 if buff_offset >= 0 and buff_offset < len(self._readbuffer): 1072 # Just move the _offset index if the new position is in the _readbuffer 1073 self._offset = buff_offset 1074 read_offset = 0 1075 elif read_offset < 0: 1076 # Position is before the current position. Reset the ZipExtFile 1077 self._fileobj.seek(self._orig_compress_start) 1078 self._running_crc = self._orig_start_crc 1079 self._compress_left = self._orig_compress_size 1080 self._left = self._orig_file_size 1081 self._readbuffer = b'' 1082 self._offset = 0 1083 self._decompressor = _get_decompressor(self._compress_type) 1084 self._eof = False 1085 read_offset = new_pos 1086 if self._decrypter is not None: 1087 self._init_decrypter() 1088 1089 while read_offset > 0: 1090 read_len = min(self.MAX_SEEK_READ, read_offset) 1091 self.read(read_len) 1092 read_offset -= read_len 1093 1094 return self.tell() 1095 1096 def tell(self): 1097 if self.closed: 1098 raise ValueError("tell on closed file.") 1099 if not self._seekable: 1100 raise io.UnsupportedOperation("underlying stream is not seekable") 1101 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset 1102 return filepos 1103 1104 1105class _ZipWriteFile(io.BufferedIOBase): 1106 def __init__(self, zf, zinfo, zip64): 1107 self._zinfo = zinfo 1108 self._zip64 = zip64 1109 self._zipfile = zf 1110 self._compressor = _get_compressor(zinfo.compress_type, 1111 zinfo._compresslevel) 1112 self._file_size = 0 1113 self._compress_size = 0 1114 self._crc = 0 1115 1116 @property 1117 def _fileobj(self): 1118 return self._zipfile.fp 1119 1120 def writable(self): 1121 return True 1122 1123 def write(self, data): 1124 if self.closed: 1125 raise ValueError('I/O operation on closed file.') 1126 nbytes = len(data) 1127 self._file_size += nbytes 1128 self._crc = crc32(data, self._crc) 1129 if self._compressor: 1130 data = self._compressor.compress(data) 1131 self._compress_size += len(data) 1132 self._fileobj.write(data) 1133 return nbytes 1134 1135 def close(self): 1136 if self.closed: 1137 return 1138 try: 1139 super().close() 1140 # Flush any data from the compressor, and update header info 1141 if self._compressor: 1142 buf = self._compressor.flush() 1143 self._compress_size += len(buf) 1144 self._fileobj.write(buf) 1145 self._zinfo.compress_size = self._compress_size 1146 else: 1147 self._zinfo.compress_size = self._file_size 1148 self._zinfo.CRC = self._crc 1149 self._zinfo.file_size = self._file_size 1150 1151 # Write updated header info 1152 if self._zinfo.flag_bits & 0x08: 1153 # Write CRC and file sizes after the file data 1154 fmt = '<LLQQ' if self._zip64 else '<LLLL' 1155 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC, 1156 self._zinfo.compress_size, self._zinfo.file_size)) 1157 self._zipfile.start_dir = self._fileobj.tell() 1158 else: 1159 if not self._zip64: 1160 if self._file_size > ZIP64_LIMIT: 1161 raise RuntimeError( 1162 'File size unexpectedly exceeded ZIP64 limit') 1163 if self._compress_size > ZIP64_LIMIT: 1164 raise RuntimeError( 1165 'Compressed size unexpectedly exceeded ZIP64 limit') 1166 # Seek backwards and write file header (which will now include 1167 # correct CRC and file sizes) 1168 1169 # Preserve current position in file 1170 self._zipfile.start_dir = self._fileobj.tell() 1171 self._fileobj.seek(self._zinfo.header_offset) 1172 self._fileobj.write(self._zinfo.FileHeader(self._zip64)) 1173 self._fileobj.seek(self._zipfile.start_dir) 1174 1175 # Successfully written: Add file to our caches 1176 self._zipfile.filelist.append(self._zinfo) 1177 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo 1178 finally: 1179 self._zipfile._writing = False 1180 1181 1182 1183class ZipFile: 1184 """ Class with methods to open, read, write, close, list zip files. 1185 1186 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True, 1187 compresslevel=None) 1188 1189 file: Either the path to the file, or a file-like object. 1190 If it is a path, the file will be opened and closed by ZipFile. 1191 mode: The mode can be either read 'r', write 'w', exclusive create 'x', 1192 or append 'a'. 1193 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), 1194 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). 1195 allowZip64: if True ZipFile will create files with ZIP64 extensions when 1196 needed, otherwise it will raise an exception when this would 1197 be necessary. 1198 compresslevel: None (default for the given compression type) or an integer 1199 specifying the level to pass to the compressor. 1200 When using ZIP_STORED or ZIP_LZMA this keyword has no effect. 1201 When using ZIP_DEFLATED integers 0 through 9 are accepted. 1202 When using ZIP_BZIP2 integers 1 through 9 are accepted. 1203 1204 """ 1205 1206 fp = None # Set here since __del__ checks it 1207 _windows_illegal_name_trans_table = None 1208 1209 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, 1210 compresslevel=None, *, strict_timestamps=True): 1211 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x', 1212 or append 'a'.""" 1213 if mode not in ('r', 'w', 'x', 'a'): 1214 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'") 1215 1216 _check_compression(compression) 1217 1218 self._allowZip64 = allowZip64 1219 self._didModify = False 1220 self.debug = 0 # Level of printing: 0 through 3 1221 self.NameToInfo = {} # Find file info given name 1222 self.filelist = [] # List of ZipInfo instances for archive 1223 self.compression = compression # Method of compression 1224 self.compresslevel = compresslevel 1225 self.mode = mode 1226 self.pwd = None 1227 self._comment = b'' 1228 self._strict_timestamps = strict_timestamps 1229 1230 # Check if we were passed a file-like object 1231 if isinstance(file, os.PathLike): 1232 file = os.fspath(file) 1233 if isinstance(file, str): 1234 # No, it's a filename 1235 self._filePassed = 0 1236 self.filename = file 1237 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b', 1238 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'} 1239 filemode = modeDict[mode] 1240 while True: 1241 try: 1242 self.fp = io.open(file, filemode) 1243 except OSError: 1244 if filemode in modeDict: 1245 filemode = modeDict[filemode] 1246 continue 1247 raise 1248 break 1249 else: 1250 self._filePassed = 1 1251 self.fp = file 1252 self.filename = getattr(file, 'name', None) 1253 self._fileRefCnt = 1 1254 self._lock = threading.RLock() 1255 self._seekable = True 1256 self._writing = False 1257 1258 try: 1259 if mode == 'r': 1260 self._RealGetContents() 1261 elif mode in ('w', 'x'): 1262 # set the modified flag so central directory gets written 1263 # even if no files are added to the archive 1264 self._didModify = True 1265 try: 1266 self.start_dir = self.fp.tell() 1267 except (AttributeError, OSError): 1268 self.fp = _Tellable(self.fp) 1269 self.start_dir = 0 1270 self._seekable = False 1271 else: 1272 # Some file-like objects can provide tell() but not seek() 1273 try: 1274 self.fp.seek(self.start_dir) 1275 except (AttributeError, OSError): 1276 self._seekable = False 1277 elif mode == 'a': 1278 try: 1279 # See if file is a zip file 1280 self._RealGetContents() 1281 # seek to start of directory and overwrite 1282 self.fp.seek(self.start_dir) 1283 except BadZipFile: 1284 # file is not a zip file, just append 1285 self.fp.seek(0, 2) 1286 1287 # set the modified flag so central directory gets written 1288 # even if no files are added to the archive 1289 self._didModify = True 1290 self.start_dir = self.fp.tell() 1291 else: 1292 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'") 1293 except: 1294 fp = self.fp 1295 self.fp = None 1296 self._fpclose(fp) 1297 raise 1298 1299 def __enter__(self): 1300 return self 1301 1302 def __exit__(self, type, value, traceback): 1303 self.close() 1304 1305 def __repr__(self): 1306 result = ['<%s.%s' % (self.__class__.__module__, 1307 self.__class__.__qualname__)] 1308 if self.fp is not None: 1309 if self._filePassed: 1310 result.append(' file=%r' % self.fp) 1311 elif self.filename is not None: 1312 result.append(' filename=%r' % self.filename) 1313 result.append(' mode=%r' % self.mode) 1314 else: 1315 result.append(' [closed]') 1316 result.append('>') 1317 return ''.join(result) 1318 1319 def _RealGetContents(self): 1320 """Read in the table of contents for the ZIP file.""" 1321 fp = self.fp 1322 try: 1323 endrec = _EndRecData(fp) 1324 except OSError: 1325 raise BadZipFile("File is not a zip file") 1326 if not endrec: 1327 raise BadZipFile("File is not a zip file") 1328 if self.debug > 1: 1329 print(endrec) 1330 size_cd = endrec[_ECD_SIZE] # bytes in central directory 1331 offset_cd = endrec[_ECD_OFFSET] # offset of central directory 1332 self._comment = endrec[_ECD_COMMENT] # archive comment 1333 1334 # "concat" is zero, unless zip was concatenated to another file 1335 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd 1336 if endrec[_ECD_SIGNATURE] == stringEndArchive64: 1337 # If Zip64 extension structures are present, account for them 1338 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) 1339 1340 if self.debug > 2: 1341 inferred = concat + offset_cd 1342 print("given, inferred, offset", offset_cd, inferred, concat) 1343 # self.start_dir: Position of start of central directory 1344 self.start_dir = offset_cd + concat 1345 fp.seek(self.start_dir, 0) 1346 data = fp.read(size_cd) 1347 fp = io.BytesIO(data) 1348 total = 0 1349 while total < size_cd: 1350 centdir = fp.read(sizeCentralDir) 1351 if len(centdir) != sizeCentralDir: 1352 raise BadZipFile("Truncated central directory") 1353 centdir = struct.unpack(structCentralDir, centdir) 1354 if centdir[_CD_SIGNATURE] != stringCentralDir: 1355 raise BadZipFile("Bad magic number for central directory") 1356 if self.debug > 2: 1357 print(centdir) 1358 filename = fp.read(centdir[_CD_FILENAME_LENGTH]) 1359 flags = centdir[5] 1360 if flags & 0x800: 1361 # UTF-8 file names extension 1362 filename = filename.decode('utf-8') 1363 else: 1364 # Historical ZIP filename encoding 1365 filename = filename.decode('cp437') 1366 # Create ZipInfo instance to store file information 1367 x = ZipInfo(filename) 1368 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) 1369 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) 1370 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] 1371 (x.create_version, x.create_system, x.extract_version, x.reserved, 1372 x.flag_bits, x.compress_type, t, d, 1373 x.CRC, x.compress_size, x.file_size) = centdir[1:12] 1374 if x.extract_version > MAX_EXTRACT_VERSION: 1375 raise NotImplementedError("zip file version %.1f" % 1376 (x.extract_version / 10)) 1377 x.volume, x.internal_attr, x.external_attr = centdir[15:18] 1378 # Convert date/time code to (year, month, day, hour, min, sec) 1379 x._raw_time = t 1380 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, 1381 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) 1382 1383 x._decodeExtra() 1384 x.header_offset = x.header_offset + concat 1385 self.filelist.append(x) 1386 self.NameToInfo[x.filename] = x 1387 1388 # update total bytes read from central directory 1389 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] 1390 + centdir[_CD_EXTRA_FIELD_LENGTH] 1391 + centdir[_CD_COMMENT_LENGTH]) 1392 1393 if self.debug > 2: 1394 print("total", total) 1395 1396 end_offset = self.start_dir 1397 for zinfo in sorted(self.filelist, 1398 key=lambda zinfo: zinfo.header_offset, 1399 reverse=True): 1400 zinfo._end_offset = end_offset 1401 end_offset = zinfo.header_offset 1402 1403 def namelist(self): 1404 """Return a list of file names in the archive.""" 1405 return [data.filename for data in self.filelist] 1406 1407 def infolist(self): 1408 """Return a list of class ZipInfo instances for files in the 1409 archive.""" 1410 return self.filelist 1411 1412 def printdir(self, file=None): 1413 """Print a table of contents for the zip file.""" 1414 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"), 1415 file=file) 1416 for zinfo in self.filelist: 1417 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 1418 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size), 1419 file=file) 1420 1421 def testzip(self): 1422 """Read all the files and check the CRC.""" 1423 chunk_size = 2 ** 20 1424 for zinfo in self.filelist: 1425 try: 1426 # Read by chunks, to avoid an OverflowError or a 1427 # MemoryError with very large embedded files. 1428 with self.open(zinfo.filename, "r") as f: 1429 while f.read(chunk_size): # Check CRC-32 1430 pass 1431 except BadZipFile: 1432 return zinfo.filename 1433 1434 def getinfo(self, name): 1435 """Return the instance of ZipInfo given 'name'.""" 1436 info = self.NameToInfo.get(name) 1437 if info is None: 1438 raise KeyError( 1439 'There is no item named %r in the archive' % name) 1440 1441 return info 1442 1443 def setpassword(self, pwd): 1444 """Set default password for encrypted files.""" 1445 if pwd and not isinstance(pwd, bytes): 1446 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1447 if pwd: 1448 self.pwd = pwd 1449 else: 1450 self.pwd = None 1451 1452 @property 1453 def comment(self): 1454 """The comment text associated with the ZIP file.""" 1455 return self._comment 1456 1457 @comment.setter 1458 def comment(self, comment): 1459 if not isinstance(comment, bytes): 1460 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__) 1461 # check for valid comment length 1462 if len(comment) > ZIP_MAX_COMMENT: 1463 import warnings 1464 warnings.warn('Archive comment is too long; truncating to %d bytes' 1465 % ZIP_MAX_COMMENT, stacklevel=2) 1466 comment = comment[:ZIP_MAX_COMMENT] 1467 self._comment = comment 1468 self._didModify = True 1469 1470 def read(self, name, pwd=None): 1471 """Return file bytes for name.""" 1472 with self.open(name, "r", pwd) as fp: 1473 return fp.read() 1474 1475 def open(self, name, mode="r", pwd=None, *, force_zip64=False): 1476 """Return file-like object for 'name'. 1477 1478 name is a string for the file name within the ZIP file, or a ZipInfo 1479 object. 1480 1481 mode should be 'r' to read a file already in the ZIP file, or 'w' to 1482 write to a file newly added to the archive. 1483 1484 pwd is the password to decrypt files (only used for reading). 1485 1486 When writing, if the file size is not known in advance but may exceed 1487 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large 1488 files. If the size is known in advance, it is best to pass a ZipInfo 1489 instance for name, with zinfo.file_size set. 1490 """ 1491 if mode not in {"r", "w"}: 1492 raise ValueError('open() requires mode "r" or "w"') 1493 if pwd and not isinstance(pwd, bytes): 1494 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1495 if pwd and (mode == "w"): 1496 raise ValueError("pwd is only supported for reading files") 1497 if not self.fp: 1498 raise ValueError( 1499 "Attempt to use ZIP archive that was already closed") 1500 1501 # Make sure we have an info object 1502 if isinstance(name, ZipInfo): 1503 # 'name' is already an info object 1504 zinfo = name 1505 elif mode == 'w': 1506 zinfo = ZipInfo(name) 1507 zinfo.compress_type = self.compression 1508 zinfo._compresslevel = self.compresslevel 1509 else: 1510 # Get info object for name 1511 zinfo = self.getinfo(name) 1512 1513 if mode == 'w': 1514 return self._open_to_write(zinfo, force_zip64=force_zip64) 1515 1516 if self._writing: 1517 raise ValueError("Can't read from the ZIP file while there " 1518 "is an open writing handle on it. " 1519 "Close the writing handle before trying to read.") 1520 1521 # Open for reading: 1522 self._fileRefCnt += 1 1523 zef_file = _SharedFile(self.fp, zinfo.header_offset, 1524 self._fpclose, self._lock, lambda: self._writing) 1525 try: 1526 # Skip the file header: 1527 fheader = zef_file.read(sizeFileHeader) 1528 if len(fheader) != sizeFileHeader: 1529 raise BadZipFile("Truncated file header") 1530 fheader = struct.unpack(structFileHeader, fheader) 1531 if fheader[_FH_SIGNATURE] != stringFileHeader: 1532 raise BadZipFile("Bad magic number for file header") 1533 1534 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 1535 if fheader[_FH_EXTRA_FIELD_LENGTH]: 1536 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) 1537 1538 if zinfo.flag_bits & 0x20: 1539 # Zip 2.7: compressed patched data 1540 raise NotImplementedError("compressed patched data (flag bit 5)") 1541 1542 if zinfo.flag_bits & 0x40: 1543 # strong encryption 1544 raise NotImplementedError("strong encryption (flag bit 6)") 1545 1546 if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800: 1547 # UTF-8 filename 1548 fname_str = fname.decode("utf-8") 1549 else: 1550 fname_str = fname.decode("cp437") 1551 1552 if fname_str != zinfo.orig_filename: 1553 raise BadZipFile( 1554 'File name in directory %r and header %r differ.' 1555 % (zinfo.orig_filename, fname)) 1556 1557 if (zinfo._end_offset is not None and 1558 zef_file.tell() + zinfo.compress_size > zinfo._end_offset): 1559 raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)") 1560 1561 # check for encrypted flag & handle password 1562 is_encrypted = zinfo.flag_bits & 0x1 1563 if is_encrypted: 1564 if not pwd: 1565 pwd = self.pwd 1566 if not pwd: 1567 raise RuntimeError("File %r is encrypted, password " 1568 "required for extraction" % name) 1569 else: 1570 pwd = None 1571 1572 return ZipExtFile(zef_file, mode, zinfo, pwd, True) 1573 except: 1574 zef_file.close() 1575 raise 1576 1577 def _open_to_write(self, zinfo, force_zip64=False): 1578 if force_zip64 and not self._allowZip64: 1579 raise ValueError( 1580 "force_zip64 is True, but allowZip64 was False when opening " 1581 "the ZIP file." 1582 ) 1583 if self._writing: 1584 raise ValueError("Can't write to the ZIP file while there is " 1585 "another write handle open on it. " 1586 "Close the first handle before opening another.") 1587 1588 # Size and CRC are overwritten with correct data after processing the file 1589 zinfo.compress_size = 0 1590 zinfo.CRC = 0 1591 1592 zinfo.flag_bits = 0x00 1593 if zinfo.compress_type == ZIP_LZMA: 1594 # Compressed data includes an end-of-stream (EOS) marker 1595 zinfo.flag_bits |= 0x02 1596 if not self._seekable: 1597 zinfo.flag_bits |= 0x08 1598 1599 if not zinfo.external_attr: 1600 zinfo.external_attr = 0o600 << 16 # permissions: ?rw------- 1601 1602 # Compressed size can be larger than uncompressed size 1603 zip64 = self._allowZip64 and \ 1604 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT) 1605 1606 if self._seekable: 1607 self.fp.seek(self.start_dir) 1608 zinfo.header_offset = self.fp.tell() 1609 1610 self._writecheck(zinfo) 1611 self._didModify = True 1612 1613 self.fp.write(zinfo.FileHeader(zip64)) 1614 1615 self._writing = True 1616 return _ZipWriteFile(self, zinfo, zip64) 1617 1618 def extract(self, member, path=None, pwd=None): 1619 """Extract a member from the archive to the current working directory, 1620 using its full name. Its file information is extracted as accurately 1621 as possible. `member' may be a filename or a ZipInfo object. You can 1622 specify a different directory using `path'. 1623 """ 1624 if path is None: 1625 path = os.getcwd() 1626 else: 1627 path = os.fspath(path) 1628 1629 return self._extract_member(member, path, pwd) 1630 1631 def extractall(self, path=None, members=None, pwd=None): 1632 """Extract all members from the archive to the current working 1633 directory. `path' specifies a different directory to extract to. 1634 `members' is optional and must be a subset of the list returned 1635 by namelist(). 1636 """ 1637 if members is None: 1638 members = self.namelist() 1639 1640 if path is None: 1641 path = os.getcwd() 1642 else: 1643 path = os.fspath(path) 1644 1645 for zipinfo in members: 1646 self._extract_member(zipinfo, path, pwd) 1647 1648 @classmethod 1649 def _sanitize_windows_name(cls, arcname, pathsep): 1650 """Replace bad characters and remove trailing dots from parts.""" 1651 table = cls._windows_illegal_name_trans_table 1652 if not table: 1653 illegal = ':<>|"?*' 1654 table = str.maketrans(illegal, '_' * len(illegal)) 1655 cls._windows_illegal_name_trans_table = table 1656 arcname = arcname.translate(table) 1657 # remove trailing dots 1658 arcname = (x.rstrip('.') for x in arcname.split(pathsep)) 1659 # rejoin, removing empty parts. 1660 arcname = pathsep.join(x for x in arcname if x) 1661 return arcname 1662 1663 def _extract_member(self, member, targetpath, pwd): 1664 """Extract the ZipInfo object 'member' to a physical 1665 file on the path targetpath. 1666 """ 1667 if not isinstance(member, ZipInfo): 1668 member = self.getinfo(member) 1669 1670 # build the destination pathname, replacing 1671 # forward slashes to platform specific separators. 1672 arcname = member.filename.replace('/', os.path.sep) 1673 1674 if os.path.altsep: 1675 arcname = arcname.replace(os.path.altsep, os.path.sep) 1676 # interpret absolute pathname as relative, remove drive letter or 1677 # UNC path, redundant separators, "." and ".." components. 1678 arcname = os.path.splitdrive(arcname)[1] 1679 invalid_path_parts = ('', os.path.curdir, os.path.pardir) 1680 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) 1681 if x not in invalid_path_parts) 1682 if os.path.sep == '\\': 1683 # filter illegal characters on Windows 1684 arcname = self._sanitize_windows_name(arcname, os.path.sep) 1685 1686 targetpath = os.path.join(targetpath, arcname) 1687 targetpath = os.path.normpath(targetpath) 1688 1689 # Create all upper directories if necessary. 1690 upperdirs = os.path.dirname(targetpath) 1691 if upperdirs and not os.path.exists(upperdirs): 1692 os.makedirs(upperdirs) 1693 1694 if member.is_dir(): 1695 if not os.path.isdir(targetpath): 1696 os.mkdir(targetpath) 1697 return targetpath 1698 1699 with self.open(member, pwd=pwd) as source, \ 1700 open(targetpath, "wb") as target: 1701 shutil.copyfileobj(source, target) 1702 1703 return targetpath 1704 1705 def _writecheck(self, zinfo): 1706 """Check for errors before writing a file to the archive.""" 1707 if zinfo.filename in self.NameToInfo: 1708 import warnings 1709 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3) 1710 if self.mode not in ('w', 'x', 'a'): 1711 raise ValueError("write() requires mode 'w', 'x', or 'a'") 1712 if not self.fp: 1713 raise ValueError( 1714 "Attempt to write ZIP archive that was already closed") 1715 _check_compression(zinfo.compress_type) 1716 if not self._allowZip64: 1717 requires_zip64 = None 1718 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT: 1719 requires_zip64 = "Files count" 1720 elif zinfo.file_size > ZIP64_LIMIT: 1721 requires_zip64 = "Filesize" 1722 elif zinfo.header_offset > ZIP64_LIMIT: 1723 requires_zip64 = "Zipfile size" 1724 if requires_zip64: 1725 raise LargeZipFile(requires_zip64 + 1726 " would require ZIP64 extensions") 1727 1728 def write(self, filename, arcname=None, 1729 compress_type=None, compresslevel=None): 1730 """Put the bytes from filename into the archive under the name 1731 arcname.""" 1732 if not self.fp: 1733 raise ValueError( 1734 "Attempt to write to ZIP archive that was already closed") 1735 if self._writing: 1736 raise ValueError( 1737 "Can't write to ZIP archive while an open writing handle exists" 1738 ) 1739 1740 zinfo = ZipInfo.from_file(filename, arcname, 1741 strict_timestamps=self._strict_timestamps) 1742 1743 if zinfo.is_dir(): 1744 zinfo.compress_size = 0 1745 zinfo.CRC = 0 1746 else: 1747 if compress_type is not None: 1748 zinfo.compress_type = compress_type 1749 else: 1750 zinfo.compress_type = self.compression 1751 1752 if compresslevel is not None: 1753 zinfo._compresslevel = compresslevel 1754 else: 1755 zinfo._compresslevel = self.compresslevel 1756 1757 if zinfo.is_dir(): 1758 with self._lock: 1759 if self._seekable: 1760 self.fp.seek(self.start_dir) 1761 zinfo.header_offset = self.fp.tell() # Start of header bytes 1762 if zinfo.compress_type == ZIP_LZMA: 1763 # Compressed data includes an end-of-stream (EOS) marker 1764 zinfo.flag_bits |= 0x02 1765 1766 self._writecheck(zinfo) 1767 self._didModify = True 1768 1769 self.filelist.append(zinfo) 1770 self.NameToInfo[zinfo.filename] = zinfo 1771 self.fp.write(zinfo.FileHeader(False)) 1772 self.start_dir = self.fp.tell() 1773 else: 1774 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: 1775 shutil.copyfileobj(src, dest, 1024*8) 1776 1777 def writestr(self, zinfo_or_arcname, data, 1778 compress_type=None, compresslevel=None): 1779 """Write a file into the archive. The contents is 'data', which 1780 may be either a 'str' or a 'bytes' instance; if it is a 'str', 1781 it is encoded as UTF-8 first. 1782 'zinfo_or_arcname' is either a ZipInfo instance or 1783 the name of the file in the archive.""" 1784 if isinstance(data, str): 1785 data = data.encode("utf-8") 1786 if not isinstance(zinfo_or_arcname, ZipInfo): 1787 zinfo = ZipInfo(filename=zinfo_or_arcname, 1788 date_time=time.localtime(time.time())[:6]) 1789 zinfo.compress_type = self.compression 1790 zinfo._compresslevel = self.compresslevel 1791 if zinfo.filename[-1] == '/': 1792 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x 1793 zinfo.external_attr |= 0x10 # MS-DOS directory flag 1794 else: 1795 zinfo.external_attr = 0o600 << 16 # ?rw------- 1796 else: 1797 zinfo = zinfo_or_arcname 1798 1799 if not self.fp: 1800 raise ValueError( 1801 "Attempt to write to ZIP archive that was already closed") 1802 if self._writing: 1803 raise ValueError( 1804 "Can't write to ZIP archive while an open writing handle exists." 1805 ) 1806 1807 if compress_type is not None: 1808 zinfo.compress_type = compress_type 1809 1810 if compresslevel is not None: 1811 zinfo._compresslevel = compresslevel 1812 1813 zinfo.file_size = len(data) # Uncompressed size 1814 with self._lock: 1815 with self.open(zinfo, mode='w') as dest: 1816 dest.write(data) 1817 1818 def __del__(self): 1819 """Call the "close()" method in case the user forgot.""" 1820 self.close() 1821 1822 def close(self): 1823 """Close the file, and for mode 'w', 'x' and 'a' write the ending 1824 records.""" 1825 if self.fp is None: 1826 return 1827 1828 if self._writing: 1829 raise ValueError("Can't close the ZIP file while there is " 1830 "an open writing handle on it. " 1831 "Close the writing handle before closing the zip.") 1832 1833 try: 1834 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records 1835 with self._lock: 1836 if self._seekable: 1837 self.fp.seek(self.start_dir) 1838 self._write_end_record() 1839 finally: 1840 fp = self.fp 1841 self.fp = None 1842 self._fpclose(fp) 1843 1844 def _write_end_record(self): 1845 for zinfo in self.filelist: # write central directory 1846 dt = zinfo.date_time 1847 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 1848 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 1849 extra = [] 1850 if zinfo.file_size > ZIP64_LIMIT \ 1851 or zinfo.compress_size > ZIP64_LIMIT: 1852 extra.append(zinfo.file_size) 1853 extra.append(zinfo.compress_size) 1854 file_size = 0xffffffff 1855 compress_size = 0xffffffff 1856 else: 1857 file_size = zinfo.file_size 1858 compress_size = zinfo.compress_size 1859 1860 if zinfo.header_offset > ZIP64_LIMIT: 1861 extra.append(zinfo.header_offset) 1862 header_offset = 0xffffffff 1863 else: 1864 header_offset = zinfo.header_offset 1865 1866 extra_data = zinfo.extra 1867 min_version = 0 1868 if extra: 1869 # Append a ZIP64 field to the extra's 1870 extra_data = _strip_extra(extra_data, (1,)) 1871 extra_data = struct.pack( 1872 '<HH' + 'Q'*len(extra), 1873 1, 8*len(extra), *extra) + extra_data 1874 1875 min_version = ZIP64_VERSION 1876 1877 if zinfo.compress_type == ZIP_BZIP2: 1878 min_version = max(BZIP2_VERSION, min_version) 1879 elif zinfo.compress_type == ZIP_LZMA: 1880 min_version = max(LZMA_VERSION, min_version) 1881 1882 extract_version = max(min_version, zinfo.extract_version) 1883 create_version = max(min_version, zinfo.create_version) 1884 filename, flag_bits = zinfo._encodeFilenameFlags() 1885 centdir = struct.pack(structCentralDir, 1886 stringCentralDir, create_version, 1887 zinfo.create_system, extract_version, zinfo.reserved, 1888 flag_bits, zinfo.compress_type, dostime, dosdate, 1889 zinfo.CRC, compress_size, file_size, 1890 len(filename), len(extra_data), len(zinfo.comment), 1891 0, zinfo.internal_attr, zinfo.external_attr, 1892 header_offset) 1893 self.fp.write(centdir) 1894 self.fp.write(filename) 1895 self.fp.write(extra_data) 1896 self.fp.write(zinfo.comment) 1897 1898 pos2 = self.fp.tell() 1899 # Write end-of-zip-archive record 1900 centDirCount = len(self.filelist) 1901 centDirSize = pos2 - self.start_dir 1902 centDirOffset = self.start_dir 1903 requires_zip64 = None 1904 if centDirCount > ZIP_FILECOUNT_LIMIT: 1905 requires_zip64 = "Files count" 1906 elif centDirOffset > ZIP64_LIMIT: 1907 requires_zip64 = "Central directory offset" 1908 elif centDirSize > ZIP64_LIMIT: 1909 requires_zip64 = "Central directory size" 1910 if requires_zip64: 1911 # Need to write the ZIP64 end-of-archive records 1912 if not self._allowZip64: 1913 raise LargeZipFile(requires_zip64 + 1914 " would require ZIP64 extensions") 1915 zip64endrec = struct.pack( 1916 structEndArchive64, stringEndArchive64, 1917 44, 45, 45, 0, 0, centDirCount, centDirCount, 1918 centDirSize, centDirOffset) 1919 self.fp.write(zip64endrec) 1920 1921 zip64locrec = struct.pack( 1922 structEndArchive64Locator, 1923 stringEndArchive64Locator, 0, pos2, 1) 1924 self.fp.write(zip64locrec) 1925 centDirCount = min(centDirCount, 0xFFFF) 1926 centDirSize = min(centDirSize, 0xFFFFFFFF) 1927 centDirOffset = min(centDirOffset, 0xFFFFFFFF) 1928 1929 endrec = struct.pack(structEndArchive, stringEndArchive, 1930 0, 0, centDirCount, centDirCount, 1931 centDirSize, centDirOffset, len(self._comment)) 1932 self.fp.write(endrec) 1933 self.fp.write(self._comment) 1934 if self.mode == "a": 1935 self.fp.truncate() 1936 self.fp.flush() 1937 1938 def _fpclose(self, fp): 1939 assert self._fileRefCnt > 0 1940 self._fileRefCnt -= 1 1941 if not self._fileRefCnt and not self._filePassed: 1942 fp.close() 1943 1944 1945class PyZipFile(ZipFile): 1946 """Class to create ZIP archives with Python library files and packages.""" 1947 1948 def __init__(self, file, mode="r", compression=ZIP_STORED, 1949 allowZip64=True, optimize=-1): 1950 ZipFile.__init__(self, file, mode=mode, compression=compression, 1951 allowZip64=allowZip64) 1952 self._optimize = optimize 1953 1954 def writepy(self, pathname, basename="", filterfunc=None): 1955 """Add all files from "pathname" to the ZIP archive. 1956 1957 If pathname is a package directory, search the directory and 1958 all package subdirectories recursively for all *.py and enter 1959 the modules into the archive. If pathname is a plain 1960 directory, listdir *.py and enter all modules. Else, pathname 1961 must be a Python *.py file and the module will be put into the 1962 archive. Added modules are always module.pyc. 1963 This method will compile the module.py into module.pyc if 1964 necessary. 1965 If filterfunc(pathname) is given, it is called with every argument. 1966 When it is False, the file or directory is skipped. 1967 """ 1968 pathname = os.fspath(pathname) 1969 if filterfunc and not filterfunc(pathname): 1970 if self.debug: 1971 label = 'path' if os.path.isdir(pathname) else 'file' 1972 print('%s %r skipped by filterfunc' % (label, pathname)) 1973 return 1974 dir, name = os.path.split(pathname) 1975 if os.path.isdir(pathname): 1976 initname = os.path.join(pathname, "__init__.py") 1977 if os.path.isfile(initname): 1978 # This is a package directory, add it 1979 if basename: 1980 basename = "%s/%s" % (basename, name) 1981 else: 1982 basename = name 1983 if self.debug: 1984 print("Adding package in", pathname, "as", basename) 1985 fname, arcname = self._get_codename(initname[0:-3], basename) 1986 if self.debug: 1987 print("Adding", arcname) 1988 self.write(fname, arcname) 1989 dirlist = sorted(os.listdir(pathname)) 1990 dirlist.remove("__init__.py") 1991 # Add all *.py files and package subdirectories 1992 for filename in dirlist: 1993 path = os.path.join(pathname, filename) 1994 root, ext = os.path.splitext(filename) 1995 if os.path.isdir(path): 1996 if os.path.isfile(os.path.join(path, "__init__.py")): 1997 # This is a package directory, add it 1998 self.writepy(path, basename, 1999 filterfunc=filterfunc) # Recursive call 2000 elif ext == ".py": 2001 if filterfunc and not filterfunc(path): 2002 if self.debug: 2003 print('file %r skipped by filterfunc' % path) 2004 continue 2005 fname, arcname = self._get_codename(path[0:-3], 2006 basename) 2007 if self.debug: 2008 print("Adding", arcname) 2009 self.write(fname, arcname) 2010 else: 2011 # This is NOT a package directory, add its files at top level 2012 if self.debug: 2013 print("Adding files from directory", pathname) 2014 for filename in sorted(os.listdir(pathname)): 2015 path = os.path.join(pathname, filename) 2016 root, ext = os.path.splitext(filename) 2017 if ext == ".py": 2018 if filterfunc and not filterfunc(path): 2019 if self.debug: 2020 print('file %r skipped by filterfunc' % path) 2021 continue 2022 fname, arcname = self._get_codename(path[0:-3], 2023 basename) 2024 if self.debug: 2025 print("Adding", arcname) 2026 self.write(fname, arcname) 2027 else: 2028 if pathname[-3:] != ".py": 2029 raise RuntimeError( 2030 'Files added with writepy() must end with ".py"') 2031 fname, arcname = self._get_codename(pathname[0:-3], basename) 2032 if self.debug: 2033 print("Adding file", arcname) 2034 self.write(fname, arcname) 2035 2036 def _get_codename(self, pathname, basename): 2037 """Return (filename, archivename) for the path. 2038 2039 Given a module name path, return the correct file path and 2040 archive name, compiling if necessary. For example, given 2041 /python/lib/string, return (/python/lib/string.pyc, string). 2042 """ 2043 def _compile(file, optimize=-1): 2044 import py_compile 2045 if self.debug: 2046 print("Compiling", file) 2047 try: 2048 py_compile.compile(file, doraise=True, optimize=optimize) 2049 except py_compile.PyCompileError as err: 2050 print(err.msg) 2051 return False 2052 return True 2053 2054 file_py = pathname + ".py" 2055 file_pyc = pathname + ".pyc" 2056 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='') 2057 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1) 2058 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2) 2059 if self._optimize == -1: 2060 # legacy mode: use whatever file is present 2061 if (os.path.isfile(file_pyc) and 2062 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime): 2063 # Use .pyc file. 2064 arcname = fname = file_pyc 2065 elif (os.path.isfile(pycache_opt0) and 2066 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime): 2067 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2068 # file name in the archive. 2069 fname = pycache_opt0 2070 arcname = file_pyc 2071 elif (os.path.isfile(pycache_opt1) and 2072 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime): 2073 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2074 # file name in the archive. 2075 fname = pycache_opt1 2076 arcname = file_pyc 2077 elif (os.path.isfile(pycache_opt2) and 2078 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime): 2079 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2080 # file name in the archive. 2081 fname = pycache_opt2 2082 arcname = file_pyc 2083 else: 2084 # Compile py into PEP 3147 pyc file. 2085 if _compile(file_py): 2086 if sys.flags.optimize == 0: 2087 fname = pycache_opt0 2088 elif sys.flags.optimize == 1: 2089 fname = pycache_opt1 2090 else: 2091 fname = pycache_opt2 2092 arcname = file_pyc 2093 else: 2094 fname = arcname = file_py 2095 else: 2096 # new mode: use given optimization level 2097 if self._optimize == 0: 2098 fname = pycache_opt0 2099 arcname = file_pyc 2100 else: 2101 arcname = file_pyc 2102 if self._optimize == 1: 2103 fname = pycache_opt1 2104 elif self._optimize == 2: 2105 fname = pycache_opt2 2106 else: 2107 msg = "invalid value for 'optimize': {!r}".format(self._optimize) 2108 raise ValueError(msg) 2109 if not (os.path.isfile(fname) and 2110 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime): 2111 if not _compile(file_py, optimize=self._optimize): 2112 fname = arcname = file_py 2113 archivename = os.path.split(arcname)[1] 2114 if basename: 2115 archivename = "%s/%s" % (basename, archivename) 2116 return (fname, archivename) 2117 2118 2119def _parents(path): 2120 """ 2121 Given a path with elements separated by 2122 posixpath.sep, generate all parents of that path. 2123 2124 >>> list(_parents('b/d')) 2125 ['b'] 2126 >>> list(_parents('/b/d/')) 2127 ['/b'] 2128 >>> list(_parents('b/d/f/')) 2129 ['b/d', 'b'] 2130 >>> list(_parents('b')) 2131 [] 2132 >>> list(_parents('')) 2133 [] 2134 """ 2135 return itertools.islice(_ancestry(path), 1, None) 2136 2137 2138def _ancestry(path): 2139 """ 2140 Given a path with elements separated by 2141 posixpath.sep, generate all elements of that path 2142 2143 >>> list(_ancestry('b/d')) 2144 ['b/d', 'b'] 2145 >>> list(_ancestry('/b/d/')) 2146 ['/b/d', '/b'] 2147 >>> list(_ancestry('b/d/f/')) 2148 ['b/d/f', 'b/d', 'b'] 2149 >>> list(_ancestry('b')) 2150 ['b'] 2151 >>> list(_ancestry('')) 2152 [] 2153 """ 2154 path = path.rstrip(posixpath.sep) 2155 while path and path != posixpath.sep: 2156 yield path 2157 path, tail = posixpath.split(path) 2158 2159 2160_dedupe = dict.fromkeys 2161"""Deduplicate an iterable in original order""" 2162 2163 2164def _difference(minuend, subtrahend): 2165 """ 2166 Return items in minuend not in subtrahend, retaining order 2167 with O(1) lookup. 2168 """ 2169 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 2170 2171 2172class CompleteDirs(ZipFile): 2173 """ 2174 A ZipFile subclass that ensures that implied directories 2175 are always included in the namelist. 2176 """ 2177 2178 @staticmethod 2179 def _implied_dirs(names): 2180 parents = itertools.chain.from_iterable(map(_parents, names)) 2181 as_dirs = (p + posixpath.sep for p in parents) 2182 return _dedupe(_difference(as_dirs, names)) 2183 2184 def namelist(self): 2185 names = super(CompleteDirs, self).namelist() 2186 return names + list(self._implied_dirs(names)) 2187 2188 def _name_set(self): 2189 return set(self.namelist()) 2190 2191 def resolve_dir(self, name): 2192 """ 2193 If the name represents a directory, return that name 2194 as a directory (with the trailing slash). 2195 """ 2196 names = self._name_set() 2197 dirname = name + '/' 2198 dir_match = name not in names and dirname in names 2199 return dirname if dir_match else name 2200 2201 @classmethod 2202 def make(cls, source): 2203 """ 2204 Given a source (filename or zipfile), return an 2205 appropriate CompleteDirs subclass. 2206 """ 2207 if isinstance(source, CompleteDirs): 2208 return source 2209 2210 if not isinstance(source, ZipFile): 2211 return cls(source) 2212 2213 # Only allow for FastLookup when supplied zipfile is read-only 2214 if 'r' not in source.mode: 2215 cls = CompleteDirs 2216 2217 source.__class__ = cls 2218 return source 2219 2220 2221class FastLookup(CompleteDirs): 2222 """ 2223 ZipFile subclass to ensure implicit 2224 dirs exist and are resolved rapidly. 2225 """ 2226 2227 def namelist(self): 2228 with contextlib.suppress(AttributeError): 2229 return self.__names 2230 self.__names = super(FastLookup, self).namelist() 2231 return self.__names 2232 2233 def _name_set(self): 2234 with contextlib.suppress(AttributeError): 2235 return self.__lookup 2236 self.__lookup = super(FastLookup, self)._name_set() 2237 return self.__lookup 2238 2239 2240class Path: 2241 """ 2242 A pathlib-compatible interface for zip files. 2243 2244 Consider a zip file with this structure:: 2245 2246 . 2247 ├── a.txt 2248 └── b 2249 ├── c.txt 2250 └── d 2251 └── e.txt 2252 2253 >>> data = io.BytesIO() 2254 >>> zf = ZipFile(data, 'w') 2255 >>> zf.writestr('a.txt', 'content of a') 2256 >>> zf.writestr('b/c.txt', 'content of c') 2257 >>> zf.writestr('b/d/e.txt', 'content of e') 2258 >>> zf.filename = 'mem/abcde.zip' 2259 2260 Path accepts the zipfile object itself or a filename 2261 2262 >>> root = Path(zf) 2263 2264 From there, several path operations are available. 2265 2266 Directory iteration (including the zip file itself): 2267 2268 >>> a, b = root.iterdir() 2269 >>> a 2270 Path('mem/abcde.zip', 'a.txt') 2271 >>> b 2272 Path('mem/abcde.zip', 'b/') 2273 2274 name property: 2275 2276 >>> b.name 2277 'b' 2278 2279 join with divide operator: 2280 2281 >>> c = b / 'c.txt' 2282 >>> c 2283 Path('mem/abcde.zip', 'b/c.txt') 2284 >>> c.name 2285 'c.txt' 2286 2287 Read text: 2288 2289 >>> c.read_text() 2290 'content of c' 2291 2292 existence: 2293 2294 >>> c.exists() 2295 True 2296 >>> (b / 'missing.txt').exists() 2297 False 2298 2299 Coercion to string: 2300 2301 >>> import os 2302 >>> str(c).replace(os.sep, posixpath.sep) 2303 'mem/abcde.zip/b/c.txt' 2304 2305 At the root, ``name``, ``filename``, and ``parent`` 2306 resolve to the zipfile. Note these attributes are not 2307 valid and will raise a ``ValueError`` if the zipfile 2308 has no filename. 2309 2310 >>> root.name 2311 'abcde.zip' 2312 >>> str(root.filename).replace(os.sep, posixpath.sep) 2313 'mem/abcde.zip' 2314 >>> str(root.parent) 2315 'mem' 2316 """ 2317 2318 __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" 2319 2320 def __init__(self, root, at=""): 2321 """ 2322 Construct a Path from a ZipFile or filename. 2323 2324 Note: When the source is an existing ZipFile object, 2325 its type (__class__) will be mutated to a 2326 specialized type. If the caller wishes to retain the 2327 original type, the caller should either create a 2328 separate ZipFile object or pass a filename. 2329 """ 2330 self.root = FastLookup.make(root) 2331 self.at = at 2332 2333 def open(self, mode='r', *args, pwd=None, **kwargs): 2334 """ 2335 Open this entry as text or binary following the semantics 2336 of ``pathlib.Path.open()`` by passing arguments through 2337 to io.TextIOWrapper(). 2338 """ 2339 if self.is_dir(): 2340 raise IsADirectoryError(self) 2341 zip_mode = mode[0] 2342 if not self.exists() and zip_mode == 'r': 2343 raise FileNotFoundError(self) 2344 stream = self.root.open(self.at, zip_mode, pwd=pwd) 2345 if 'b' in mode: 2346 if args or kwargs: 2347 raise ValueError("encoding args invalid for binary operation") 2348 return stream 2349 else: 2350 kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) 2351 return io.TextIOWrapper(stream, *args, **kwargs) 2352 2353 @property 2354 def name(self): 2355 return pathlib.Path(self.at).name or self.filename.name 2356 2357 @property 2358 def filename(self): 2359 return pathlib.Path(self.root.filename).joinpath(self.at) 2360 2361 def read_text(self, *args, **kwargs): 2362 kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) 2363 with self.open('r', *args, **kwargs) as strm: 2364 return strm.read() 2365 2366 def read_bytes(self): 2367 with self.open('rb') as strm: 2368 return strm.read() 2369 2370 def _is_child(self, path): 2371 return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") 2372 2373 def _next(self, at): 2374 return self.__class__(self.root, at) 2375 2376 def is_dir(self): 2377 return not self.at or self.at.endswith("/") 2378 2379 def is_file(self): 2380 return self.exists() and not self.is_dir() 2381 2382 def exists(self): 2383 return self.at in self.root._name_set() 2384 2385 def iterdir(self): 2386 if not self.is_dir(): 2387 raise ValueError("Can't listdir a file") 2388 subs = map(self._next, self.root.namelist()) 2389 return filter(self._is_child, subs) 2390 2391 def __str__(self): 2392 return posixpath.join(self.root.filename, self.at) 2393 2394 def __repr__(self): 2395 return self.__repr.format(self=self) 2396 2397 def joinpath(self, *other): 2398 next = posixpath.join(self.at, *other) 2399 return self._next(self.root.resolve_dir(next)) 2400 2401 __truediv__ = joinpath 2402 2403 @property 2404 def parent(self): 2405 if not self.at: 2406 return self.filename.parent 2407 parent_at = posixpath.dirname(self.at.rstrip('/')) 2408 if parent_at: 2409 parent_at += '/' 2410 return self._next(parent_at) 2411 2412 2413def main(args=None): 2414 import argparse 2415 2416 description = 'A simple command-line interface for zipfile module.' 2417 parser = argparse.ArgumentParser(description=description) 2418 group = parser.add_mutually_exclusive_group(required=True) 2419 group.add_argument('-l', '--list', metavar='<zipfile>', 2420 help='Show listing of a zipfile') 2421 group.add_argument('-e', '--extract', nargs=2, 2422 metavar=('<zipfile>', '<output_dir>'), 2423 help='Extract zipfile into target dir') 2424 group.add_argument('-c', '--create', nargs='+', 2425 metavar=('<name>', '<file>'), 2426 help='Create zipfile from sources') 2427 group.add_argument('-t', '--test', metavar='<zipfile>', 2428 help='Test if a zipfile is valid') 2429 args = parser.parse_args(args) 2430 2431 if args.test is not None: 2432 src = args.test 2433 with ZipFile(src, 'r') as zf: 2434 badfile = zf.testzip() 2435 if badfile: 2436 print("The following enclosed file is corrupted: {!r}".format(badfile)) 2437 print("Done testing") 2438 2439 elif args.list is not None: 2440 src = args.list 2441 with ZipFile(src, 'r') as zf: 2442 zf.printdir() 2443 2444 elif args.extract is not None: 2445 src, curdir = args.extract 2446 with ZipFile(src, 'r') as zf: 2447 zf.extractall(curdir) 2448 2449 elif args.create is not None: 2450 zip_name = args.create.pop(0) 2451 files = args.create 2452 2453 def addToZip(zf, path, zippath): 2454 if os.path.isfile(path): 2455 zf.write(path, zippath, ZIP_DEFLATED) 2456 elif os.path.isdir(path): 2457 if zippath: 2458 zf.write(path, zippath) 2459 for nm in sorted(os.listdir(path)): 2460 addToZip(zf, 2461 os.path.join(path, nm), os.path.join(zippath, nm)) 2462 # else: ignore 2463 2464 with ZipFile(zip_name, 'w') as zf: 2465 for path in files: 2466 zippath = os.path.basename(path) 2467 if not zippath: 2468 zippath = os.path.basename(os.path.dirname(path)) 2469 if zippath in ('', os.curdir, os.pardir): 2470 zippath = '' 2471 addToZip(zf, path, zippath) 2472 2473 2474if __name__ == "__main__": 2475 main() 2476