"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
import binascii
import importlib.util
import io
import itertools
import os
import posixpath
import shutil
import stat
import struct
import sys
import threading
import time
import contextlib
import pathlib

try:
    import zlib # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32

try:
    import bz2 # We may need its compression method
except ImportError:
    bz2 = None

try:
    import lzma # We may need its compression method
except ImportError:
    lzma = None

__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
           "Path"]

class BadZipFile(Exception):
    """Raised when a file is damaged or is not a ZIP file at all."""
    pass


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """

error = BadZipfile = BadZipFile # Pre-3.2 compatibility names


# Sizes beyond which the ZIP64 extensions are required.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values written into the file headers.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

# Signature of the optional "data descriptor" record that can follow the
# file data when bit 3 of the general-purpose flags is set.
_DD_SIGNATURE = 0x08074b50

# Pre-compiled (id, length) header shared by every extra-field entry.
_EXTRA_FIELD_STRUCT = struct.Struct('<HH')

def _strip_extra(extra, xids):
    # Remove Extra Fields with specified IDs.
    # Returns `extra` unchanged when nothing matched, otherwise a new bytes
    # object with the matching (id, length, payload) entries spliced out.
    unpack = _EXTRA_FIELD_STRUCT.unpack
    modified = False
    buffer = []
    start = i = 0
    while i + 4 <= len(extra):
        xid, xlen = unpack(extra[i : i + 4])
        j = i + 4 + xlen
        if xid in xids:
            if i != start:
                buffer.append(extra[start : i])
            start = j
            modified = True
        i = j
    if not modified:
        return extra
    return b''.join(buffer)

def _check_zipfile(fp):
    # True when an "end of central directory" record can be located in fp.
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False

def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result

def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec


def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec=list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None


class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object."""
        dt = self.date_time
        # Pack the timestamp into the two 16-bit MS-DOS date/time words.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        # Return (encoded filename, flags); non-ASCII names are stored UTF-8
        # with the "language encoding" flag (bit 11, 0x800) set.
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                # Each field is present only when the 32-bit (or 16-bit)
                # value in the fixed header is saturated with all ones.
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Outside the representable DOS-timestamp range, clamp instead of
        # raising when strict_timestamps is disabled.
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        return self.filename[-1] == '/'


# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
# internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().

_crctable = None
def _gen_crc(crc):
    # Compute one entry of the CRC-32 table (standard polynomial 0xEDB88320).
    for j in range(8):
        if crc & 1:
            crc = (crc >> 1) ^ 0xEDB88320
        else:
            crc >>= 1
    return crc

# ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
549# 550# Usage: 551# zd = _ZipDecrypter(mypwd) 552# plain_bytes = zd(cypher_bytes) 553 554def _ZipDecrypter(pwd): 555 key0 = 305419896 556 key1 = 591751049 557 key2 = 878082192 558 559 global _crctable 560 if _crctable is None: 561 _crctable = list(map(_gen_crc, range(256))) 562 crctable = _crctable 563 564 def crc32(ch, crc): 565 """Compute the CRC32 primitive on one byte.""" 566 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] 567 568 def update_keys(c): 569 nonlocal key0, key1, key2 570 key0 = crc32(c, key0) 571 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF 572 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF 573 key2 = crc32(key1 >> 24, key2) 574 575 for p in pwd: 576 update_keys(p) 577 578 def decrypter(data): 579 """Decrypt a bytes object.""" 580 result = bytearray() 581 append = result.append 582 for c in data: 583 k = key2 | 2 584 c ^= ((k * (k^1)) >> 8) & 0xFF 585 update_keys(c) 586 append(c) 587 return bytes(result) 588 589 return decrypter 590 591 592class LZMACompressor: 593 594 def __init__(self): 595 self._comp = None 596 597 def _init(self): 598 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1}) 599 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[ 600 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props) 601 ]) 602 return struct.pack('<BBH', 9, 4, len(props)) + props 603 604 def compress(self, data): 605 if self._comp is None: 606 return self._init() + self._comp.compress(data) 607 return self._comp.compress(data) 608 609 def flush(self): 610 if self._comp is None: 611 return self._init() + self._comp.flush() 612 return self._comp.flush() 613 614 615class LZMADecompressor: 616 617 def __init__(self): 618 self._decomp = None 619 self._unconsumed = b'' 620 self.eof = False 621 622 def decompress(self, data): 623 if self._decomp is None: 624 self._unconsumed += data 625 if len(self._unconsumed) <= 4: 626 return b'' 627 psize, = struct.unpack('<H', self._unconsumed[2:4]) 628 if len(self._unconsumed) <= 4 + psize: 629 return b'' 630 
631 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[ 632 lzma._decode_filter_properties(lzma.FILTER_LZMA1, 633 self._unconsumed[4:4 + psize]) 634 ]) 635 data = self._unconsumed[4 + psize:] 636 del self._unconsumed 637 638 result = self._decomp.decompress(data) 639 self.eof = self._decomp.eof 640 return result 641 642 643compressor_names = { 644 0: 'store', 645 1: 'shrink', 646 2: 'reduce', 647 3: 'reduce', 648 4: 'reduce', 649 5: 'reduce', 650 6: 'implode', 651 7: 'tokenize', 652 8: 'deflate', 653 9: 'deflate64', 654 10: 'implode', 655 12: 'bzip2', 656 14: 'lzma', 657 18: 'terse', 658 19: 'lz77', 659 97: 'wavpack', 660 98: 'ppmd', 661} 662 663def _check_compression(compression): 664 if compression == ZIP_STORED: 665 pass 666 elif compression == ZIP_DEFLATED: 667 if not zlib: 668 raise RuntimeError( 669 "Compression requires the (missing) zlib module") 670 elif compression == ZIP_BZIP2: 671 if not bz2: 672 raise RuntimeError( 673 "Compression requires the (missing) bz2 module") 674 elif compression == ZIP_LZMA: 675 if not lzma: 676 raise RuntimeError( 677 "Compression requires the (missing) lzma module") 678 else: 679 raise NotImplementedError("That compression method is not supported") 680 681 682def _get_compressor(compress_type, compresslevel=None): 683 if compress_type == ZIP_DEFLATED: 684 if compresslevel is not None: 685 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15) 686 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) 687 elif compress_type == ZIP_BZIP2: 688 if compresslevel is not None: 689 return bz2.BZ2Compressor(compresslevel) 690 return bz2.BZ2Compressor() 691 # compresslevel is ignored for ZIP_LZMA 692 elif compress_type == ZIP_LZMA: 693 return LZMACompressor() 694 else: 695 return None 696 697 698def _get_decompressor(compress_type): 699 _check_compression(compress_type) 700 if compress_type == ZIP_STORED: 701 return None 702 elif compress_type == ZIP_DEFLATED: 703 return zlib.decompressobj(-15) 704 
elif compress_type == ZIP_BZIP2: 705 return bz2.BZ2Decompressor() 706 elif compress_type == ZIP_LZMA: 707 return LZMADecompressor() 708 else: 709 descr = compressor_names.get(compress_type) 710 if descr: 711 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr)) 712 else: 713 raise NotImplementedError("compression type %d" % (compress_type,)) 714 715 716class _SharedFile: 717 def __init__(self, file, pos, close, lock, writing): 718 self._file = file 719 self._pos = pos 720 self._close = close 721 self._lock = lock 722 self._writing = writing 723 self.seekable = file.seekable 724 self.tell = file.tell 725 726 def seek(self, offset, whence=0): 727 with self._lock: 728 if self._writing(): 729 raise ValueError("Can't reposition in the ZIP file while " 730 "there is an open writing handle on it. " 731 "Close the writing handle before trying to read.") 732 self._file.seek(offset, whence) 733 self._pos = self._file.tell() 734 return self._pos 735 736 def read(self, n=-1): 737 with self._lock: 738 if self._writing(): 739 raise ValueError("Can't read from the ZIP file while there " 740 "is an open writing handle on it. " 741 "Close the writing handle before trying to read.") 742 self._file.seek(self._pos) 743 data = self._file.read(n) 744 self._pos = self._file.tell() 745 return data 746 747 def close(self): 748 if self._file is not None: 749 fileobj = self._file 750 self._file = None 751 self._close(fileobj) 752 753# Provide the tell method for unseekable stream 754class _Tellable: 755 def __init__(self, fp): 756 self.fp = fp 757 self.offset = 0 758 759 def write(self, data): 760 n = self.fp.write(data) 761 self.offset += n 762 return n 763 764 def tell(self): 765 return self.offset 766 767 def flush(self): 768 self.fp.flush() 769 770 def close(self): 771 self.fp.close() 772 773 774class ZipExtFile(io.BufferedIOBase): 775 """File-like object for reading an archive member. 776 Is returned by ZipFile.open(). 
777 """ 778 779 # Max size supported by decompressor. 780 MAX_N = 1 << 31 - 1 781 782 # Read from compressed files in 4k blocks. 783 MIN_READ_SIZE = 4096 784 785 # Chunk size to read during seek 786 MAX_SEEK_READ = 1 << 24 787 788 def __init__(self, fileobj, mode, zipinfo, pwd=None, 789 close_fileobj=False): 790 self._fileobj = fileobj 791 self._pwd = pwd 792 self._close_fileobj = close_fileobj 793 794 self._compress_type = zipinfo.compress_type 795 self._compress_left = zipinfo.compress_size 796 self._left = zipinfo.file_size 797 798 self._decompressor = _get_decompressor(self._compress_type) 799 800 self._eof = False 801 self._readbuffer = b'' 802 self._offset = 0 803 804 self.newlines = None 805 806 self.mode = mode 807 self.name = zipinfo.filename 808 809 if hasattr(zipinfo, 'CRC'): 810 self._expected_crc = zipinfo.CRC 811 self._running_crc = crc32(b'') 812 else: 813 self._expected_crc = None 814 815 self._seekable = False 816 try: 817 if fileobj.seekable(): 818 self._orig_compress_start = fileobj.tell() 819 self._orig_compress_size = zipinfo.compress_size 820 self._orig_file_size = zipinfo.file_size 821 self._orig_start_crc = self._running_crc 822 self._seekable = True 823 except AttributeError: 824 pass 825 826 self._decrypter = None 827 if pwd: 828 if zipinfo.flag_bits & 0x8: 829 # compare against the file type from extended local headers 830 check_byte = (zipinfo._raw_time >> 8) & 0xff 831 else: 832 # compare against the CRC otherwise 833 check_byte = (zipinfo.CRC >> 24) & 0xff 834 h = self._init_decrypter() 835 if h != check_byte: 836 raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename) 837 838 839 def _init_decrypter(self): 840 self._decrypter = _ZipDecrypter(self._pwd) 841 # The first 12 bytes in the cypher stream is an encryption header 842 # used to strengthen the algorithm. 
The first 11 bytes are 843 # completely random, while the 12th contains the MSB of the CRC, 844 # or the MSB of the file time depending on the header type 845 # and is used to check the correctness of the password. 846 header = self._fileobj.read(12) 847 self._compress_left -= 12 848 return self._decrypter(header)[11] 849 850 def __repr__(self): 851 result = ['<%s.%s' % (self.__class__.__module__, 852 self.__class__.__qualname__)] 853 if not self.closed: 854 result.append(' name=%r mode=%r' % (self.name, self.mode)) 855 if self._compress_type != ZIP_STORED: 856 result.append(' compress_type=%s' % 857 compressor_names.get(self._compress_type, 858 self._compress_type)) 859 else: 860 result.append(' [closed]') 861 result.append('>') 862 return ''.join(result) 863 864 def readline(self, limit=-1): 865 """Read and return a line from the stream. 866 867 If limit is specified, at most limit bytes will be read. 868 """ 869 870 if limit < 0: 871 # Shortcut common case - newline found in buffer. 872 i = self._readbuffer.find(b'\n', self._offset) + 1 873 if i > 0: 874 line = self._readbuffer[self._offset: i] 875 self._offset = i 876 return line 877 878 return io.BufferedIOBase.readline(self, limit) 879 880 def peek(self, n=1): 881 """Returns buffered bytes without advancing the position.""" 882 if n > len(self._readbuffer) - self._offset: 883 chunk = self.read(n) 884 if len(chunk) > self._offset: 885 self._readbuffer = chunk + self._readbuffer[self._offset:] 886 self._offset = 0 887 else: 888 self._offset -= len(chunk) 889 890 # Return up to 512 bytes to reduce allocation overhead for tight loops. 891 return self._readbuffer[self._offset: self._offset + 512] 892 893 def readable(self): 894 if self.closed: 895 raise ValueError("I/O operation on closed file.") 896 return True 897 898 def read(self, n=-1): 899 """Read and return up to n bytes. 900 If the argument is omitted, None, or negative, data is read and returned until EOF is reached. 
901 """ 902 if self.closed: 903 raise ValueError("read from closed file.") 904 if n is None or n < 0: 905 buf = self._readbuffer[self._offset:] 906 self._readbuffer = b'' 907 self._offset = 0 908 while not self._eof: 909 buf += self._read1(self.MAX_N) 910 return buf 911 912 end = n + self._offset 913 if end < len(self._readbuffer): 914 buf = self._readbuffer[self._offset:end] 915 self._offset = end 916 return buf 917 918 n = end - len(self._readbuffer) 919 buf = self._readbuffer[self._offset:] 920 self._readbuffer = b'' 921 self._offset = 0 922 while n > 0 and not self._eof: 923 data = self._read1(n) 924 if n < len(data): 925 self._readbuffer = data 926 self._offset = n 927 buf += data[:n] 928 break 929 buf += data 930 n -= len(data) 931 return buf 932 933 def _update_crc(self, newdata): 934 # Update the CRC using the given data. 935 if self._expected_crc is None: 936 # No need to compute the CRC if we don't have a reference value 937 return 938 self._running_crc = crc32(newdata, self._running_crc) 939 # Check the CRC if we're at the end of the file 940 if self._eof and self._running_crc != self._expected_crc: 941 raise BadZipFile("Bad CRC-32 for file %r" % self.name) 942 943 def read1(self, n): 944 """Read up to n bytes with at most one read() system call.""" 945 946 if n is None or n < 0: 947 buf = self._readbuffer[self._offset:] 948 self._readbuffer = b'' 949 self._offset = 0 950 while not self._eof: 951 data = self._read1(self.MAX_N) 952 if data: 953 buf += data 954 break 955 return buf 956 957 end = n + self._offset 958 if end < len(self._readbuffer): 959 buf = self._readbuffer[self._offset:end] 960 self._offset = end 961 return buf 962 963 n = end - len(self._readbuffer) 964 buf = self._readbuffer[self._offset:] 965 self._readbuffer = b'' 966 self._offset = 0 967 if n > 0: 968 while not self._eof: 969 data = self._read1(n) 970 if n < len(data): 971 self._readbuffer = data 972 self._offset = n 973 buf += data[:n] 974 break 975 if data: 976 buf += data 977 
break 978 return buf 979 980 def _read1(self, n): 981 # Read up to n compressed bytes with at most one read() system call, 982 # decrypt and decompress them. 983 if self._eof or n <= 0: 984 return b'' 985 986 # Read from file. 987 if self._compress_type == ZIP_DEFLATED: 988 ## Handle unconsumed data. 989 data = self._decompressor.unconsumed_tail 990 if n > len(data): 991 data += self._read2(n - len(data)) 992 else: 993 data = self._read2(n) 994 995 if self._compress_type == ZIP_STORED: 996 self._eof = self._compress_left <= 0 997 elif self._compress_type == ZIP_DEFLATED: 998 n = max(n, self.MIN_READ_SIZE) 999 data = self._decompressor.decompress(data, n) 1000 self._eof = (self._decompressor.eof or 1001 self._compress_left <= 0 and 1002 not self._decompressor.unconsumed_tail) 1003 if self._eof: 1004 data += self._decompressor.flush() 1005 else: 1006 data = self._decompressor.decompress(data) 1007 self._eof = self._decompressor.eof or self._compress_left <= 0 1008 1009 data = data[:self._left] 1010 self._left -= len(data) 1011 if self._left <= 0: 1012 self._eof = True 1013 self._update_crc(data) 1014 return data 1015 1016 def _read2(self, n): 1017 if self._compress_left <= 0: 1018 return b'' 1019 1020 n = max(n, self.MIN_READ_SIZE) 1021 n = min(n, self._compress_left) 1022 1023 data = self._fileobj.read(n) 1024 self._compress_left -= len(data) 1025 if not data: 1026 raise EOFError 1027 1028 if self._decrypter is not None: 1029 data = self._decrypter(data) 1030 return data 1031 1032 def close(self): 1033 try: 1034 if self._close_fileobj: 1035 self._fileobj.close() 1036 finally: 1037 super().close() 1038 1039 def seekable(self): 1040 if self.closed: 1041 raise ValueError("I/O operation on closed file.") 1042 return self._seekable 1043 1044 def seek(self, offset, whence=0): 1045 if self.closed: 1046 raise ValueError("seek on closed file.") 1047 if not self._seekable: 1048 raise io.UnsupportedOperation("underlying stream is not seekable") 1049 curr_pos = self.tell() 1050 
if whence == 0: # Seek from start of file 1051 new_pos = offset 1052 elif whence == 1: # Seek from current position 1053 new_pos = curr_pos + offset 1054 elif whence == 2: # Seek from EOF 1055 new_pos = self._orig_file_size + offset 1056 else: 1057 raise ValueError("whence must be os.SEEK_SET (0), " 1058 "os.SEEK_CUR (1), or os.SEEK_END (2)") 1059 1060 if new_pos > self._orig_file_size: 1061 new_pos = self._orig_file_size 1062 1063 if new_pos < 0: 1064 new_pos = 0 1065 1066 read_offset = new_pos - curr_pos 1067 buff_offset = read_offset + self._offset 1068 1069 if buff_offset >= 0 and buff_offset < len(self._readbuffer): 1070 # Just move the _offset index if the new position is in the _readbuffer 1071 self._offset = buff_offset 1072 read_offset = 0 1073 elif read_offset < 0: 1074 # Position is before the current position. Reset the ZipExtFile 1075 self._fileobj.seek(self._orig_compress_start) 1076 self._running_crc = self._orig_start_crc 1077 self._compress_left = self._orig_compress_size 1078 self._left = self._orig_file_size 1079 self._readbuffer = b'' 1080 self._offset = 0 1081 self._decompressor = _get_decompressor(self._compress_type) 1082 self._eof = False 1083 read_offset = new_pos 1084 if self._decrypter is not None: 1085 self._init_decrypter() 1086 1087 while read_offset > 0: 1088 read_len = min(self.MAX_SEEK_READ, read_offset) 1089 self.read(read_len) 1090 read_offset -= read_len 1091 1092 return self.tell() 1093 1094 def tell(self): 1095 if self.closed: 1096 raise ValueError("tell on closed file.") 1097 if not self._seekable: 1098 raise io.UnsupportedOperation("underlying stream is not seekable") 1099 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset 1100 return filepos 1101 1102 1103class _ZipWriteFile(io.BufferedIOBase): 1104 def __init__(self, zf, zinfo, zip64): 1105 self._zinfo = zinfo 1106 self._zip64 = zip64 1107 self._zipfile = zf 1108 self._compressor = _get_compressor(zinfo.compress_type, 1109 
                                           zinfo._compresslevel)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        # Always go through the owning ZipFile so a swapped/closed fp is seen.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data* to the archive member; returns the number of
        uncompressed bytes consumed."""
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any data that supports the buffer protocol
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Flag bit 3 set (non-seekable output): write CRC and file
                # sizes in a data descriptor after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            # Release the owner's single-writer lock even on failure.
            self._zipfile._writing = False


class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
                compresslevel=None)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
          or append 'a'.
    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.
    compresslevel: None (default for the given compression type) or an integer
                   specifying the level to pass to the compressor.
                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
                   When using ZIP_BZIP2 integers 1 through 9 are accepted.

    """

    fp = None                   # Set here since __del__ checks it
    _windows_illegal_name_trans_table = None

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'."""
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # Each entry maps a mode to the fallback to try if opening with
            # that mode fails (e.g. 'a' on a missing file falls back to 'w+b').
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # Close the (possibly just-opened) file on any constructor
            # failure, then re-raise.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if self.fp is not None:
            if self._filePassed:
                result.append(' file=%r' % self.fp)
            elif self.filename is not None:
                result.append(' filename=%r' % self.filename)
            result.append(' mode=%r' % self.mode)
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file."""
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # Parse the central directory out of an in-memory copy.
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            # Adjust for any prepended data (self-extractors, concatenation).
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)


    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self, file=None):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
              file=file)
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
                  file=file)

    def testzip(self):
        """Read all the files and check the CRC."""
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                with self.open(zinfo.filename, "r") as f:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
            except BadZipFile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'."""
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd:
            self.pwd = pwd
        else:
            self.pwd = None

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
        # check for valid comment length
        if len(comment) > ZIP_MAX_COMMENT:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT, stacklevel=2)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def read(self, name, pwd=None):
        """Return file bytes for name."""
        with self.open(name, "r", pwd) as fp:
            return fp.read()

    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        self._fileRefCnt += 1
        # _SharedFile tracks its own position, so several readers can share
        # self.fp under self._lock.
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # Cross-check the local header's name against the central
            # directory entry.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)
            else:
                pwd = None

            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
        except:
            zef_file.close()
            raise

    def _open_to_write(self, zinfo, force_zip64=False):
        # Return a _ZipWriteFile for appending a new member described by
        # zinfo; only one write handle may be open at a time.
        if force_zip64 and not self._allowZip64:
            raise ValueError(
                "force_zip64 is True, but allowZip64 was False when opening "
                "the ZIP file."
            )
        if self._writing:
            raise ValueError("Can't write to the ZIP file while there is "
                             "another write handle open on it. "
                             "Close the first handle before opening another.")

        # Size and CRC are overwritten with correct data after processing the file
        zinfo.compress_size = 0
        zinfo.CRC = 0

        zinfo.flag_bits = 0x00
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02
        if not self._seekable:
            # Flag bit 3: sizes/CRC follow the data in a data descriptor,
            # written by _ZipWriteFile.close().
            zinfo.flag_bits |= 0x08

        if not zinfo.external_attr:
            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()

        self._writecheck(zinfo)
        self._didModify = True

        self.fp.write(zinfo.FileHeader(zip64))

        self._writing = True
        return _ZipWriteFile(self, zinfo, zip64)

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.
        """
        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)

        for zipinfo in members:
            self._extract_member(zipinfo, path, pwd)

    @classmethod
    def _sanitize_windows_name(cls, arcname, pathsep):
        """Replace bad characters and remove trailing dots from parts."""
        table = cls._windows_illegal_name_trans_table
        if not table:
            # Build the translation table once and cache it on the class.
            illegal = ':<>|"?*'
            table = str.maketrans(illegal, '_' * len(illegal))
            cls._windows_illegal_name_trans_table = table
        arcname = arcname.translate(table)
        # remove trailing dots
        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
        # rejoin, removing empty parts.
        arcname = pathsep.join(x for x in arcname if x)
        return arcname

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)

        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret absolute pathname as relative, remove drive letter or
        # UNC path, redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                                   if x not in invalid_path_parts)
        if os.path.sep == '\\':
            # filter illegal characters on Windows
            arcname = self._sanitize_windows_name(arcname, os.path.sep)

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        if member.is_dir():
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            import warnings
            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
        if self.mode not in ('w', 'x', 'a'):
            raise ValueError("write() requires mode 'w', 'x', or 'a'")
        if not self.fp:
            raise ValueError(
                "Attempt to write ZIP archive that was already closed")
        _check_compression(zinfo.compress_type)
        if not self._allowZip64:
            # With ZIP64 disabled, refuse anything that would need it.
            requires_zip64 = None
            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
                requires_zip64 = "Files count"
            elif zinfo.file_size > ZIP64_LIMIT:
                requires_zip64 = "Filesize"
            elif zinfo.header_offset > ZIP64_LIMIT:
                requires_zip64 = "Zipfile size"
            if requires_zip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")

    def write(self, filename, arcname=None,
              compress_type=None, compresslevel=None):
        """Put the bytes from filename into the archive under the name
        arcname."""
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists"
            )

        zinfo = ZipInfo.from_file(filename, arcname,
                                  strict_timestamps=self._strict_timestamps)

        if zinfo.is_dir():
            zinfo.compress_size = 0
            zinfo.CRC = 0
        else:
            if compress_type is not None:
                zinfo.compress_type = compress_type
            else:
                zinfo.compress_type = self.compression

            if compresslevel is not None:
                zinfo._compresslevel = compresslevel
            else:
                zinfo._compresslevel = self.compresslevel

        if zinfo.is_dir():
            # Directory entries have no data: write just the header inline.
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                zinfo.header_offset = self.fp.tell()  # Start of header bytes
                if zinfo.compress_type == ZIP_LZMA:
                    # Compressed data includes an end-of-stream (EOS) marker
                    zinfo.flag_bits |= 0x02

                self._writecheck(zinfo)
                self._didModify = True

                self.filelist.append(zinfo)
                self.NameToInfo[zinfo.filename] = zinfo
                self.fp.write(zinfo.FileHeader(False))
                self.start_dir = self.fp.tell()
        else:
            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
                shutil.copyfileobj(src, dest, 1024*8)

    def writestr(self, zinfo_or_arcname, data,
                 compress_type=None, compresslevel=None):
        """Write a file into the archive.  The contents is 'data', which
        may be either a 'str' or a 'bytes' instance; if it is a 'str',
        it is encoded as UTF-8 first.
        'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if isinstance(data, str):
            data = data.encode("utf-8")
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
            if zinfo.filename[-1] == '/':
                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
                zinfo.external_attr |= 0x10           # MS-DOS directory flag
            else:
                zinfo.external_attr = 0o600 << 16     # ?rw-------
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists."
            )

        if compress_type is not None:
            zinfo.compress_type = compress_type

        if compresslevel is not None:
            zinfo._compresslevel = compresslevel

        zinfo.file_size = len(data)            # Uncompressed size
        with self._lock:
            with self.open(zinfo, mode='w') as dest:
                dest.write(data)

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode 'w', 'x' and 'a' write the ending
        records."""
        if self.fp is None:
            return

        if self._writing:
            raise ValueError("Can't close the ZIP file while there is "
                             "an open writing handle on it. "
                             "Close the writing handle before closing the zip.")

        try:
            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
                with self._lock:
                    if self._seekable:
                        self.fp.seek(self.start_dir)
                    self._write_end_record()
        finally:
            # Always release the file, even if writing the end record failed.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)

    def _write_end_record(self):
        # Write the central directory followed by the end-of-central-directory
        # record(s), adding ZIP64 structures where the 32-bit fields overflow.
        for zinfo in self.filelist:         # write central directory
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = _strip_extra(extra_data, (1,))
                extra_data = struct.pack(
                    '<HH' + 'Q'*len(extra),
                    1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # Saturate the classic EOCD fields; real values live in the
            # ZIP64 records written above.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        if self.mode == "a":
            # Drop any stale bytes left over from the overwritten directory.
            self.fp.truncate()
        self.fp.flush()

    def _fpclose(self, fp):
        # Decrement the shared-file refcount; close only when the last user
        # releases it and we own the file object.
        assert self._fileRefCnt > 0
        self._fileRefCnt -= 1
        if not self._fileRefCnt and not self._filePassed:
            fp.close()


class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        If filterfunc(pathname) is given, it is called with every argument.
        When it is False, the file or directory is skipped.
        """
        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return
        dir, name = os.path.split(pathname)
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = sorted(os.listdir(pathname))
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename,
                                         filterfunc=filterfunc)  # Recursive call
                    elif ext == ".py":
                        if filterfunc and not filterfunc(path):
                            if self.debug:
                                print('file %r skipped by filterfunc' % path)
                            continue
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in sorted(os.listdir(pathname)):
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if ext == ".py":
                        if filterfunc and not filterfunc(path):
                            if self.debug:
                                print('file %r skipped by filterfunc' % path)
                            continue
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        """
        def _compile(file, optimize=-1):
            # Byte-compile *file*; returns True on success, False on a
            # compile error (which is printed, not raised).
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if (os.path.isfile(file_pyc) and
                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif (os.path.isfile(pycache_opt0) and
                  os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt0
                arcname = file_pyc
            elif (os.path.isfile(pycache_opt1) and
                  os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt1
                arcname = file_pyc
            elif (os.path.isfile(pycache_opt2) and
                  os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt2
                arcname = file_pyc
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                    arcname = file_pyc
                else:
                    # Compilation failed: fall back to shipping the source.
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_opt0
                arcname = file_pyc
            else:
                arcname = file_pyc
                if self._optimize == 1:
                    fname = pycache_opt1
                elif self._optimize == 2:
                    fname = pycache_opt2
                else:
                    msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                    raise ValueError(msg)
            if not (os.path.isfile(fname) and
                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)


def _parents(path):
    """
    Given a path with elements separated by
    posixpath.sep, generate all parents of that path.
2118 2119 >>> list(_parents('b/d')) 2120 ['b'] 2121 >>> list(_parents('/b/d/')) 2122 ['/b'] 2123 >>> list(_parents('b/d/f/')) 2124 ['b/d', 'b'] 2125 >>> list(_parents('b')) 2126 [] 2127 >>> list(_parents('')) 2128 [] 2129 """ 2130 return itertools.islice(_ancestry(path), 1, None) 2131 2132 2133def _ancestry(path): 2134 """ 2135 Given a path with elements separated by 2136 posixpath.sep, generate all elements of that path 2137 2138 >>> list(_ancestry('b/d')) 2139 ['b/d', 'b'] 2140 >>> list(_ancestry('/b/d/')) 2141 ['/b/d', '/b'] 2142 >>> list(_ancestry('b/d/f/')) 2143 ['b/d/f', 'b/d', 'b'] 2144 >>> list(_ancestry('b')) 2145 ['b'] 2146 >>> list(_ancestry('')) 2147 [] 2148 """ 2149 path = path.rstrip(posixpath.sep) 2150 while path and path != posixpath.sep: 2151 yield path 2152 path, tail = posixpath.split(path) 2153 2154 2155_dedupe = dict.fromkeys 2156"""Deduplicate an iterable in original order""" 2157 2158 2159def _difference(minuend, subtrahend): 2160 """ 2161 Return items in minuend not in subtrahend, retaining order 2162 with O(1) lookup. 2163 """ 2164 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 2165 2166 2167class CompleteDirs(ZipFile): 2168 """ 2169 A ZipFile subclass that ensures that implied directories 2170 are always included in the namelist. 2171 """ 2172 2173 @staticmethod 2174 def _implied_dirs(names): 2175 parents = itertools.chain.from_iterable(map(_parents, names)) 2176 as_dirs = (p + posixpath.sep for p in parents) 2177 return _dedupe(_difference(as_dirs, names)) 2178 2179 def namelist(self): 2180 names = super(CompleteDirs, self).namelist() 2181 return names + list(self._implied_dirs(names)) 2182 2183 def _name_set(self): 2184 return set(self.namelist()) 2185 2186 def resolve_dir(self, name): 2187 """ 2188 If the name represents a directory, return that name 2189 as a directory (with the trailing slash). 
2190 """ 2191 names = self._name_set() 2192 dirname = name + '/' 2193 dir_match = name not in names and dirname in names 2194 return dirname if dir_match else name 2195 2196 @classmethod 2197 def make(cls, source): 2198 """ 2199 Given a source (filename or zipfile), return an 2200 appropriate CompleteDirs subclass. 2201 """ 2202 if isinstance(source, CompleteDirs): 2203 return source 2204 2205 if not isinstance(source, ZipFile): 2206 return cls(source) 2207 2208 # Only allow for FastLookup when supplied zipfile is read-only 2209 if 'r' not in source.mode: 2210 cls = CompleteDirs 2211 2212 source.__class__ = cls 2213 return source 2214 2215 2216class FastLookup(CompleteDirs): 2217 """ 2218 ZipFile subclass to ensure implicit 2219 dirs exist and are resolved rapidly. 2220 """ 2221 2222 def namelist(self): 2223 with contextlib.suppress(AttributeError): 2224 return self.__names 2225 self.__names = super(FastLookup, self).namelist() 2226 return self.__names 2227 2228 def _name_set(self): 2229 with contextlib.suppress(AttributeError): 2230 return self.__lookup 2231 self.__lookup = super(FastLookup, self)._name_set() 2232 return self.__lookup 2233 2234 2235class Path: 2236 """ 2237 A pathlib-compatible interface for zip files. 2238 2239 Consider a zip file with this structure:: 2240 2241 . 2242 ├── a.txt 2243 └── b 2244 ├── c.txt 2245 └── d 2246 └── e.txt 2247 2248 >>> data = io.BytesIO() 2249 >>> zf = ZipFile(data, 'w') 2250 >>> zf.writestr('a.txt', 'content of a') 2251 >>> zf.writestr('b/c.txt', 'content of c') 2252 >>> zf.writestr('b/d/e.txt', 'content of e') 2253 >>> zf.filename = 'mem/abcde.zip' 2254 2255 Path accepts the zipfile object itself or a filename 2256 2257 >>> root = Path(zf) 2258 2259 From there, several path operations are available. 
2260 2261 Directory iteration (including the zip file itself): 2262 2263 >>> a, b = root.iterdir() 2264 >>> a 2265 Path('mem/abcde.zip', 'a.txt') 2266 >>> b 2267 Path('mem/abcde.zip', 'b/') 2268 2269 name property: 2270 2271 >>> b.name 2272 'b' 2273 2274 join with divide operator: 2275 2276 >>> c = b / 'c.txt' 2277 >>> c 2278 Path('mem/abcde.zip', 'b/c.txt') 2279 >>> c.name 2280 'c.txt' 2281 2282 Read text: 2283 2284 >>> c.read_text() 2285 'content of c' 2286 2287 existence: 2288 2289 >>> c.exists() 2290 True 2291 >>> (b / 'missing.txt').exists() 2292 False 2293 2294 Coercion to string: 2295 2296 >>> import os 2297 >>> str(c).replace(os.sep, posixpath.sep) 2298 'mem/abcde.zip/b/c.txt' 2299 2300 At the root, ``name``, ``filename``, and ``parent`` 2301 resolve to the zipfile. Note these attributes are not 2302 valid and will raise a ``ValueError`` if the zipfile 2303 has no filename. 2304 2305 >>> root.name 2306 'abcde.zip' 2307 >>> str(root.filename).replace(os.sep, posixpath.sep) 2308 'mem/abcde.zip' 2309 >>> str(root.parent) 2310 'mem' 2311 """ 2312 2313 __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" 2314 2315 def __init__(self, root, at=""): 2316 """ 2317 Construct a Path from a ZipFile or filename. 2318 2319 Note: When the source is an existing ZipFile object, 2320 its type (__class__) will be mutated to a 2321 specialized type. If the caller wishes to retain the 2322 original type, the caller should either create a 2323 separate ZipFile object or pass a filename. 2324 """ 2325 self.root = FastLookup.make(root) 2326 self.at = at 2327 2328 def open(self, mode='r', *args, pwd=None, **kwargs): 2329 """ 2330 Open this entry as text or binary following the semantics 2331 of ``pathlib.Path.open()`` by passing arguments through 2332 to io.TextIOWrapper(). 
2333 """ 2334 if self.is_dir(): 2335 raise IsADirectoryError(self) 2336 zip_mode = mode[0] 2337 if not self.exists() and zip_mode == 'r': 2338 raise FileNotFoundError(self) 2339 stream = self.root.open(self.at, zip_mode, pwd=pwd) 2340 if 'b' in mode: 2341 if args or kwargs: 2342 raise ValueError("encoding args invalid for binary operation") 2343 return stream 2344 else: 2345 kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) 2346 return io.TextIOWrapper(stream, *args, **kwargs) 2347 2348 @property 2349 def name(self): 2350 return pathlib.Path(self.at).name or self.filename.name 2351 2352 @property 2353 def filename(self): 2354 return pathlib.Path(self.root.filename).joinpath(self.at) 2355 2356 def read_text(self, *args, **kwargs): 2357 kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) 2358 with self.open('r', *args, **kwargs) as strm: 2359 return strm.read() 2360 2361 def read_bytes(self): 2362 with self.open('rb') as strm: 2363 return strm.read() 2364 2365 def _is_child(self, path): 2366 return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") 2367 2368 def _next(self, at): 2369 return self.__class__(self.root, at) 2370 2371 def is_dir(self): 2372 return not self.at or self.at.endswith("/") 2373 2374 def is_file(self): 2375 return self.exists() and not self.is_dir() 2376 2377 def exists(self): 2378 return self.at in self.root._name_set() 2379 2380 def iterdir(self): 2381 if not self.is_dir(): 2382 raise ValueError("Can't listdir a file") 2383 subs = map(self._next, self.root.namelist()) 2384 return filter(self._is_child, subs) 2385 2386 def __str__(self): 2387 return posixpath.join(self.root.filename, self.at) 2388 2389 def __repr__(self): 2390 return self.__repr.format(self=self) 2391 2392 def joinpath(self, *other): 2393 next = posixpath.join(self.at, *other) 2394 return self._next(self.root.resolve_dir(next)) 2395 2396 __truediv__ = joinpath 2397 2398 @property 2399 def parent(self): 2400 if not self.at: 2401 return 
self.filename.parent 2402 parent_at = posixpath.dirname(self.at.rstrip('/')) 2403 if parent_at: 2404 parent_at += '/' 2405 return self._next(parent_at) 2406 2407 2408def main(args=None): 2409 import argparse 2410 2411 description = 'A simple command-line interface for zipfile module.' 2412 parser = argparse.ArgumentParser(description=description) 2413 group = parser.add_mutually_exclusive_group(required=True) 2414 group.add_argument('-l', '--list', metavar='<zipfile>', 2415 help='Show listing of a zipfile') 2416 group.add_argument('-e', '--extract', nargs=2, 2417 metavar=('<zipfile>', '<output_dir>'), 2418 help='Extract zipfile into target dir') 2419 group.add_argument('-c', '--create', nargs='+', 2420 metavar=('<name>', '<file>'), 2421 help='Create zipfile from sources') 2422 group.add_argument('-t', '--test', metavar='<zipfile>', 2423 help='Test if a zipfile is valid') 2424 args = parser.parse_args(args) 2425 2426 if args.test is not None: 2427 src = args.test 2428 with ZipFile(src, 'r') as zf: 2429 badfile = zf.testzip() 2430 if badfile: 2431 print("The following enclosed file is corrupted: {!r}".format(badfile)) 2432 print("Done testing") 2433 2434 elif args.list is not None: 2435 src = args.list 2436 with ZipFile(src, 'r') as zf: 2437 zf.printdir() 2438 2439 elif args.extract is not None: 2440 src, curdir = args.extract 2441 with ZipFile(src, 'r') as zf: 2442 zf.extractall(curdir) 2443 2444 elif args.create is not None: 2445 zip_name = args.create.pop(0) 2446 files = args.create 2447 2448 def addToZip(zf, path, zippath): 2449 if os.path.isfile(path): 2450 zf.write(path, zippath, ZIP_DEFLATED) 2451 elif os.path.isdir(path): 2452 if zippath: 2453 zf.write(path, zippath) 2454 for nm in sorted(os.listdir(path)): 2455 addToZip(zf, 2456 os.path.join(path, nm), os.path.join(zippath, nm)) 2457 # else: ignore 2458 2459 with ZipFile(zip_name, 'w') as zf: 2460 for path in files: 2461 zippath = os.path.basename(path) 2462 if not zippath: 2463 zippath = 
os.path.basename(os.path.dirname(path)) 2464 if zippath in ('', os.curdir, os.pardir): 2465 zippath = '' 2466 addToZip(zf, path, zippath) 2467 2468 2469if __name__ == "__main__": 2470 main() 2471