1""" 2Read and write ZIP files. 3 4XXX references to utf-8 need further investigation. 5""" 6import binascii 7import importlib.util 8import io 9import itertools 10import os 11import posixpath 12import shutil 13import stat 14import struct 15import sys 16import threading 17import time 18import contextlib 19import pathlib 20 21try: 22 import zlib # We may need its compression method 23 crc32 = zlib.crc32 24except ImportError: 25 zlib = None 26 crc32 = binascii.crc32 27 28try: 29 import bz2 # We may need its compression method 30except ImportError: 31 bz2 = None 32 33try: 34 import lzma # We may need its compression method 35except ImportError: 36 lzma = None 37 38__all__ = ["BadZipFile", "BadZipfile", "error", 39 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", 40 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", 41 "Path"] 42 43class BadZipFile(Exception): 44 pass 45 46 47class LargeZipFile(Exception): 48 """ 49 Raised when writing a zipfile, the zipfile requires ZIP64 extensions 50 and those extensions are disabled. 51 """ 52 53error = BadZipfile = BadZipFile # Pre-3.2 compatibility names 54 55 56ZIP64_LIMIT = (1 << 31) - 1 57ZIP_FILECOUNT_LIMIT = (1 << 16) - 1 58ZIP_MAX_COMMENT = (1 << 16) - 1 59 60# constants for Zip file compression methods 61ZIP_STORED = 0 62ZIP_DEFLATED = 8 63ZIP_BZIP2 = 12 64ZIP_LZMA = 14 65# Other ZIP compression methods not supported 66 67DEFAULT_VERSION = 20 68ZIP64_VERSION = 45 69BZIP2_VERSION = 46 70LZMA_VERSION = 63 71# we recognize (but not necessarily support) all features up to that version 72MAX_EXTRACT_VERSION = 63 73 74# Below are some formats and associated data for reading/writing headers using 75# the struct module. The names and structures of headers/records are those used 76# in the PKWARE description of the ZIP file format: 77# http://www.pkware.com/documents/casestudies/APPNOTE.TXT 78# (URL valid as of January 2008) 79 80# The "end of central directory" structure, magic number, size, and indices 81# (section V.I in the format document) 82structEndArchive = b"<4s4H2LH" 83stringEndArchive = b"PK\005\006" 84sizeEndCentDir = struct.calcsize(structEndArchive) 85 86_ECD_SIGNATURE = 0 87_ECD_DISK_NUMBER = 1 88_ECD_DISK_START = 2 89_ECD_ENTRIES_THIS_DISK = 3 90_ECD_ENTRIES_TOTAL = 4 91_ECD_SIZE = 5 92_ECD_OFFSET = 6 93_ECD_COMMENT_SIZE = 7 94# These last two indices are not part of the structure as defined in the 95# spec, but they are used internally by this module as a convenience 96_ECD_COMMENT = 8 97_ECD_LOCATION = 9 98 99# The "central directory" structure, magic number, size, and indices 100# of entries in the structure (section V.F in the format document) 101structCentralDir = "<4s4B4HL2L5H2L" 102stringCentralDir = b"PK\001\002" 103sizeCentralDir = struct.calcsize(structCentralDir) 104 105# indexes of entries in the central directory structure 106_CD_SIGNATURE = 0 107_CD_CREATE_VERSION = 1 108_CD_CREATE_SYSTEM = 2 109_CD_EXTRACT_VERSION = 3 110_CD_EXTRACT_SYSTEM = 4 111_CD_FLAG_BITS = 5 112_CD_COMPRESS_TYPE = 6 113_CD_TIME = 7 114_CD_DATE = 8 115_CD_CRC = 9 116_CD_COMPRESSED_SIZE = 10 117_CD_UNCOMPRESSED_SIZE = 11 118_CD_FILENAME_LENGTH = 12 119_CD_EXTRA_FIELD_LENGTH = 13 120_CD_COMMENT_LENGTH = 14 121_CD_DISK_NUMBER_START = 15 122_CD_INTERNAL_FILE_ATTRIBUTES = 16 123_CD_EXTERNAL_FILE_ATTRIBUTES = 17 124_CD_LOCAL_HEADER_OFFSET = 18 125 126# The "local file header" structure, magic number, size, and indices 127# (section V.A in the format document) 128structFileHeader = "<4s2B4HL2L2H" 129stringFileHeader = b"PK\003\004" 130sizeFileHeader = struct.calcsize(structFileHeader) 131 132_FH_SIGNATURE = 0 133_FH_EXTRACT_VERSION = 1 134_FH_EXTRACT_SYSTEM = 2 135_FH_GENERAL_PURPOSE_FLAG_BITS = 3 136_FH_COMPRESSION_METHOD = 4 137_FH_LAST_MOD_TIME = 5 138_FH_LAST_MOD_DATE = 6 139_FH_CRC = 7 140_FH_COMPRESSED_SIZE = 8 141_FH_UNCOMPRESSED_SIZE = 9 142_FH_FILENAME_LENGTH = 10 143_FH_EXTRA_FIELD_LENGTH = 11 144 145# The "Zip64 end of central directory locator" structure, magic number, and size 146structEndArchive64Locator = "<4sLQL" 147stringEndArchive64Locator = b"PK\x06\x07" 148sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) 149 150# The "Zip64 end of central directory" record, magic number, size, and indices 151# (section V.G in the format document) 152structEndArchive64 = "<4sQ2H2L4Q" 153stringEndArchive64 = b"PK\x06\x06" 154sizeEndCentDir64 = struct.calcsize(structEndArchive64) 155 156_CD64_SIGNATURE = 0 157_CD64_DIRECTORY_RECSIZE = 1 158_CD64_CREATE_VERSION = 2 159_CD64_EXTRACT_VERSION = 3 160_CD64_DISK_NUMBER = 4 161_CD64_DISK_NUMBER_START = 5 162_CD64_NUMBER_ENTRIES_THIS_DISK = 6 163_CD64_NUMBER_ENTRIES_TOTAL = 7 164_CD64_DIRECTORY_SIZE = 8 165_CD64_OFFSET_START_CENTDIR = 9 166 167_DD_SIGNATURE = 0x08074b50 168 169_EXTRA_FIELD_STRUCT = struct.Struct('<HH') 170 171def _strip_extra(extra, xids): 172 # Remove Extra Fields with specified IDs. 173 unpack = _EXTRA_FIELD_STRUCT.unpack 174 modified = False 175 buffer = [] 176 start = i = 0 177 while i + 4 <= len(extra): 178 xid, xlen = unpack(extra[i : i + 4]) 179 j = i + 4 + xlen 180 if xid in xids: 181 if i != start: 182 buffer.append(extra[start : i]) 183 start = j 184 modified = True 185 i = j 186 if not modified: 187 return extra 188 return b''.join(buffer) 189 190def _check_zipfile(fp): 191 try: 192 if _EndRecData(fp): 193 return True # file has correct magic number 194 except OSError: 195 pass 196 return False 197 198def is_zipfile(filename): 199 """Quickly see if a file is a ZIP file by checking the magic number. 200 201 The filename argument may be a file or file-like object too. 202 """ 203 result = False 204 try: 205 if hasattr(filename, "read"): 206 result = _check_zipfile(fp=filename) 207 else: 208 with open(filename, "rb") as fp: 209 result = _check_zipfile(fp) 210 except OSError: 211 pass 212 return result 213 214def _EndRecData64(fpin, offset, endrec): 215 """ 216 Read the ZIP64 end-of-archive records and use that to update endrec 217 """ 218 try: 219 fpin.seek(offset - sizeEndCentDir64Locator, 2) 220 except OSError: 221 # If the seek fails, the file is not large enough to contain a ZIP64 222 # end-of-archive record, so just return the end record we were given. 223 return endrec 224 225 data = fpin.read(sizeEndCentDir64Locator) 226 if len(data) != sizeEndCentDir64Locator: 227 return endrec 228 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) 229 if sig != stringEndArchive64Locator: 230 return endrec 231 232 if diskno != 0 or disks > 1: 233 raise BadZipFile("zipfiles that span multiple disks are not supported") 234 235 # Assume no 'zip64 extensible data' 236 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 237 data = fpin.read(sizeEndCentDir64) 238 if len(data) != sizeEndCentDir64: 239 return endrec 240 sig, sz, create_version, read_version, disk_num, disk_dir, \ 241 dircount, dircount2, dirsize, diroffset = \ 242 struct.unpack(structEndArchive64, data) 243 if sig != stringEndArchive64: 244 return endrec 245 246 # Update the original endrec using data from the ZIP64 record 247 endrec[_ECD_SIGNATURE] = sig 248 endrec[_ECD_DISK_NUMBER] = disk_num 249 endrec[_ECD_DISK_START] = disk_dir 250 endrec[_ECD_ENTRIES_THIS_DISK] = dircount 251 endrec[_ECD_ENTRIES_TOTAL] = dircount2 252 endrec[_ECD_SIZE] = dirsize 253 endrec[_ECD_OFFSET] = diroffset 254 return endrec 255 256 257def _EndRecData(fpin): 258 """Return data from the "End of Central Directory" record, or None. 259 260 The data is a list of the nine items in the ZIP "End of central dir" 261 record followed by a tenth item, the file seek offset of this record.""" 262 263 # Determine file size 264 fpin.seek(0, 2) 265 filesize = fpin.tell() 266 267 # Check to see if this is ZIP file with no archive comment (the 268 # "end of central directory" structure should be the last item in the 269 # file if this is the case). 270 try: 271 fpin.seek(-sizeEndCentDir, 2) 272 except OSError: 273 return None 274 data = fpin.read() 275 if (len(data) == sizeEndCentDir and 276 data[0:4] == stringEndArchive and 277 data[-2:] == b"\000\000"): 278 # the signature is correct and there's no comment, unpack structure 279 endrec = struct.unpack(structEndArchive, data) 280 endrec=list(endrec) 281 282 # Append a blank comment and record start offset 283 endrec.append(b"") 284 endrec.append(filesize - sizeEndCentDir) 285 286 # Try to read the "Zip64 end of central directory" structure 287 return _EndRecData64(fpin, -sizeEndCentDir, endrec) 288 289 # Either this is not a ZIP file, or it is a ZIP file with an archive 290 # comment. Search the end of the file for the "end of central directory" 291 # record signature. The comment is the last item in the ZIP file and may be 292 # up to 64K long. It is assumed that the "end of central directory" magic 293 # number does not appear in the comment. 294 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) 295 fpin.seek(maxCommentStart, 0) 296 data = fpin.read() 297 start = data.rfind(stringEndArchive) 298 if start >= 0: 299 # found the magic number; attempt to unpack and interpret 300 recData = data[start:start+sizeEndCentDir] 301 if len(recData) != sizeEndCentDir: 302 # Zip file is corrupted. 303 return None 304 endrec = list(struct.unpack(structEndArchive, recData)) 305 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file 306 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize] 307 endrec.append(comment) 308 endrec.append(maxCommentStart + start) 309 310 # Try to read the "Zip64 end of central directory" structure 311 return _EndRecData64(fpin, maxCommentStart + start - filesize, 312 endrec) 313 314 # Unable to find a valid end of central directory structure 315 return None 316 317 318class ZipInfo (object): 319 """Class with attributes describing each file in the ZIP archive.""" 320 321 __slots__ = ( 322 'orig_filename', 323 'filename', 324 'date_time', 325 'compress_type', 326 '_compresslevel', 327 'comment', 328 'extra', 329 'create_system', 330 'create_version', 331 'extract_version', 332 'reserved', 333 'flag_bits', 334 'volume', 335 'internal_attr', 336 'external_attr', 337 'header_offset', 338 'CRC', 339 'compress_size', 340 'file_size', 341 '_raw_time', 342 ) 343 344 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): 345 self.orig_filename = filename # Original file name in archive 346 347 # Terminate the file name at the first null byte. Null bytes in file 348 # names are used as tricks by viruses in archives. 349 null_byte = filename.find(chr(0)) 350 if null_byte >= 0: 351 filename = filename[0:null_byte] 352 # This is used to ensure paths in generated ZIP files always use 353 # forward slashes as the directory separator, as required by the 354 # ZIP format specification. 355 if os.sep != "/" and os.sep in filename: 356 filename = filename.replace(os.sep, "/") 357 358 self.filename = filename # Normalized file name 359 self.date_time = date_time # year, month, day, hour, min, sec 360 361 if date_time[0] < 1980: 362 raise ValueError('ZIP does not support timestamps before 1980') 363 364 # Standard values: 365 self.compress_type = ZIP_STORED # Type of compression for the file 366 self._compresslevel = None # Level for the compressor 367 self.comment = b"" # Comment for each file 368 self.extra = b"" # ZIP extra data 369 if sys.platform == 'win32': 370 self.create_system = 0 # System which created ZIP archive 371 else: 372 # Assume everything else is unix-y 373 self.create_system = 3 # System which created ZIP archive 374 self.create_version = DEFAULT_VERSION # Version which created ZIP archive 375 self.extract_version = DEFAULT_VERSION # Version needed to extract archive 376 self.reserved = 0 # Must be zero 377 self.flag_bits = 0 # ZIP flag bits 378 self.volume = 0 # Volume number of file header 379 self.internal_attr = 0 # Internal attributes 380 self.external_attr = 0 # External file attributes 381 self.compress_size = 0 # Size of the compressed file 382 self.file_size = 0 # Size of the uncompressed file 383 # Other attributes are set by class ZipFile: 384 # header_offset Byte offset to the file header 385 # CRC CRC-32 of the uncompressed file 386 387 def __repr__(self): 388 result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)] 389 if self.compress_type != ZIP_STORED: 390 result.append(' compress_type=%s' % 391 compressor_names.get(self.compress_type, 392 self.compress_type)) 393 hi = self.external_attr >> 16 394 lo = self.external_attr & 0xFFFF 395 if hi: 396 result.append(' filemode=%r' % stat.filemode(hi)) 397 if lo: 398 result.append(' external_attr=%#x' % lo) 399 isdir = self.is_dir() 400 if not isdir or self.file_size: 401 result.append(' file_size=%r' % self.file_size) 402 if ((not isdir or self.compress_size) and 403 (self.compress_type != ZIP_STORED or 404 self.file_size != self.compress_size)): 405 result.append(' compress_size=%r' % self.compress_size) 406 result.append('>') 407 return ''.join(result) 408 409 def FileHeader(self, zip64=None): 410 """Return the per-file header as a bytes object.""" 411 dt = self.date_time 412 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 413 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 414 if self.flag_bits & 0x08: 415 # Set these to zero because we write them after the file data 416 CRC = compress_size = file_size = 0 417 else: 418 CRC = self.CRC 419 compress_size = self.compress_size 420 file_size = self.file_size 421 422 extra = self.extra 423 424 min_version = 0 425 if zip64 is None: 426 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT 427 if zip64: 428 fmt = '<HHQQ' 429 extra = extra + struct.pack(fmt, 430 1, struct.calcsize(fmt)-4, file_size, compress_size) 431 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: 432 if not zip64: 433 raise LargeZipFile("Filesize would require ZIP64 extensions") 434 # File is larger than what fits into a 4 byte integer, 435 # fall back to the ZIP64 extension 436 file_size = 0xffffffff 437 compress_size = 0xffffffff 438 min_version = ZIP64_VERSION 439 440 if self.compress_type == ZIP_BZIP2: 441 min_version = max(BZIP2_VERSION, min_version) 442 elif self.compress_type == ZIP_LZMA: 443 min_version = max(LZMA_VERSION, min_version) 444 445 self.extract_version = max(min_version, self.extract_version) 446 self.create_version = max(min_version, self.create_version) 447 filename, flag_bits = self._encodeFilenameFlags() 448 header = struct.pack(structFileHeader, stringFileHeader, 449 self.extract_version, self.reserved, flag_bits, 450 self.compress_type, dostime, dosdate, CRC, 451 compress_size, file_size, 452 len(filename), len(extra)) 453 return header + filename + extra 454 455 def _encodeFilenameFlags(self): 456 try: 457 return self.filename.encode('ascii'), self.flag_bits 458 except UnicodeEncodeError: 459 return self.filename.encode('utf-8'), self.flag_bits | 0x800 460 461 def _decodeExtra(self): 462 # Try to decode the extra field. 463 extra = self.extra 464 unpack = struct.unpack 465 while len(extra) >= 4: 466 tp, ln = unpack('<HH', extra[:4]) 467 if ln+4 > len(extra): 468 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) 469 if tp == 0x0001: 470 data = extra[4:ln+4] 471 # ZIP64 extension (large files and/or large archives) 472 try: 473 if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF): 474 field = "File size" 475 self.file_size, = unpack('<Q', data[:8]) 476 data = data[8:] 477 if self.compress_size == 0xFFFF_FFFF: 478 field = "Compress size" 479 self.compress_size, = unpack('<Q', data[:8]) 480 data = data[8:] 481 if self.header_offset == 0xFFFF_FFFF: 482 field = "Header offset" 483 self.header_offset, = unpack('<Q', data[:8]) 484 except struct.error: 485 raise BadZipFile(f"Corrupt zip64 extra field. " 486 f"{field} not found.") from None 487 488 extra = extra[ln+4:] 489 490 @classmethod 491 def from_file(cls, filename, arcname=None, *, strict_timestamps=True): 492 """Construct an appropriate ZipInfo for a file on the filesystem. 493 494 filename should be the path to a file or directory on the filesystem. 495 496 arcname is the name which it will have within the archive (by default, 497 this will be the same as filename, but without a drive letter and with 498 leading path separators removed). 499 """ 500 if isinstance(filename, os.PathLike): 501 filename = os.fspath(filename) 502 st = os.stat(filename) 503 isdir = stat.S_ISDIR(st.st_mode) 504 mtime = time.localtime(st.st_mtime) 505 date_time = mtime[0:6] 506 if not strict_timestamps and date_time[0] < 1980: 507 date_time = (1980, 1, 1, 0, 0, 0) 508 elif not strict_timestamps and date_time[0] > 2107: 509 date_time = (2107, 12, 31, 23, 59, 59) 510 # Create ZipInfo instance to store file information 511 if arcname is None: 512 arcname = filename 513 arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) 514 while arcname[0] in (os.sep, os.altsep): 515 arcname = arcname[1:] 516 if isdir: 517 arcname += '/' 518 zinfo = cls(arcname, date_time) 519 zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes 520 if isdir: 521 zinfo.file_size = 0 522 zinfo.external_attr |= 0x10 # MS-DOS directory flag 523 else: 524 zinfo.file_size = st.st_size 525 526 return zinfo 527 528 def is_dir(self): 529 """Return True if this archive member is a directory.""" 530 return self.filename[-1] == '/' 531 532 533# ZIP encryption uses the CRC32 one-byte primitive for scrambling some 534# internal keys. We noticed that a direct implementation is faster than 535# relying on binascii.crc32(). 536 537_crctable = None 538def _gen_crc(crc): 539 for j in range(8): 540 if crc & 1: 541 crc = (crc >> 1) ^ 0xEDB88320 542 else: 543 crc >>= 1 544 return crc 545 546# ZIP supports a password-based form of encryption. Even though known 547# plaintext attacks have been found against it, it is still useful 548# to be able to get data out of such a file. 549# 550# Usage: 551# zd = _ZipDecrypter(mypwd) 552# plain_bytes = zd(cypher_bytes) 553 554def _ZipDecrypter(pwd): 555 key0 = 305419896 556 key1 = 591751049 557 key2 = 878082192 558 559 global _crctable 560 if _crctable is None: 561 _crctable = list(map(_gen_crc, range(256))) 562 crctable = _crctable 563 564 def crc32(ch, crc): 565 """Compute the CRC32 primitive on one byte.""" 566 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] 567 568 def update_keys(c): 569 nonlocal key0, key1, key2 570 key0 = crc32(c, key0) 571 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF 572 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF 573 key2 = crc32(key1 >> 24, key2) 574 575 for p in pwd: 576 update_keys(p) 577 578 def decrypter(data): 579 """Decrypt a bytes object.""" 580 result = bytearray() 581 append = result.append 582 for c in data: 583 k = key2 | 2 584 c ^= ((k * (k^1)) >> 8) & 0xFF 585 update_keys(c) 586 append(c) 587 return bytes(result) 588 589 return decrypter 590 591 592class LZMACompressor: 593 594 def __init__(self): 595 self._comp = None 596 597 def _init(self): 598 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1}) 599 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[ 600 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props) 601 ]) 602 return struct.pack('<BBH', 9, 4, len(props)) + props 603 604 def compress(self, data): 605 if self._comp is None: 606 return self._init() + self._comp.compress(data) 607 return self._comp.compress(data) 608 609 def flush(self): 610 if self._comp is None: 611 return self._init() + self._comp.flush() 612 return self._comp.flush() 613 614 615class LZMADecompressor: 616 617 def __init__(self): 618 self._decomp = None 619 self._unconsumed = b'' 620 self.eof = False 621 622 def decompress(self, data): 623 if self._decomp is None: 624 self._unconsumed += data 625 if len(self._unconsumed) <= 4: 626 return b'' 627 psize, = struct.unpack('<H', self._unconsumed[2:4]) 628 if len(self._unconsumed) <= 4 + psize: 629 return b'' 630 631 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[ 632 lzma._decode_filter_properties(lzma.FILTER_LZMA1, 633 self._unconsumed[4:4 + psize]) 634 ]) 635 data = self._unconsumed[4 + psize:] 636 del self._unconsumed 637 638 result = self._decomp.decompress(data) 639 self.eof = self._decomp.eof 640 return result 641 642 643compressor_names = { 644 0: 'store', 645 1: 'shrink', 646 2: 'reduce', 647 3: 'reduce', 648 4: 'reduce', 649 5: 'reduce', 650 6: 'implode', 651 7: 'tokenize', 652 8: 'deflate', 653 9: 'deflate64', 654 10: 'implode', 655 12: 'bzip2', 656 14: 'lzma', 657 18: 'terse', 658 19: 'lz77', 659 97: 'wavpack', 660 98: 'ppmd', 661} 662 663def _check_compression(compression): 664 if compression == ZIP_STORED: 665 pass 666 elif compression == ZIP_DEFLATED: 667 if not zlib: 668 raise RuntimeError( 669 "Compression requires the (missing) zlib module") 670 elif compression == ZIP_BZIP2: 671 if not bz2: 672 raise RuntimeError( 673 "Compression requires the (missing) bz2 module") 674 elif compression == ZIP_LZMA: 675 if not lzma: 676 raise RuntimeError( 677 "Compression requires the (missing) lzma module") 678 else: 679 raise NotImplementedError("That compression method is not supported") 680 681 682def _get_compressor(compress_type, compresslevel=None): 683 if compress_type == ZIP_DEFLATED: 684 if compresslevel is not None: 685 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15) 686 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) 687 elif compress_type == ZIP_BZIP2: 688 if compresslevel is not None: 689 return bz2.BZ2Compressor(compresslevel) 690 return bz2.BZ2Compressor() 691 # compresslevel is ignored for ZIP_LZMA 692 elif compress_type == ZIP_LZMA: 693 return LZMACompressor() 694 else: 695 return None 696 697 698def _get_decompressor(compress_type): 699 _check_compression(compress_type) 700 if compress_type == ZIP_STORED: 701 return None 702 elif compress_type == ZIP_DEFLATED: 703 return zlib.decompressobj(-15) 704 elif compress_type == ZIP_BZIP2: 705 return bz2.BZ2Decompressor() 706 elif compress_type == ZIP_LZMA: 707 return LZMADecompressor() 708 else: 709 descr = compressor_names.get(compress_type) 710 if descr: 711 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr)) 712 else: 713 raise NotImplementedError("compression type %d" % (compress_type,)) 714 715 716class _SharedFile: 717 def __init__(self, file, pos, close, lock, writing): 718 self._file = file 719 self._pos = pos 720 self._close = close 721 self._lock = lock 722 self._writing = writing 723 self.seekable = file.seekable 724 self.tell = file.tell 725 726 def seek(self, offset, whence=0): 727 with self._lock: 728 if self._writing(): 729 raise ValueError("Can't reposition in the ZIP file while " 730 "there is an open writing handle on it. " 731 "Close the writing handle before trying to read.") 732 self._file.seek(offset, whence) 733 self._pos = self._file.tell() 734 return self._pos 735 736 def read(self, n=-1): 737 with self._lock: 738 if self._writing(): 739 raise ValueError("Can't read from the ZIP file while there " 740 "is an open writing handle on it. " 741 "Close the writing handle before trying to read.") 742 self._file.seek(self._pos) 743 data = self._file.read(n) 744 self._pos = self._file.tell() 745 return data 746 747 def close(self): 748 if self._file is not None: 749 fileobj = self._file 750 self._file = None 751 self._close(fileobj) 752 753# Provide the tell method for unseekable stream 754class _Tellable: 755 def __init__(self, fp): 756 self.fp = fp 757 self.offset = 0 758 759 def write(self, data): 760 n = self.fp.write(data) 761 self.offset += n 762 return n 763 764 def tell(self): 765 return self.offset 766 767 def flush(self): 768 self.fp.flush() 769 770 def close(self): 771 self.fp.close() 772 773 774class ZipExtFile(io.BufferedIOBase): 775 """File-like object for reading an archive member. 776 Is returned by ZipFile.open(). 777 """ 778 779 # Max size supported by decompressor. 780 MAX_N = 1 << 31 - 1 781 782 # Read from compressed files in 4k blocks. 783 MIN_READ_SIZE = 4096 784 785 # Chunk size to read during seek 786 MAX_SEEK_READ = 1 << 24 787 788 def __init__(self, fileobj, mode, zipinfo, pwd=None, 789 close_fileobj=False): 790 self._fileobj = fileobj 791 self._pwd = pwd 792 self._close_fileobj = close_fileobj 793 794 self._compress_type = zipinfo.compress_type 795 self._compress_left = zipinfo.compress_size 796 self._left = zipinfo.file_size 797 798 self._decompressor = _get_decompressor(self._compress_type) 799 800 self._eof = False 801 self._readbuffer = b'' 802 self._offset = 0 803 804 self.newlines = None 805 806 self.mode = mode 807 self.name = zipinfo.filename 808 809 if hasattr(zipinfo, 'CRC'): 810 self._expected_crc = zipinfo.CRC 811 self._running_crc = crc32(b'') 812 else: 813 self._expected_crc = None 814 815 self._seekable = False 816 try: 817 if fileobj.seekable(): 818 self._orig_compress_start = fileobj.tell() 819 self._orig_compress_size = zipinfo.compress_size 820 self._orig_file_size = zipinfo.file_size 821 self._orig_start_crc = self._running_crc 822 self._seekable = True 823 except AttributeError: 824 pass 825 826 self._decrypter = None 827 if pwd: 828 if zipinfo.flag_bits & 0x8: 829 # compare against the file type from extended local headers 830 check_byte = (zipinfo._raw_time >> 8) & 0xff 831 else: 832 # compare against the CRC otherwise 833 check_byte = (zipinfo.CRC >> 24) & 0xff 834 h = self._init_decrypter() 835 if h != check_byte: 836 raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename) 837 838 839 def _init_decrypter(self): 840 self._decrypter = _ZipDecrypter(self._pwd) 841 # The first 12 bytes in the cypher stream is an encryption header 842 # used to strengthen the algorithm. The first 11 bytes are 843 # completely random, while the 12th contains the MSB of the CRC, 844 # or the MSB of the file time depending on the header type 845 # and is used to check the correctness of the password. 846 header = self._fileobj.read(12) 847 self._compress_left -= 12 848 return self._decrypter(header)[11] 849 850 def __repr__(self): 851 result = ['<%s.%s' % (self.__class__.__module__, 852 self.__class__.__qualname__)] 853 if not self.closed: 854 result.append(' name=%r mode=%r' % (self.name, self.mode)) 855 if self._compress_type != ZIP_STORED: 856 result.append(' compress_type=%s' % 857 compressor_names.get(self._compress_type, 858 self._compress_type)) 859 else: 860 result.append(' [closed]') 861 result.append('>') 862 return ''.join(result) 863 864 def readline(self, limit=-1): 865 """Read and return a line from the stream. 866 867 If limit is specified, at most limit bytes will be read. 868 """ 869 870 if limit < 0: 871 # Shortcut common case - newline found in buffer. 872 i = self._readbuffer.find(b'\n', self._offset) + 1 873 if i > 0: 874 line = self._readbuffer[self._offset: i] 875 self._offset = i 876 return line 877 878 return io.BufferedIOBase.readline(self, limit) 879 880 def peek(self, n=1): 881 """Returns buffered bytes without advancing the position.""" 882 if n > len(self._readbuffer) - self._offset: 883 chunk = self.read(n) 884 if len(chunk) > self._offset: 885 self._readbuffer = chunk + self._readbuffer[self._offset:] 886 self._offset = 0 887 else: 888 self._offset -= len(chunk) 889 890 # Return up to 512 bytes to reduce allocation overhead for tight loops. 891 return self._readbuffer[self._offset: self._offset + 512] 892 893 def readable(self): 894 if self.closed: 895 raise ValueError("I/O operation on closed file.") 896 return True 897 898 def read(self, n=-1): 899 """Read and return up to n bytes. 900 If the argument is omitted, None, or negative, data is read and returned until EOF is reached. 901 """ 902 if self.closed: 903 raise ValueError("read from closed file.") 904 if n is None or n < 0: 905 buf = self._readbuffer[self._offset:] 906 self._readbuffer = b'' 907 self._offset = 0 908 while not self._eof: 909 buf += self._read1(self.MAX_N) 910 return buf 911 912 end = n + self._offset 913 if end < len(self._readbuffer): 914 buf = self._readbuffer[self._offset:end] 915 self._offset = end 916 return buf 917 918 n = end - len(self._readbuffer) 919 buf = self._readbuffer[self._offset:] 920 self._readbuffer = b'' 921 self._offset = 0 922 while n > 0 and not self._eof: 923 data = self._read1(n) 924 if n < len(data): 925 self._readbuffer = data 926 self._offset = n 927 buf += data[:n] 928 break 929 buf += data 930 n -= len(data) 931 return buf 932 933 def _update_crc(self, newdata): 934 # Update the CRC using the given data. 935 if self._expected_crc is None: 936 # No need to compute the CRC if we don't have a reference value 937 return 938 self._running_crc = crc32(newdata, self._running_crc) 939 # Check the CRC if we're at the end of the file 940 if self._eof and self._running_crc != self._expected_crc: 941 raise BadZipFile("Bad CRC-32 for file %r" % self.name) 942 943 def read1(self, n): 944 """Read up to n bytes with at most one read() system call.""" 945 946 if n is None or n < 0: 947 buf = self._readbuffer[self._offset:] 948 self._readbuffer = b'' 949 self._offset = 0 950 while not self._eof: 951 data = self._read1(self.MAX_N) 952 if data: 953 buf += data 954 break 955 return buf 956 957 end = n + self._offset 958 if end < len(self._readbuffer): 959 buf = self._readbuffer[self._offset:end] 960 self._offset = end 961 return buf 962 963 n = end - len(self._readbuffer) 964 buf = self._readbuffer[self._offset:] 965 self._readbuffer = b'' 966 self._offset = 0 967 if n > 0: 968 while not self._eof: 969 data = self._read1(n) 970 if n < len(data): 971 self._readbuffer = data 972 self._offset = n 973 buf += data[:n] 974 break 975 if data: 976 buf += data 977 break 978 return buf 979 980 def _read1(self, n): 981 # Read up to n compressed bytes with at most one read() system call, 982 # decrypt and decompress them. 983 if self._eof or n <= 0: 984 return b'' 985 986 # Read from file. 987 if self._compress_type == ZIP_DEFLATED: 988 ## Handle unconsumed data. 989 data = self._decompressor.unconsumed_tail 990 if n > len(data): 991 data += self._read2(n - len(data)) 992 else: 993 data = self._read2(n) 994 995 if self._compress_type == ZIP_STORED: 996 self._eof = self._compress_left <= 0 997 elif self._compress_type == ZIP_DEFLATED: 998 n = max(n, self.MIN_READ_SIZE) 999 data = self._decompressor.decompress(data, n) 1000 self._eof = (self._decompressor.eof or 1001 self._compress_left <= 0 and 1002 not self._decompressor.unconsumed_tail) 1003 if self._eof: 1004 data += self._decompressor.flush() 1005 else: 1006 data = self._decompressor.decompress(data) 1007 self._eof = self._decompressor.eof or self._compress_left <= 0 1008 1009 data = data[:self._left] 1010 self._left -= len(data) 1011 if self._left <= 0: 1012 self._eof = True 1013 self._update_crc(data) 1014 return data 1015 1016 def _read2(self, n): 1017 if self._compress_left <= 0: 1018 return b'' 1019 1020 n = max(n, self.MIN_READ_SIZE) 1021 n = min(n, self._compress_left) 1022 1023 data = self._fileobj.read(n) 1024 self._compress_left -= len(data) 1025 if not data: 1026 raise EOFError 1027 1028 if self._decrypter is not None: 1029 data = self._decrypter(data) 1030 return data 1031 1032 def close(self): 1033 try: 1034 if self._close_fileobj: 1035 self._fileobj.close() 1036 finally: 1037 super().close() 1038 1039 def seekable(self): 1040 if self.closed: 1041 raise ValueError("I/O operation on closed file.") 1042 return self._seekable 1043 1044 def seek(self, offset, whence=0): 1045 if self.closed: 1046 raise ValueError("seek on closed file.") 1047 if not self._seekable: 1048 raise io.UnsupportedOperation("underlying stream is not seekable") 1049 curr_pos = self.tell() 1050 if whence == 0: # Seek from start of file 1051 new_pos = offset 1052 elif whence == 1: # Seek from current position 1053 new_pos = curr_pos + offset 1054 elif whence == 2: # Seek from EOF 1055 new_pos = self._orig_file_size + offset 1056 else: 1057 raise ValueError("whence must be os.SEEK_SET (0), " 1058 "os.SEEK_CUR (1), or os.SEEK_END (2)") 1059 1060 if new_pos > self._orig_file_size: 1061 new_pos = self._orig_file_size 1062 1063 if new_pos < 0: 1064 new_pos = 0 1065 1066 read_offset = new_pos - curr_pos 1067 buff_offset = read_offset + self._offset 1068 1069 if buff_offset >= 0 and buff_offset < len(self._readbuffer): 1070 # Just move the _offset index if the new position is in the _readbuffer 1071 self._offset = buff_offset 1072 read_offset = 0 1073 elif read_offset < 0: 1074 # Position is before the current position. Reset the ZipExtFile 1075 self._fileobj.seek(self._orig_compress_start) 1076 self._running_crc = self._orig_start_crc 1077 self._compress_left = self._orig_compress_size 1078 self._left = self._orig_file_size 1079 self._readbuffer = b'' 1080 self._offset = 0 1081 self._decompressor = _get_decompressor(self._compress_type) 1082 self._eof = False 1083 read_offset = new_pos 1084 if self._decrypter is not None: 1085 self._init_decrypter() 1086 1087 while read_offset > 0: 1088 read_len = min(self.MAX_SEEK_READ, read_offset) 1089 self.read(read_len) 1090 read_offset -= read_len 1091 1092 return self.tell() 1093 1094 def tell(self): 1095 if self.closed: 1096 raise ValueError("tell on closed file.") 1097 if not self._seekable: 1098 raise io.UnsupportedOperation("underlying stream is not seekable") 1099 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset 1100 return filepos 1101 1102 1103class _ZipWriteFile(io.BufferedIOBase): 1104 def __init__(self, zf, zinfo, zip64): 1105 self._zinfo = zinfo 1106 self._zip64 = zip64 1107 self._zipfile = zf 1108 self._compressor = _get_compressor(zinfo.compress_type, 1109 zinfo._compresslevel) 1110 self._file_size = 0 1111 self._compress_size = 0 1112 self._crc = 0 1113 1114 @property 1115 def _fileobj(self): 1116 return self._zipfile.fp 1117 1118 def writable(self): 1119 return True 1120 1121 def write(self, data): 1122 if self.closed: 1123 raise ValueError('I/O operation on closed file.') 1124 nbytes = len(data) 1125 self._file_size += nbytes 1126 self._crc = crc32(data, self._crc) 1127 if self._compressor: 1128 data = self._compressor.compress(data) 1129 self._compress_size += len(data) 1130 self._fileobj.write(data) 1131 return nbytes 1132 1133 def close(self): 1134 if self.closed: 1135 return 1136 try: 1137 super().close() 1138 # Flush any data from the compressor, and update header info 1139 if self._compressor: 1140 buf = self._compressor.flush() 1141 self._compress_size += len(buf) 1142 self._fileobj.write(buf) 1143 self._zinfo.compress_size = self._compress_size 1144 else: 1145 self._zinfo.compress_size = self._file_size 1146 self._zinfo.CRC = self._crc 1147 self._zinfo.file_size = self._file_size 1148 1149 # Write updated header info 1150 if self._zinfo.flag_bits & 0x08: 1151 # Write CRC and file sizes after the file data 1152 fmt = '<LLQQ' if self._zip64 else '<LLLL' 1153 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC, 1154 self._zinfo.compress_size, self._zinfo.file_size)) 1155 self._zipfile.start_dir = self._fileobj.tell() 1156 else: 1157 if not self._zip64: 1158 if self._file_size > ZIP64_LIMIT: 1159 raise RuntimeError( 1160 'File size unexpectedly exceeded ZIP64 limit') 1161 if self._compress_size > ZIP64_LIMIT: 1162 raise RuntimeError( 1163 'Compressed size unexpectedly exceeded ZIP64 limit') 1164 # Seek backwards and write file header (which will now include 1165 # correct CRC and file sizes) 1166 1167 # Preserve current position in file 1168 self._zipfile.start_dir = self._fileobj.tell() 1169 self._fileobj.seek(self._zinfo.header_offset) 1170 self._fileobj.write(self._zinfo.FileHeader(self._zip64)) 1171 self._fileobj.seek(self._zipfile.start_dir) 1172 1173 # Successfully written: Add file to our caches 1174 self._zipfile.filelist.append(self._zinfo) 1175 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo 1176 finally: 1177 self._zipfile._writing = False 1178 1179 1180 1181class ZipFile: 1182 """ Class with methods to open, read, write, close, list zip files. 1183 1184 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True, 1185 compresslevel=None) 1186 1187 file: Either the path to the file, or a file-like object. 1188 If it is a path, the file will be opened and closed by ZipFile. 1189 mode: The mode can be either read 'r', write 'w', exclusive create 'x', 1190 or append 'a'. 1191 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), 1192 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). 1193 allowZip64: if True ZipFile will create files with ZIP64 extensions when 1194 needed, otherwise it will raise an exception when this would 1195 be necessary. 1196 compresslevel: None (default for the given compression type) or an integer 1197 specifying the level to pass to the compressor. 1198 When using ZIP_STORED or ZIP_LZMA this keyword has no effect. 1199 When using ZIP_DEFLATED integers 0 through 9 are accepted. 1200 When using ZIP_BZIP2 integers 1 through 9 are accepted. 1201 1202 """ 1203 1204 fp = None # Set here since __del__ checks it 1205 _windows_illegal_name_trans_table = None 1206 1207 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, 1208 compresslevel=None, *, strict_timestamps=True): 1209 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x', 1210 or append 'a'.""" 1211 if mode not in ('r', 'w', 'x', 'a'): 1212 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'") 1213 1214 _check_compression(compression) 1215 1216 self._allowZip64 = allowZip64 1217 self._didModify = False 1218 self.debug = 0 # Level of printing: 0 through 3 1219 self.NameToInfo = {} # Find file info given name 1220 self.filelist = [] # List of ZipInfo instances for archive 1221 self.compression = compression # Method of compression 1222 self.compresslevel = compresslevel 1223 self.mode = mode 1224 self.pwd = None 1225 self._comment = b'' 1226 self._strict_timestamps = strict_timestamps 1227 1228 # Check if we were passed a file-like object 1229 if isinstance(file, os.PathLike): 1230 file = os.fspath(file) 1231 if isinstance(file, str): 1232 # No, it's a filename 1233 self._filePassed = 0 1234 self.filename = file 1235 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b', 1236 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'} 1237 filemode = modeDict[mode] 1238 while True: 1239 try: 1240 self.fp = io.open(file, filemode) 1241 except OSError: 1242 if filemode in modeDict: 1243 filemode = modeDict[filemode] 1244 continue 1245 raise 1246 break 1247 else: 1248 self._filePassed = 1 1249 self.fp = file 1250 self.filename = getattr(file, 'name', None) 1251 self._fileRefCnt = 1 1252 self._lock = threading.RLock() 1253 self._seekable = True 1254 self._writing = False 1255 1256 try: 1257 if mode == 'r': 1258 self._RealGetContents() 1259 elif mode in ('w', 'x'): 1260 # set the modified flag so central directory gets written 1261 # even if no files are added to the archive 1262 self._didModify = True 1263 try: 1264 self.start_dir = self.fp.tell() 1265 except (AttributeError, OSError): 1266 self.fp = _Tellable(self.fp) 1267 self.start_dir = 0 1268 self._seekable = False 1269 else: 1270 # Some file-like objects can provide tell() but not seek() 1271 try: 1272 self.fp.seek(self.start_dir) 1273 except (AttributeError, OSError): 1274 self._seekable = False 1275 elif mode == 'a': 1276 try: 1277 # See if file is a zip file 1278 self._RealGetContents() 1279 # seek to start of directory and overwrite 1280 self.fp.seek(self.start_dir) 1281 except BadZipFile: 1282 # file is not a zip file, just append 1283 self.fp.seek(0, 2) 1284 1285 # set the modified flag so central directory gets written 1286 # even if no files are added to the archive 1287 self._didModify = True 1288 self.start_dir = self.fp.tell() 1289 else: 1290 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'") 1291 except: 1292 fp = self.fp 1293 self.fp = None 1294 self._fpclose(fp) 1295 raise 1296 1297 def __enter__(self): 1298 return self 1299 1300 def __exit__(self, type, value, traceback): 1301 self.close() 1302 1303 def __repr__(self): 1304 result = ['<%s.%s' % (self.__class__.__module__, 1305 self.__class__.__qualname__)] 1306 if self.fp is not None: 1307 if self._filePassed: 1308 result.append(' file=%r' % self.fp) 1309 elif self.filename is not None: 1310 result.append(' filename=%r' % self.filename) 1311 result.append(' mode=%r' % self.mode) 1312 else: 1313 result.append(' [closed]') 1314 result.append('>') 1315 return ''.join(result) 1316 1317 def _RealGetContents(self): 1318 """Read in the table of contents for the ZIP file.""" 1319 fp = self.fp 1320 try: 1321 endrec = _EndRecData(fp) 1322 except OSError: 1323 raise BadZipFile("File is not a zip file") 1324 if not endrec: 1325 raise BadZipFile("File is not a zip file") 1326 if self.debug > 1: 1327 print(endrec) 1328 size_cd = endrec[_ECD_SIZE] # bytes in central directory 1329 offset_cd = endrec[_ECD_OFFSET] # offset of central directory 1330 self._comment = endrec[_ECD_COMMENT] # archive comment 1331 1332 # "concat" is zero, unless zip was concatenated to another file 1333 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd 1334 if endrec[_ECD_SIGNATURE] == stringEndArchive64: 1335 # If Zip64 extension structures are present, account for them 1336 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) 1337 1338 if self.debug > 2: 1339 inferred = concat + offset_cd 1340 print("given, inferred, offset", offset_cd, inferred, concat) 1341 # self.start_dir: Position of start of central directory 1342 self.start_dir = offset_cd + concat 1343 fp.seek(self.start_dir, 0) 1344 data = fp.read(size_cd) 1345 fp = io.BytesIO(data) 1346 total = 0 1347 while total < size_cd: 1348 centdir = fp.read(sizeCentralDir) 1349 if len(centdir) != sizeCentralDir: 1350 raise BadZipFile("Truncated central directory") 1351 centdir = struct.unpack(structCentralDir, centdir) 1352 if centdir[_CD_SIGNATURE] != stringCentralDir: 1353 raise BadZipFile("Bad magic number for central directory") 1354 if self.debug > 2: 1355 print(centdir) 1356 filename = fp.read(centdir[_CD_FILENAME_LENGTH]) 1357 flags = centdir[5] 1358 if flags & 0x800: 1359 # UTF-8 file names extension 1360 filename = filename.decode('utf-8') 1361 else: 1362 # Historical ZIP filename encoding 1363 filename = filename.decode('cp437') 1364 # Create ZipInfo instance to store file information 1365 x = ZipInfo(filename) 1366 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) 1367 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) 1368 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] 1369 (x.create_version, x.create_system, x.extract_version, x.reserved, 1370 x.flag_bits, x.compress_type, t, d, 1371 x.CRC, x.compress_size, x.file_size) = centdir[1:12] 1372 if x.extract_version > MAX_EXTRACT_VERSION: 1373 raise NotImplementedError("zip file version %.1f" % 1374 (x.extract_version / 10)) 1375 x.volume, x.internal_attr, x.external_attr = centdir[15:18] 1376 # Convert date/time code to (year, month, day, hour, min, sec) 1377 x._raw_time = t 1378 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, 1379 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) 1380 1381 x._decodeExtra() 1382 x.header_offset = x.header_offset + concat 1383 self.filelist.append(x) 1384 self.NameToInfo[x.filename] = x 1385 1386 # update total bytes read from central directory 1387 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] 1388 + centdir[_CD_EXTRA_FIELD_LENGTH] 1389 + centdir[_CD_COMMENT_LENGTH]) 1390 1391 if self.debug > 2: 1392 print("total", total) 1393 1394 1395 def namelist(self): 1396 """Return a list of file names in the archive.""" 1397 return [data.filename for data in self.filelist] 1398 1399 def infolist(self): 1400 """Return a list of class ZipInfo instances for files in the 1401 archive.""" 1402 return self.filelist 1403 1404 def printdir(self, file=None): 1405 """Print a table of contents for the zip file.""" 1406 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"), 1407 file=file) 1408 for zinfo in self.filelist: 1409 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 1410 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size), 1411 file=file) 1412 1413 def testzip(self): 1414 """Read all the files and check the CRC.""" 1415 chunk_size = 2 ** 20 1416 for zinfo in self.filelist: 1417 try: 1418 # Read by chunks, to avoid an OverflowError or a 1419 # MemoryError with very large embedded files. 1420 with self.open(zinfo.filename, "r") as f: 1421 while f.read(chunk_size): # Check CRC-32 1422 pass 1423 except BadZipFile: 1424 return zinfo.filename 1425 1426 def getinfo(self, name): 1427 """Return the instance of ZipInfo given 'name'.""" 1428 info = self.NameToInfo.get(name) 1429 if info is None: 1430 raise KeyError( 1431 'There is no item named %r in the archive' % name) 1432 1433 return info 1434 1435 def setpassword(self, pwd): 1436 """Set default password for encrypted files.""" 1437 if pwd and not isinstance(pwd, bytes): 1438 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1439 if pwd: 1440 self.pwd = pwd 1441 else: 1442 self.pwd = None 1443 1444 @property 1445 def comment(self): 1446 """The comment text associated with the ZIP file.""" 1447 return self._comment 1448 1449 @comment.setter 1450 def comment(self, comment): 1451 if not isinstance(comment, bytes): 1452 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__) 1453 # check for valid comment length 1454 if len(comment) > ZIP_MAX_COMMENT: 1455 import warnings 1456 warnings.warn('Archive comment is too long; truncating to %d bytes' 1457 % ZIP_MAX_COMMENT, stacklevel=2) 1458 comment = comment[:ZIP_MAX_COMMENT] 1459 self._comment = comment 1460 self._didModify = True 1461 1462 def read(self, name, pwd=None): 1463 """Return file bytes for name.""" 1464 with self.open(name, "r", pwd) as fp: 1465 return fp.read() 1466 1467 def open(self, name, mode="r", pwd=None, *, force_zip64=False): 1468 """Return file-like object for 'name'. 1469 1470 name is a string for the file name within the ZIP file, or a ZipInfo 1471 object. 1472 1473 mode should be 'r' to read a file already in the ZIP file, or 'w' to 1474 write to a file newly added to the archive. 1475 1476 pwd is the password to decrypt files (only used for reading). 1477 1478 When writing, if the file size is not known in advance but may exceed 1479 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large 1480 files. If the size is known in advance, it is best to pass a ZipInfo 1481 instance for name, with zinfo.file_size set. 1482 """ 1483 if mode not in {"r", "w"}: 1484 raise ValueError('open() requires mode "r" or "w"') 1485 if pwd and not isinstance(pwd, bytes): 1486 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1487 if pwd and (mode == "w"): 1488 raise ValueError("pwd is only supported for reading files") 1489 if not self.fp: 1490 raise ValueError( 1491 "Attempt to use ZIP archive that was already closed") 1492 1493 # Make sure we have an info object 1494 if isinstance(name, ZipInfo): 1495 # 'name' is already an info object 1496 zinfo = name 1497 elif mode == 'w': 1498 zinfo = ZipInfo(name) 1499 zinfo.compress_type = self.compression 1500 zinfo._compresslevel = self.compresslevel 1501 else: 1502 # Get info object for name 1503 zinfo = self.getinfo(name) 1504 1505 if mode == 'w': 1506 return self._open_to_write(zinfo, force_zip64=force_zip64) 1507 1508 if self._writing: 1509 raise ValueError("Can't read from the ZIP file while there " 1510 "is an open writing handle on it. " 1511 "Close the writing handle before trying to read.") 1512 1513 # Open for reading: 1514 self._fileRefCnt += 1 1515 zef_file = _SharedFile(self.fp, zinfo.header_offset, 1516 self._fpclose, self._lock, lambda: self._writing) 1517 try: 1518 # Skip the file header: 1519 fheader = zef_file.read(sizeFileHeader) 1520 if len(fheader) != sizeFileHeader: 1521 raise BadZipFile("Truncated file header") 1522 fheader = struct.unpack(structFileHeader, fheader) 1523 if fheader[_FH_SIGNATURE] != stringFileHeader: 1524 raise BadZipFile("Bad magic number for file header") 1525 1526 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 1527 if fheader[_FH_EXTRA_FIELD_LENGTH]: 1528 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) 1529 1530 if zinfo.flag_bits & 0x20: 1531 # Zip 2.7: compressed patched data 1532 raise NotImplementedError("compressed patched data (flag bit 5)") 1533 1534 if zinfo.flag_bits & 0x40: 1535 # strong encryption 1536 raise NotImplementedError("strong encryption (flag bit 6)") 1537 1538 if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800: 1539 # UTF-8 filename 1540 fname_str = fname.decode("utf-8") 1541 else: 1542 fname_str = fname.decode("cp437") 1543 1544 if fname_str != zinfo.orig_filename: 1545 raise BadZipFile( 1546 'File name in directory %r and header %r differ.' 1547 % (zinfo.orig_filename, fname)) 1548 1549 # check for encrypted flag & handle password 1550 is_encrypted = zinfo.flag_bits & 0x1 1551 if is_encrypted: 1552 if not pwd: 1553 pwd = self.pwd 1554 if not pwd: 1555 raise RuntimeError("File %r is encrypted, password " 1556 "required for extraction" % name) 1557 else: 1558 pwd = None 1559 1560 return ZipExtFile(zef_file, mode, zinfo, pwd, True) 1561 except: 1562 zef_file.close() 1563 raise 1564 1565 def _open_to_write(self, zinfo, force_zip64=False): 1566 if force_zip64 and not self._allowZip64: 1567 raise ValueError( 1568 "force_zip64 is True, but allowZip64 was False when opening " 1569 "the ZIP file." 1570 ) 1571 if self._writing: 1572 raise ValueError("Can't write to the ZIP file while there is " 1573 "another write handle open on it. " 1574 "Close the first handle before opening another.") 1575 1576 # Size and CRC are overwritten with correct data after processing the file 1577 zinfo.compress_size = 0 1578 zinfo.CRC = 0 1579 1580 zinfo.flag_bits = 0x00 1581 if zinfo.compress_type == ZIP_LZMA: 1582 # Compressed data includes an end-of-stream (EOS) marker 1583 zinfo.flag_bits |= 0x02 1584 if not self._seekable: 1585 zinfo.flag_bits |= 0x08 1586 1587 if not zinfo.external_attr: 1588 zinfo.external_attr = 0o600 << 16 # permissions: ?rw------- 1589 1590 # Compressed size can be larger than uncompressed size 1591 zip64 = self._allowZip64 and \ 1592 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT) 1593 1594 if self._seekable: 1595 self.fp.seek(self.start_dir) 1596 zinfo.header_offset = self.fp.tell() 1597 1598 self._writecheck(zinfo) 1599 self._didModify = True 1600 1601 self.fp.write(zinfo.FileHeader(zip64)) 1602 1603 self._writing = True 1604 return _ZipWriteFile(self, zinfo, zip64) 1605 1606 def extract(self, member, path=None, pwd=None): 1607 """Extract a member from the archive to the current working directory, 1608 using its full name. Its file information is extracted as accurately 1609 as possible. `member' may be a filename or a ZipInfo object. You can 1610 specify a different directory using `path'. 1611 """ 1612 if path is None: 1613 path = os.getcwd() 1614 else: 1615 path = os.fspath(path) 1616 1617 return self._extract_member(member, path, pwd) 1618 1619 def extractall(self, path=None, members=None, pwd=None): 1620 """Extract all members from the archive to the current working 1621 directory. `path' specifies a different directory to extract to. 1622 `members' is optional and must be a subset of the list returned 1623 by namelist(). 1624 """ 1625 if members is None: 1626 members = self.namelist() 1627 1628 if path is None: 1629 path = os.getcwd() 1630 else: 1631 path = os.fspath(path) 1632 1633 for zipinfo in members: 1634 self._extract_member(zipinfo, path, pwd) 1635 1636 @classmethod 1637 def _sanitize_windows_name(cls, arcname, pathsep): 1638 """Replace bad characters and remove trailing dots from parts.""" 1639 table = cls._windows_illegal_name_trans_table 1640 if not table: 1641 illegal = ':<>|"?*' 1642 table = str.maketrans(illegal, '_' * len(illegal)) 1643 cls._windows_illegal_name_trans_table = table 1644 arcname = arcname.translate(table) 1645 # remove trailing dots 1646 arcname = (x.rstrip('.') for x in arcname.split(pathsep)) 1647 # rejoin, removing empty parts. 1648 arcname = pathsep.join(x for x in arcname if x) 1649 return arcname 1650 1651 def _extract_member(self, member, targetpath, pwd): 1652 """Extract the ZipInfo object 'member' to a physical 1653 file on the path targetpath. 1654 """ 1655 if not isinstance(member, ZipInfo): 1656 member = self.getinfo(member) 1657 1658 # build the destination pathname, replacing 1659 # forward slashes to platform specific separators. 1660 arcname = member.filename.replace('/', os.path.sep) 1661 1662 if os.path.altsep: 1663 arcname = arcname.replace(os.path.altsep, os.path.sep) 1664 # interpret absolute pathname as relative, remove drive letter or 1665 # UNC path, redundant separators, "." and ".." components. 1666 arcname = os.path.splitdrive(arcname)[1] 1667 invalid_path_parts = ('', os.path.curdir, os.path.pardir) 1668 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) 1669 if x not in invalid_path_parts) 1670 if os.path.sep == '\\': 1671 # filter illegal characters on Windows 1672 arcname = self._sanitize_windows_name(arcname, os.path.sep) 1673 1674 targetpath = os.path.join(targetpath, arcname) 1675 targetpath = os.path.normpath(targetpath) 1676 1677 # Create all upper directories if necessary. 1678 upperdirs = os.path.dirname(targetpath) 1679 if upperdirs and not os.path.exists(upperdirs): 1680 os.makedirs(upperdirs) 1681 1682 if member.is_dir(): 1683 if not os.path.isdir(targetpath): 1684 os.mkdir(targetpath) 1685 return targetpath 1686 1687 with self.open(member, pwd=pwd) as source, \ 1688 open(targetpath, "wb") as target: 1689 shutil.copyfileobj(source, target) 1690 1691 return targetpath 1692 1693 def _writecheck(self, zinfo): 1694 """Check for errors before writing a file to the archive.""" 1695 if zinfo.filename in self.NameToInfo: 1696 import warnings 1697 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3) 1698 if self.mode not in ('w', 'x', 'a'): 1699 raise ValueError("write() requires mode 'w', 'x', or 'a'") 1700 if not self.fp: 1701 raise ValueError( 1702 "Attempt to write ZIP archive that was already closed") 1703 _check_compression(zinfo.compress_type) 1704 if not self._allowZip64: 1705 requires_zip64 = None 1706 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT: 1707 requires_zip64 = "Files count" 1708 elif zinfo.file_size > ZIP64_LIMIT: 1709 requires_zip64 = "Filesize" 1710 elif zinfo.header_offset > ZIP64_LIMIT: 1711 requires_zip64 = "Zipfile size" 1712 if requires_zip64: 1713 raise LargeZipFile(requires_zip64 + 1714 " would require ZIP64 extensions") 1715 1716 def write(self, filename, arcname=None, 1717 compress_type=None, compresslevel=None): 1718 """Put the bytes from filename into the archive under the name 1719 arcname.""" 1720 if not self.fp: 1721 raise ValueError( 1722 "Attempt to write to ZIP archive that was already closed") 1723 if self._writing: 1724 raise ValueError( 1725 "Can't write to ZIP archive while an open writing handle exists" 1726 ) 1727 1728 zinfo = ZipInfo.from_file(filename, arcname, 1729 strict_timestamps=self._strict_timestamps) 1730 1731 if zinfo.is_dir(): 1732 zinfo.compress_size = 0 1733 zinfo.CRC = 0 1734 else: 1735 if compress_type is not None: 1736 zinfo.compress_type = compress_type 1737 else: 1738 zinfo.compress_type = self.compression 1739 1740 if compresslevel is not None: 1741 zinfo._compresslevel = compresslevel 1742 else: 1743 zinfo._compresslevel = self.compresslevel 1744 1745 if zinfo.is_dir(): 1746 with self._lock: 1747 if self._seekable: 1748 self.fp.seek(self.start_dir) 1749 zinfo.header_offset = self.fp.tell() # Start of header bytes 1750 if zinfo.compress_type == ZIP_LZMA: 1751 # Compressed data includes an end-of-stream (EOS) marker 1752 zinfo.flag_bits |= 0x02 1753 1754 self._writecheck(zinfo) 1755 self._didModify = True 1756 1757 self.filelist.append(zinfo) 1758 self.NameToInfo[zinfo.filename] = zinfo 1759 self.fp.write(zinfo.FileHeader(False)) 1760 self.start_dir = self.fp.tell() 1761 else: 1762 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: 1763 shutil.copyfileobj(src, dest, 1024*8) 1764 1765 def writestr(self, zinfo_or_arcname, data, 1766 compress_type=None, compresslevel=None): 1767 """Write a file into the archive. The contents is 'data', which 1768 may be either a 'str' or a 'bytes' instance; if it is a 'str', 1769 it is encoded as UTF-8 first. 1770 'zinfo_or_arcname' is either a ZipInfo instance or 1771 the name of the file in the archive.""" 1772 if isinstance(data, str): 1773 data = data.encode("utf-8") 1774 if not isinstance(zinfo_or_arcname, ZipInfo): 1775 zinfo = ZipInfo(filename=zinfo_or_arcname, 1776 date_time=time.localtime(time.time())[:6]) 1777 zinfo.compress_type = self.compression 1778 zinfo._compresslevel = self.compresslevel 1779 if zinfo.filename[-1] == '/': 1780 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x 1781 zinfo.external_attr |= 0x10 # MS-DOS directory flag 1782 else: 1783 zinfo.external_attr = 0o600 << 16 # ?rw------- 1784 else: 1785 zinfo = zinfo_or_arcname 1786 1787 if not self.fp: 1788 raise ValueError( 1789 "Attempt to write to ZIP archive that was already closed") 1790 if self._writing: 1791 raise ValueError( 1792 "Can't write to ZIP archive while an open writing handle exists." 1793 ) 1794 1795 if compress_type is not None: 1796 zinfo.compress_type = compress_type 1797 1798 if compresslevel is not None: 1799 zinfo._compresslevel = compresslevel 1800 1801 zinfo.file_size = len(data) # Uncompressed size 1802 with self._lock: 1803 with self.open(zinfo, mode='w') as dest: 1804 dest.write(data) 1805 1806 def __del__(self): 1807 """Call the "close()" method in case the user forgot.""" 1808 self.close() 1809 1810 def close(self): 1811 """Close the file, and for mode 'w', 'x' and 'a' write the ending 1812 records.""" 1813 if self.fp is None: 1814 return 1815 1816 if self._writing: 1817 raise ValueError("Can't close the ZIP file while there is " 1818 "an open writing handle on it. " 1819 "Close the writing handle before closing the zip.") 1820 1821 try: 1822 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records 1823 with self._lock: 1824 if self._seekable: 1825 self.fp.seek(self.start_dir) 1826 self._write_end_record() 1827 finally: 1828 fp = self.fp 1829 self.fp = None 1830 self._fpclose(fp) 1831 1832 def _write_end_record(self): 1833 for zinfo in self.filelist: # write central directory 1834 dt = zinfo.date_time 1835 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 1836 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 1837 extra = [] 1838 if zinfo.file_size > ZIP64_LIMIT \ 1839 or zinfo.compress_size > ZIP64_LIMIT: 1840 extra.append(zinfo.file_size) 1841 extra.append(zinfo.compress_size) 1842 file_size = 0xffffffff 1843 compress_size = 0xffffffff 1844 else: 1845 file_size = zinfo.file_size 1846 compress_size = zinfo.compress_size 1847 1848 if zinfo.header_offset > ZIP64_LIMIT: 1849 extra.append(zinfo.header_offset) 1850 header_offset = 0xffffffff 1851 else: 1852 header_offset = zinfo.header_offset 1853 1854 extra_data = zinfo.extra 1855 min_version = 0 1856 if extra: 1857 # Append a ZIP64 field to the extra's 1858 extra_data = _strip_extra(extra_data, (1,)) 1859 extra_data = struct.pack( 1860 '<HH' + 'Q'*len(extra), 1861 1, 8*len(extra), *extra) + extra_data 1862 1863 min_version = ZIP64_VERSION 1864 1865 if zinfo.compress_type == ZIP_BZIP2: 1866 min_version = max(BZIP2_VERSION, min_version) 1867 elif zinfo.compress_type == ZIP_LZMA: 1868 min_version = max(LZMA_VERSION, min_version) 1869 1870 extract_version = max(min_version, zinfo.extract_version) 1871 create_version = max(min_version, zinfo.create_version) 1872 filename, flag_bits = zinfo._encodeFilenameFlags() 1873 centdir = struct.pack(structCentralDir, 1874 stringCentralDir, create_version, 1875 zinfo.create_system, extract_version, zinfo.reserved, 1876 flag_bits, zinfo.compress_type, dostime, dosdate, 1877 zinfo.CRC, compress_size, file_size, 1878 len(filename), len(extra_data), len(zinfo.comment), 1879 0, zinfo.internal_attr, zinfo.external_attr, 1880 header_offset) 1881 self.fp.write(centdir) 1882 self.fp.write(filename) 1883 self.fp.write(extra_data) 1884 self.fp.write(zinfo.comment) 1885 1886 pos2 = self.fp.tell() 1887 # Write end-of-zip-archive record 1888 centDirCount = len(self.filelist) 1889 centDirSize = pos2 - self.start_dir 1890 centDirOffset = self.start_dir 1891 requires_zip64 = None 1892 if centDirCount > ZIP_FILECOUNT_LIMIT: 1893 requires_zip64 = "Files count" 1894 elif centDirOffset > ZIP64_LIMIT: 1895 requires_zip64 = "Central directory offset" 1896 elif centDirSize > ZIP64_LIMIT: 1897 requires_zip64 = "Central directory size" 1898 if requires_zip64: 1899 # Need to write the ZIP64 end-of-archive records 1900 if not self._allowZip64: 1901 raise LargeZipFile(requires_zip64 + 1902 " would require ZIP64 extensions") 1903 zip64endrec = struct.pack( 1904 structEndArchive64, stringEndArchive64, 1905 44, 45, 45, 0, 0, centDirCount, centDirCount, 1906 centDirSize, centDirOffset) 1907 self.fp.write(zip64endrec) 1908 1909 zip64locrec = struct.pack( 1910 structEndArchive64Locator, 1911 stringEndArchive64Locator, 0, pos2, 1) 1912 self.fp.write(zip64locrec) 1913 centDirCount = min(centDirCount, 0xFFFF) 1914 centDirSize = min(centDirSize, 0xFFFFFFFF) 1915 centDirOffset = min(centDirOffset, 0xFFFFFFFF) 1916 1917 endrec = struct.pack(structEndArchive, stringEndArchive, 1918 0, 0, centDirCount, centDirCount, 1919 centDirSize, centDirOffset, len(self._comment)) 1920 self.fp.write(endrec) 1921 self.fp.write(self._comment) 1922 if self.mode == "a": 1923 self.fp.truncate() 1924 self.fp.flush() 1925 1926 def _fpclose(self, fp): 1927 assert self._fileRefCnt > 0 1928 self._fileRefCnt -= 1 1929 if not self._fileRefCnt and not self._filePassed: 1930 fp.close() 1931 1932 1933class PyZipFile(ZipFile): 1934 """Class to create ZIP archives with Python library files and packages.""" 1935 1936 def __init__(self, file, mode="r", compression=ZIP_STORED, 1937 allowZip64=True, optimize=-1): 1938 ZipFile.__init__(self, file, mode=mode, compression=compression, 1939 allowZip64=allowZip64) 1940 self._optimize = optimize 1941 1942 def writepy(self, pathname, basename="", filterfunc=None): 1943 """Add all files from "pathname" to the ZIP archive. 1944 1945 If pathname is a package directory, search the directory and 1946 all package subdirectories recursively for all *.py and enter 1947 the modules into the archive. If pathname is a plain 1948 directory, listdir *.py and enter all modules. Else, pathname 1949 must be a Python *.py file and the module will be put into the 1950 archive. Added modules are always module.pyc. 1951 This method will compile the module.py into module.pyc if 1952 necessary. 1953 If filterfunc(pathname) is given, it is called with every argument. 1954 When it is False, the file or directory is skipped. 1955 """ 1956 pathname = os.fspath(pathname) 1957 if filterfunc and not filterfunc(pathname): 1958 if self.debug: 1959 label = 'path' if os.path.isdir(pathname) else 'file' 1960 print('%s %r skipped by filterfunc' % (label, pathname)) 1961 return 1962 dir, name = os.path.split(pathname) 1963 if os.path.isdir(pathname): 1964 initname = os.path.join(pathname, "__init__.py") 1965 if os.path.isfile(initname): 1966 # This is a package directory, add it 1967 if basename: 1968 basename = "%s/%s" % (basename, name) 1969 else: 1970 basename = name 1971 if self.debug: 1972 print("Adding package in", pathname, "as", basename) 1973 fname, arcname = self._get_codename(initname[0:-3], basename) 1974 if self.debug: 1975 print("Adding", arcname) 1976 self.write(fname, arcname) 1977 dirlist = sorted(os.listdir(pathname)) 1978 dirlist.remove("__init__.py") 1979 # Add all *.py files and package subdirectories 1980 for filename in dirlist: 1981 path = os.path.join(pathname, filename) 1982 root, ext = os.path.splitext(filename) 1983 if os.path.isdir(path): 1984 if os.path.isfile(os.path.join(path, "__init__.py")): 1985 # This is a package directory, add it 1986 self.writepy(path, basename, 1987 filterfunc=filterfunc) # Recursive call 1988 elif ext == ".py": 1989 if filterfunc and not filterfunc(path): 1990 if self.debug: 1991 print('file %r skipped by filterfunc' % path) 1992 continue 1993 fname, arcname = self._get_codename(path[0:-3], 1994 basename) 1995 if self.debug: 1996 print("Adding", arcname) 1997 self.write(fname, arcname) 1998 else: 1999 # This is NOT a package directory, add its files at top level 2000 if self.debug: 2001 print("Adding files from directory", pathname) 2002 for filename in sorted(os.listdir(pathname)): 2003 path = os.path.join(pathname, filename) 2004 root, ext = os.path.splitext(filename) 2005 if ext == ".py": 2006 if filterfunc and not filterfunc(path): 2007 if self.debug: 2008 print('file %r skipped by filterfunc' % path) 2009 continue 2010 fname, arcname = self._get_codename(path[0:-3], 2011 basename) 2012 if self.debug: 2013 print("Adding", arcname) 2014 self.write(fname, arcname) 2015 else: 2016 if pathname[-3:] != ".py": 2017 raise RuntimeError( 2018 'Files added with writepy() must end with ".py"') 2019 fname, arcname = self._get_codename(pathname[0:-3], basename) 2020 if self.debug: 2021 print("Adding file", arcname) 2022 self.write(fname, arcname) 2023 2024 def _get_codename(self, pathname, basename): 2025 """Return (filename, archivename) for the path. 2026 2027 Given a module name path, return the correct file path and 2028 archive name, compiling if necessary. For example, given 2029 /python/lib/string, return (/python/lib/string.pyc, string). 2030 """ 2031 def _compile(file, optimize=-1): 2032 import py_compile 2033 if self.debug: 2034 print("Compiling", file) 2035 try: 2036 py_compile.compile(file, doraise=True, optimize=optimize) 2037 except py_compile.PyCompileError as err: 2038 print(err.msg) 2039 return False 2040 return True 2041 2042 file_py = pathname + ".py" 2043 file_pyc = pathname + ".pyc" 2044 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='') 2045 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1) 2046 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2) 2047 if self._optimize == -1: 2048 # legacy mode: use whatever file is present 2049 if (os.path.isfile(file_pyc) and 2050 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime): 2051 # Use .pyc file. 2052 arcname = fname = file_pyc 2053 elif (os.path.isfile(pycache_opt0) and 2054 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime): 2055 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2056 # file name in the archive. 2057 fname = pycache_opt0 2058 arcname = file_pyc 2059 elif (os.path.isfile(pycache_opt1) and 2060 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime): 2061 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2062 # file name in the archive. 2063 fname = pycache_opt1 2064 arcname = file_pyc 2065 elif (os.path.isfile(pycache_opt2) and 2066 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime): 2067 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2068 # file name in the archive. 2069 fname = pycache_opt2 2070 arcname = file_pyc 2071 else: 2072 # Compile py into PEP 3147 pyc file. 2073 if _compile(file_py): 2074 if sys.flags.optimize == 0: 2075 fname = pycache_opt0 2076 elif sys.flags.optimize == 1: 2077 fname = pycache_opt1 2078 else: 2079 fname = pycache_opt2 2080 arcname = file_pyc 2081 else: 2082 fname = arcname = file_py 2083 else: 2084 # new mode: use given optimization level 2085 if self._optimize == 0: 2086 fname = pycache_opt0 2087 arcname = file_pyc 2088 else: 2089 arcname = file_pyc 2090 if self._optimize == 1: 2091 fname = pycache_opt1 2092 elif self._optimize == 2: 2093 fname = pycache_opt2 2094 else: 2095 msg = "invalid value for 'optimize': {!r}".format(self._optimize) 2096 raise ValueError(msg) 2097 if not (os.path.isfile(fname) and 2098 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime): 2099 if not _compile(file_py, optimize=self._optimize): 2100 fname = arcname = file_py 2101 archivename = os.path.split(arcname)[1] 2102 if basename: 2103 archivename = "%s/%s" % (basename, archivename) 2104 return (fname, archivename) 2105 2106 2107def _parents(path): 2108 """ 2109 Given a path with elements separated by 2110 posixpath.sep, generate all parents of that path. 2111 2112 >>> list(_parents('b/d')) 2113 ['b'] 2114 >>> list(_parents('/b/d/')) 2115 ['/b'] 2116 >>> list(_parents('b/d/f/')) 2117 ['b/d', 'b'] 2118 >>> list(_parents('b')) 2119 [] 2120 >>> list(_parents('')) 2121 [] 2122 """ 2123 return itertools.islice(_ancestry(path), 1, None) 2124 2125 2126def _ancestry(path): 2127 """ 2128 Given a path with elements separated by 2129 posixpath.sep, generate all elements of that path 2130 2131 >>> list(_ancestry('b/d')) 2132 ['b/d', 'b'] 2133 >>> list(_ancestry('/b/d/')) 2134 ['/b/d', '/b'] 2135 >>> list(_ancestry('b/d/f/')) 2136 ['b/d/f', 'b/d', 'b'] 2137 >>> list(_ancestry('b')) 2138 ['b'] 2139 >>> list(_ancestry('')) 2140 [] 2141 """ 2142 path = path.rstrip(posixpath.sep) 2143 while path and path != posixpath.sep: 2144 yield path 2145 path, tail = posixpath.split(path) 2146 2147 2148_dedupe = dict.fromkeys 2149"""Deduplicate an iterable in original order""" 2150 2151 2152def _difference(minuend, subtrahend): 2153 """ 2154 Return items in minuend not in subtrahend, retaining order 2155 with O(1) lookup. 2156 """ 2157 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 2158 2159 2160class CompleteDirs(ZipFile): 2161 """ 2162 A ZipFile subclass that ensures that implied directories 2163 are always included in the namelist. 2164 """ 2165 2166 @staticmethod 2167 def _implied_dirs(names): 2168 parents = itertools.chain.from_iterable(map(_parents, names)) 2169 as_dirs = (p + posixpath.sep for p in parents) 2170 return _dedupe(_difference(as_dirs, names)) 2171 2172 def namelist(self): 2173 names = super(CompleteDirs, self).namelist() 2174 return names + list(self._implied_dirs(names)) 2175 2176 def _name_set(self): 2177 return set(self.namelist()) 2178 2179 def resolve_dir(self, name): 2180 """ 2181 If the name represents a directory, return that name 2182 as a directory (with the trailing slash). 2183 """ 2184 names = self._name_set() 2185 dirname = name + '/' 2186 dir_match = name not in names and dirname in names 2187 return dirname if dir_match else name 2188 2189 @classmethod 2190 def make(cls, source): 2191 """ 2192 Given a source (filename or zipfile), return an 2193 appropriate CompleteDirs subclass. 2194 """ 2195 if isinstance(source, CompleteDirs): 2196 return source 2197 2198 if not isinstance(source, ZipFile): 2199 return cls(source) 2200 2201 # Only allow for FastLookup when supplied zipfile is read-only 2202 if 'r' not in source.mode: 2203 cls = CompleteDirs 2204 2205 source.__class__ = cls 2206 return source 2207 2208 2209class FastLookup(CompleteDirs): 2210 """ 2211 ZipFile subclass to ensure implicit 2212 dirs exist and are resolved rapidly. 2213 """ 2214 2215 def namelist(self): 2216 with contextlib.suppress(AttributeError): 2217 return self.__names 2218 self.__names = super(FastLookup, self).namelist() 2219 return self.__names 2220 2221 def _name_set(self): 2222 with contextlib.suppress(AttributeError): 2223 return self.__lookup 2224 self.__lookup = super(FastLookup, self)._name_set() 2225 return self.__lookup 2226 2227 2228class Path: 2229 """ 2230 A pathlib-compatible interface for zip files. 2231 2232 Consider a zip file with this structure:: 2233 2234 . 2235 ├── a.txt 2236 └── b 2237 ├── c.txt 2238 └── d 2239 └── e.txt 2240 2241 >>> data = io.BytesIO() 2242 >>> zf = ZipFile(data, 'w') 2243 >>> zf.writestr('a.txt', 'content of a') 2244 >>> zf.writestr('b/c.txt', 'content of c') 2245 >>> zf.writestr('b/d/e.txt', 'content of e') 2246 >>> zf.filename = 'mem/abcde.zip' 2247 2248 Path accepts the zipfile object itself or a filename 2249 2250 >>> root = Path(zf) 2251 2252 From there, several path operations are available. 2253 2254 Directory iteration (including the zip file itself): 2255 2256 >>> a, b = root.iterdir() 2257 >>> a 2258 Path('mem/abcde.zip', 'a.txt') 2259 >>> b 2260 Path('mem/abcde.zip', 'b/') 2261 2262 name property: 2263 2264 >>> b.name 2265 'b' 2266 2267 join with divide operator: 2268 2269 >>> c = b / 'c.txt' 2270 >>> c 2271 Path('mem/abcde.zip', 'b/c.txt') 2272 >>> c.name 2273 'c.txt' 2274 2275 Read text: 2276 2277 >>> c.read_text() 2278 'content of c' 2279 2280 existence: 2281 2282 >>> c.exists() 2283 True 2284 >>> (b / 'missing.txt').exists() 2285 False 2286 2287 Coercion to string: 2288 2289 >>> import os 2290 >>> str(c).replace(os.sep, posixpath.sep) 2291 'mem/abcde.zip/b/c.txt' 2292 2293 At the root, ``name``, ``filename``, and ``parent`` 2294 resolve to the zipfile. Note these attributes are not 2295 valid and will raise a ``ValueError`` if the zipfile 2296 has no filename. 2297 2298 >>> root.name 2299 'abcde.zip' 2300 >>> str(root.filename).replace(os.sep, posixpath.sep) 2301 'mem/abcde.zip' 2302 >>> str(root.parent) 2303 'mem' 2304 """ 2305 2306 __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" 2307 2308 def __init__(self, root, at=""): 2309 """ 2310 Construct a Path from a ZipFile or filename. 2311 2312 Note: When the source is an existing ZipFile object, 2313 its type (__class__) will be mutated to a 2314 specialized type. If the caller wishes to retain the 2315 original type, the caller should either create a 2316 separate ZipFile object or pass a filename. 2317 """ 2318 self.root = FastLookup.make(root) 2319 self.at = at 2320 2321 def open(self, mode='r', *args, pwd=None, **kwargs): 2322 """ 2323 Open this entry as text or binary following the semantics 2324 of ``pathlib.Path.open()`` by passing arguments through 2325 to io.TextIOWrapper(). 2326 """ 2327 if self.is_dir(): 2328 raise IsADirectoryError(self) 2329 zip_mode = mode[0] 2330 if not self.exists() and zip_mode == 'r': 2331 raise FileNotFoundError(self) 2332 stream = self.root.open(self.at, zip_mode, pwd=pwd) 2333 if 'b' in mode: 2334 if args or kwargs: 2335 raise ValueError("encoding args invalid for binary operation") 2336 return stream 2337 else: 2338 kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) 2339 return io.TextIOWrapper(stream, *args, **kwargs) 2340 2341 @property 2342 def name(self): 2343 return pathlib.Path(self.at).name or self.filename.name 2344 2345 @property 2346 def filename(self): 2347 return pathlib.Path(self.root.filename).joinpath(self.at) 2348 2349 def read_text(self, *args, **kwargs): 2350 kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) 2351 with self.open('r', *args, **kwargs) as strm: 2352 return strm.read() 2353 2354 def read_bytes(self): 2355 with self.open('rb') as strm: 2356 return strm.read() 2357 2358 def _is_child(self, path): 2359 return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") 2360 2361 def _next(self, at): 2362 return self.__class__(self.root, at) 2363 2364 def is_dir(self): 2365 return not self.at or self.at.endswith("/") 2366 2367 def is_file(self): 2368 return self.exists() and not self.is_dir() 2369 2370 def exists(self): 2371 return self.at in self.root._name_set() 2372 2373 def iterdir(self): 2374 if not self.is_dir(): 2375 raise ValueError("Can't listdir a file") 2376 subs = map(self._next, self.root.namelist()) 2377 return filter(self._is_child, subs) 2378 2379 def __str__(self): 2380 return posixpath.join(self.root.filename, self.at) 2381 2382 def __repr__(self): 2383 return self.__repr.format(self=self) 2384 2385 def joinpath(self, *other): 2386 next = posixpath.join(self.at, *other) 2387 return self._next(self.root.resolve_dir(next)) 2388 2389 __truediv__ = joinpath 2390 2391 @property 2392 def parent(self): 2393 if not self.at: 2394 return self.filename.parent 2395 parent_at = posixpath.dirname(self.at.rstrip('/')) 2396 if parent_at: 2397 parent_at += '/' 2398 return self._next(parent_at) 2399 2400 2401def main(args=None): 2402 import argparse 2403 2404 description = 'A simple command-line interface for zipfile module.' 2405 parser = argparse.ArgumentParser(description=description) 2406 group = parser.add_mutually_exclusive_group(required=True) 2407 group.add_argument('-l', '--list', metavar='<zipfile>', 2408 help='Show listing of a zipfile') 2409 group.add_argument('-e', '--extract', nargs=2, 2410 metavar=('<zipfile>', '<output_dir>'), 2411 help='Extract zipfile into target dir') 2412 group.add_argument('-c', '--create', nargs='+', 2413 metavar=('<name>', '<file>'), 2414 help='Create zipfile from sources') 2415 group.add_argument('-t', '--test', metavar='<zipfile>', 2416 help='Test if a zipfile is valid') 2417 args = parser.parse_args(args) 2418 2419 if args.test is not None: 2420 src = args.test 2421 with ZipFile(src, 'r') as zf: 2422 badfile = zf.testzip() 2423 if badfile: 2424 print("The following enclosed file is corrupted: {!r}".format(badfile)) 2425 print("Done testing") 2426 2427 elif args.list is not None: 2428 src = args.list 2429 with ZipFile(src, 'r') as zf: 2430 zf.printdir() 2431 2432 elif args.extract is not None: 2433 src, curdir = args.extract 2434 with ZipFile(src, 'r') as zf: 2435 zf.extractall(curdir) 2436 2437 elif args.create is not None: 2438 zip_name = args.create.pop(0) 2439 files = args.create 2440 2441 def addToZip(zf, path, zippath): 2442 if os.path.isfile(path): 2443 zf.write(path, zippath, ZIP_DEFLATED) 2444 elif os.path.isdir(path): 2445 if zippath: 2446 zf.write(path, zippath) 2447 for nm in sorted(os.listdir(path)): 2448 addToZip(zf, 2449 os.path.join(path, nm), os.path.join(zippath, nm)) 2450 # else: ignore 2451 2452 with ZipFile(zip_name, 'w') as zf: 2453 for path in files: 2454 zippath = os.path.basename(path) 2455 if not zippath: 2456 zippath = os.path.basename(os.path.dirname(path)) 2457 if zippath in ('', os.curdir, os.pardir): 2458 zippath = '' 2459 addToZip(zf, path, zippath) 2460 2461 2462if __name__ == "__main__": 2463 main() 2464