1""" 2Read and write ZIP files. 3 4XXX references to utf-8 need further investigation. 5""" 6import binascii 7import importlib.util 8import io 9import os 10import shutil 11import stat 12import struct 13import sys 14import threading 15import time 16 17try: 18 import zlib # We may need its compression method 19 crc32 = zlib.crc32 20except ImportError: 21 zlib = None 22 crc32 = binascii.crc32 23 24try: 25 import bz2 # We may need its compression method 26except ImportError: 27 bz2 = None 28 29try: 30 import lzma # We may need its compression method 31except ImportError: 32 lzma = None 33 34__all__ = ["BadZipFile", "BadZipfile", "error", 35 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", 36 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", 37 "Path"] 38 39class BadZipFile(Exception): 40 pass 41 42 43class LargeZipFile(Exception): 44 """ 45 Raised when writing a zipfile, the zipfile requires ZIP64 extensions 46 and those extensions are disabled. 47 """ 48 49error = BadZipfile = BadZipFile # Pre-3.2 compatibility names 50 51 52ZIP64_LIMIT = (1 << 31) - 1 53ZIP_FILECOUNT_LIMIT = (1 << 16) - 1 54ZIP_MAX_COMMENT = (1 << 16) - 1 55 56# constants for Zip file compression methods 57ZIP_STORED = 0 58ZIP_DEFLATED = 8 59ZIP_BZIP2 = 12 60ZIP_LZMA = 14 61# Other ZIP compression methods not supported 62 63DEFAULT_VERSION = 20 64ZIP64_VERSION = 45 65BZIP2_VERSION = 46 66LZMA_VERSION = 63 67# we recognize (but not necessarily support) all features up to that version 68MAX_EXTRACT_VERSION = 63 69 70# Below are some formats and associated data for reading/writing headers using 71# the struct module. The names and structures of headers/records are those used 72# in the PKWARE description of the ZIP file format: 73# http://www.pkware.com/documents/casestudies/APPNOTE.TXT 74# (URL valid as of January 2008) 75 76# The "end of central directory" structure, magic number, size, and indices 77# (section V.I in the format document) 78structEndArchive = b"<4s4H2LH" 79stringEndArchive = b"PK\005\006" 80sizeEndCentDir = struct.calcsize(structEndArchive) 81 82_ECD_SIGNATURE = 0 83_ECD_DISK_NUMBER = 1 84_ECD_DISK_START = 2 85_ECD_ENTRIES_THIS_DISK = 3 86_ECD_ENTRIES_TOTAL = 4 87_ECD_SIZE = 5 88_ECD_OFFSET = 6 89_ECD_COMMENT_SIZE = 7 90# These last two indices are not part of the structure as defined in the 91# spec, but they are used internally by this module as a convenience 92_ECD_COMMENT = 8 93_ECD_LOCATION = 9 94 95# The "central directory" structure, magic number, size, and indices 96# of entries in the structure (section V.F in the format document) 97structCentralDir = "<4s4B4HL2L5H2L" 98stringCentralDir = b"PK\001\002" 99sizeCentralDir = struct.calcsize(structCentralDir) 100 101# indexes of entries in the central directory structure 102_CD_SIGNATURE = 0 103_CD_CREATE_VERSION = 1 104_CD_CREATE_SYSTEM = 2 105_CD_EXTRACT_VERSION = 3 106_CD_EXTRACT_SYSTEM = 4 107_CD_FLAG_BITS = 5 108_CD_COMPRESS_TYPE = 6 109_CD_TIME = 7 110_CD_DATE = 8 111_CD_CRC = 9 112_CD_COMPRESSED_SIZE = 10 113_CD_UNCOMPRESSED_SIZE = 11 114_CD_FILENAME_LENGTH = 12 115_CD_EXTRA_FIELD_LENGTH = 13 116_CD_COMMENT_LENGTH = 14 117_CD_DISK_NUMBER_START = 15 118_CD_INTERNAL_FILE_ATTRIBUTES = 16 119_CD_EXTERNAL_FILE_ATTRIBUTES = 17 120_CD_LOCAL_HEADER_OFFSET = 18 121 122# General purpose bit flags 123# Zip Appnote: 4.4.4 general purpose bit flag: (2 bytes) 124_MASK_ENCRYPTED = 1 << 0 125# Bits 1 and 2 have different meanings depending on the compression used. 126_MASK_COMPRESS_OPTION_1 = 1 << 1 127# _MASK_COMPRESS_OPTION_2 = 1 << 2 128# _MASK_USE_DATA_DESCRIPTOR: If set, crc-32, compressed size and uncompressed 129# size are zero in the local header and the real values are written in the data 130# descriptor immediately following the compressed data. 131_MASK_USE_DATA_DESCRIPTOR = 1 << 3 132# Bit 4: Reserved for use with compression method 8, for enhanced deflating. 133# _MASK_RESERVED_BIT_4 = 1 << 4 134_MASK_COMPRESSED_PATCH = 1 << 5 135_MASK_STRONG_ENCRYPTION = 1 << 6 136# _MASK_UNUSED_BIT_7 = 1 << 7 137# _MASK_UNUSED_BIT_8 = 1 << 8 138# _MASK_UNUSED_BIT_9 = 1 << 9 139# _MASK_UNUSED_BIT_10 = 1 << 10 140_MASK_UTF_FILENAME = 1 << 11 141# Bit 12: Reserved by PKWARE for enhanced compression. 142# _MASK_RESERVED_BIT_12 = 1 << 12 143# _MASK_ENCRYPTED_CENTRAL_DIR = 1 << 13 144# Bit 14, 15: Reserved by PKWARE 145# _MASK_RESERVED_BIT_14 = 1 << 14 146# _MASK_RESERVED_BIT_15 = 1 << 15 147 148# The "local file header" structure, magic number, size, and indices 149# (section V.A in the format document) 150structFileHeader = "<4s2B4HL2L2H" 151stringFileHeader = b"PK\003\004" 152sizeFileHeader = struct.calcsize(structFileHeader) 153 154_FH_SIGNATURE = 0 155_FH_EXTRACT_VERSION = 1 156_FH_EXTRACT_SYSTEM = 2 157_FH_GENERAL_PURPOSE_FLAG_BITS = 3 158_FH_COMPRESSION_METHOD = 4 159_FH_LAST_MOD_TIME = 5 160_FH_LAST_MOD_DATE = 6 161_FH_CRC = 7 162_FH_COMPRESSED_SIZE = 8 163_FH_UNCOMPRESSED_SIZE = 9 164_FH_FILENAME_LENGTH = 10 165_FH_EXTRA_FIELD_LENGTH = 11 166 167# The "Zip64 end of central directory locator" structure, magic number, and size 168structEndArchive64Locator = "<4sLQL" 169stringEndArchive64Locator = b"PK\x06\x07" 170sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) 171 172# The "Zip64 end of central directory" record, magic number, size, and indices 173# (section V.G in the format document) 174structEndArchive64 = "<4sQ2H2L4Q" 175stringEndArchive64 = b"PK\x06\x06" 176sizeEndCentDir64 = struct.calcsize(structEndArchive64) 177 178_CD64_SIGNATURE = 0 179_CD64_DIRECTORY_RECSIZE = 1 180_CD64_CREATE_VERSION = 2 181_CD64_EXTRACT_VERSION = 3 182_CD64_DISK_NUMBER = 4 183_CD64_DISK_NUMBER_START = 5 184_CD64_NUMBER_ENTRIES_THIS_DISK = 6 185_CD64_NUMBER_ENTRIES_TOTAL = 7 186_CD64_DIRECTORY_SIZE = 8 187_CD64_OFFSET_START_CENTDIR = 9 188 189_DD_SIGNATURE = 0x08074b50 190 191 192class _Extra(bytes): 193 FIELD_STRUCT = struct.Struct('<HH') 194 195 def __new__(cls, val, id=None): 196 return super().__new__(cls, val) 197 198 def __init__(self, val, id=None): 199 self.id = id 200 201 @classmethod 202 def read_one(cls, raw): 203 try: 204 xid, xlen = cls.FIELD_STRUCT.unpack(raw[:4]) 205 except struct.error: 206 xid = None 207 xlen = 0 208 return cls(raw[:4+xlen], xid), raw[4+xlen:] 209 210 @classmethod 211 def split(cls, data): 212 # use memoryview for zero-copy slices 213 rest = memoryview(data) 214 while rest: 215 extra, rest = _Extra.read_one(rest) 216 yield extra 217 218 @classmethod 219 def strip(cls, data, xids): 220 """Remove Extra fields with specified IDs.""" 221 return b''.join( 222 ex 223 for ex in cls.split(data) 224 if ex.id not in xids 225 ) 226 227 228def _check_zipfile(fp): 229 try: 230 if _EndRecData(fp): 231 return True # file has correct magic number 232 except OSError: 233 pass 234 return False 235 236def is_zipfile(filename): 237 """Quickly see if a file is a ZIP file by checking the magic number. 238 239 The filename argument may be a file or file-like object too. 240 """ 241 result = False 242 try: 243 if hasattr(filename, "read"): 244 result = _check_zipfile(fp=filename) 245 else: 246 with open(filename, "rb") as fp: 247 result = _check_zipfile(fp) 248 except OSError: 249 pass 250 return result 251 252def _EndRecData64(fpin, offset, endrec): 253 """ 254 Read the ZIP64 end-of-archive records and use that to update endrec 255 """ 256 try: 257 fpin.seek(offset - sizeEndCentDir64Locator, 2) 258 except OSError: 259 # If the seek fails, the file is not large enough to contain a ZIP64 260 # end-of-archive record, so just return the end record we were given. 261 return endrec 262 263 data = fpin.read(sizeEndCentDir64Locator) 264 if len(data) != sizeEndCentDir64Locator: 265 return endrec 266 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) 267 if sig != stringEndArchive64Locator: 268 return endrec 269 270 if diskno != 0 or disks > 1: 271 raise BadZipFile("zipfiles that span multiple disks are not supported") 272 273 # Assume no 'zip64 extensible data' 274 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 275 data = fpin.read(sizeEndCentDir64) 276 if len(data) != sizeEndCentDir64: 277 return endrec 278 sig, sz, create_version, read_version, disk_num, disk_dir, \ 279 dircount, dircount2, dirsize, diroffset = \ 280 struct.unpack(structEndArchive64, data) 281 if sig != stringEndArchive64: 282 return endrec 283 284 # Update the original endrec using data from the ZIP64 record 285 endrec[_ECD_SIGNATURE] = sig 286 endrec[_ECD_DISK_NUMBER] = disk_num 287 endrec[_ECD_DISK_START] = disk_dir 288 endrec[_ECD_ENTRIES_THIS_DISK] = dircount 289 endrec[_ECD_ENTRIES_TOTAL] = dircount2 290 endrec[_ECD_SIZE] = dirsize 291 endrec[_ECD_OFFSET] = diroffset 292 return endrec 293 294 295def _EndRecData(fpin): 296 """Return data from the "End of Central Directory" record, or None. 297 298 The data is a list of the nine items in the ZIP "End of central dir" 299 record followed by a tenth item, the file seek offset of this record.""" 300 301 # Determine file size 302 fpin.seek(0, 2) 303 filesize = fpin.tell() 304 305 # Check to see if this is ZIP file with no archive comment (the 306 # "end of central directory" structure should be the last item in the 307 # file if this is the case). 308 try: 309 fpin.seek(-sizeEndCentDir, 2) 310 except OSError: 311 return None 312 data = fpin.read(sizeEndCentDir) 313 if (len(data) == sizeEndCentDir and 314 data[0:4] == stringEndArchive and 315 data[-2:] == b"\000\000"): 316 # the signature is correct and there's no comment, unpack structure 317 endrec = struct.unpack(structEndArchive, data) 318 endrec=list(endrec) 319 320 # Append a blank comment and record start offset 321 endrec.append(b"") 322 endrec.append(filesize - sizeEndCentDir) 323 324 # Try to read the "Zip64 end of central directory" structure 325 return _EndRecData64(fpin, -sizeEndCentDir, endrec) 326 327 # Either this is not a ZIP file, or it is a ZIP file with an archive 328 # comment. Search the end of the file for the "end of central directory" 329 # record signature. The comment is the last item in the ZIP file and may be 330 # up to 64K long. It is assumed that the "end of central directory" magic 331 # number does not appear in the comment. 332 maxCommentStart = max(filesize - ZIP_MAX_COMMENT - sizeEndCentDir, 0) 333 fpin.seek(maxCommentStart, 0) 334 data = fpin.read(ZIP_MAX_COMMENT + sizeEndCentDir) 335 start = data.rfind(stringEndArchive) 336 if start >= 0: 337 # found the magic number; attempt to unpack and interpret 338 recData = data[start:start+sizeEndCentDir] 339 if len(recData) != sizeEndCentDir: 340 # Zip file is corrupted. 341 return None 342 endrec = list(struct.unpack(structEndArchive, recData)) 343 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file 344 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize] 345 endrec.append(comment) 346 endrec.append(maxCommentStart + start) 347 348 # Try to read the "Zip64 end of central directory" structure 349 return _EndRecData64(fpin, maxCommentStart + start - filesize, 350 endrec) 351 352 # Unable to find a valid end of central directory structure 353 return None 354 355def _sanitize_filename(filename): 356 """Terminate the file name at the first null byte and 357 ensure paths always use forward slashes as the directory separator.""" 358 359 # Terminate the file name at the first null byte. Null bytes in file 360 # names are used as tricks by viruses in archives. 361 null_byte = filename.find(chr(0)) 362 if null_byte >= 0: 363 filename = filename[0:null_byte] 364 # This is used to ensure paths in generated ZIP files always use 365 # forward slashes as the directory separator, as required by the 366 # ZIP format specification. 367 if os.sep != "/" and os.sep in filename: 368 filename = filename.replace(os.sep, "/") 369 if os.altsep and os.altsep != "/" and os.altsep in filename: 370 filename = filename.replace(os.altsep, "/") 371 return filename 372 373 374class ZipInfo: 375 """Class with attributes describing each file in the ZIP archive.""" 376 377 __slots__ = ( 378 'orig_filename', 379 'filename', 380 'date_time', 381 'compress_type', 382 'compress_level', 383 'comment', 384 'extra', 385 'create_system', 386 'create_version', 387 'extract_version', 388 'reserved', 389 'flag_bits', 390 'volume', 391 'internal_attr', 392 'external_attr', 393 'header_offset', 394 'CRC', 395 'compress_size', 396 'file_size', 397 '_raw_time', 398 '_end_offset', 399 ) 400 401 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): 402 self.orig_filename = filename # Original file name in archive 403 404 # Terminate the file name at the first null byte and 405 # ensure paths always use forward slashes as the directory separator. 406 filename = _sanitize_filename(filename) 407 408 self.filename = filename # Normalized file name 409 self.date_time = date_time # year, month, day, hour, min, sec 410 411 if date_time[0] < 1980: 412 raise ValueError('ZIP does not support timestamps before 1980') 413 414 # Standard values: 415 self.compress_type = ZIP_STORED # Type of compression for the file 416 self.compress_level = None # Level for the compressor 417 self.comment = b"" # Comment for each file 418 self.extra = b"" # ZIP extra data 419 if sys.platform == 'win32': 420 self.create_system = 0 # System which created ZIP archive 421 else: 422 # Assume everything else is unix-y 423 self.create_system = 3 # System which created ZIP archive 424 self.create_version = DEFAULT_VERSION # Version which created ZIP archive 425 self.extract_version = DEFAULT_VERSION # Version needed to extract archive 426 self.reserved = 0 # Must be zero 427 self.flag_bits = 0 # ZIP flag bits 428 self.volume = 0 # Volume number of file header 429 self.internal_attr = 0 # Internal attributes 430 self.external_attr = 0 # External file attributes 431 self.compress_size = 0 # Size of the compressed file 432 self.file_size = 0 # Size of the uncompressed file 433 self._end_offset = None # Start of the next local header or central directory 434 # Other attributes are set by class ZipFile: 435 # header_offset Byte offset to the file header 436 # CRC CRC-32 of the uncompressed file 437 438 # Maintain backward compatibility with the old protected attribute name. 439 @property 440 def _compresslevel(self): 441 return self.compress_level 442 443 @_compresslevel.setter 444 def _compresslevel(self, value): 445 self.compress_level = value 446 447 def __repr__(self): 448 result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)] 449 if self.compress_type != ZIP_STORED: 450 result.append(' compress_type=%s' % 451 compressor_names.get(self.compress_type, 452 self.compress_type)) 453 hi = self.external_attr >> 16 454 lo = self.external_attr & 0xFFFF 455 if hi: 456 result.append(' filemode=%r' % stat.filemode(hi)) 457 if lo: 458 result.append(' external_attr=%#x' % lo) 459 isdir = self.is_dir() 460 if not isdir or self.file_size: 461 result.append(' file_size=%r' % self.file_size) 462 if ((not isdir or self.compress_size) and 463 (self.compress_type != ZIP_STORED or 464 self.file_size != self.compress_size)): 465 result.append(' compress_size=%r' % self.compress_size) 466 result.append('>') 467 return ''.join(result) 468 469 def FileHeader(self, zip64=None): 470 """Return the per-file header as a bytes object. 471 472 When the optional zip64 arg is None rather than a bool, we will 473 decide based upon the file_size and compress_size, if known, 474 False otherwise. 475 """ 476 dt = self.date_time 477 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 478 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 479 if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 480 # Set these to zero because we write them after the file data 481 CRC = compress_size = file_size = 0 482 else: 483 CRC = self.CRC 484 compress_size = self.compress_size 485 file_size = self.file_size 486 487 extra = self.extra 488 489 min_version = 0 490 if zip64 is None: 491 # We always explicitly pass zip64 within this module.... This 492 # remains for anyone using ZipInfo.FileHeader as a public API. 493 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT 494 if zip64: 495 fmt = '<HHQQ' 496 extra = extra + struct.pack(fmt, 497 1, struct.calcsize(fmt)-4, file_size, compress_size) 498 file_size = 0xffffffff 499 compress_size = 0xffffffff 500 min_version = ZIP64_VERSION 501 502 if self.compress_type == ZIP_BZIP2: 503 min_version = max(BZIP2_VERSION, min_version) 504 elif self.compress_type == ZIP_LZMA: 505 min_version = max(LZMA_VERSION, min_version) 506 507 self.extract_version = max(min_version, self.extract_version) 508 self.create_version = max(min_version, self.create_version) 509 filename, flag_bits = self._encodeFilenameFlags() 510 header = struct.pack(structFileHeader, stringFileHeader, 511 self.extract_version, self.reserved, flag_bits, 512 self.compress_type, dostime, dosdate, CRC, 513 compress_size, file_size, 514 len(filename), len(extra)) 515 return header + filename + extra 516 517 def _encodeFilenameFlags(self): 518 try: 519 return self.filename.encode('ascii'), self.flag_bits 520 except UnicodeEncodeError: 521 return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME 522 523 def _decodeExtra(self, filename_crc): 524 # Try to decode the extra field. 525 extra = self.extra 526 unpack = struct.unpack 527 while len(extra) >= 4: 528 tp, ln = unpack('<HH', extra[:4]) 529 if ln+4 > len(extra): 530 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) 531 if tp == 0x0001: 532 data = extra[4:ln+4] 533 # ZIP64 extension (large files and/or large archives) 534 try: 535 if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF): 536 field = "File size" 537 self.file_size, = unpack('<Q', data[:8]) 538 data = data[8:] 539 if self.compress_size == 0xFFFF_FFFF: 540 field = "Compress size" 541 self.compress_size, = unpack('<Q', data[:8]) 542 data = data[8:] 543 if self.header_offset == 0xFFFF_FFFF: 544 field = "Header offset" 545 self.header_offset, = unpack('<Q', data[:8]) 546 except struct.error: 547 raise BadZipFile(f"Corrupt zip64 extra field. " 548 f"{field} not found.") from None 549 elif tp == 0x7075: 550 data = extra[4:ln+4] 551 # Unicode Path Extra Field 552 try: 553 up_version, up_name_crc = unpack('<BL', data[:5]) 554 if up_version == 1 and up_name_crc == filename_crc: 555 up_unicode_name = data[5:].decode('utf-8') 556 if up_unicode_name: 557 self.filename = _sanitize_filename(up_unicode_name) 558 else: 559 import warnings 560 warnings.warn("Empty unicode path extra field (0x7075)", stacklevel=2) 561 except struct.error as e: 562 raise BadZipFile("Corrupt unicode path extra field (0x7075)") from e 563 except UnicodeDecodeError as e: 564 raise BadZipFile('Corrupt unicode path extra field (0x7075): invalid utf-8 bytes') from e 565 566 extra = extra[ln+4:] 567 568 @classmethod 569 def from_file(cls, filename, arcname=None, *, strict_timestamps=True): 570 """Construct an appropriate ZipInfo for a file on the filesystem. 571 572 filename should be the path to a file or directory on the filesystem. 573 574 arcname is the name which it will have within the archive (by default, 575 this will be the same as filename, but without a drive letter and with 576 leading path separators removed). 577 """ 578 if isinstance(filename, os.PathLike): 579 filename = os.fspath(filename) 580 st = os.stat(filename) 581 isdir = stat.S_ISDIR(st.st_mode) 582 mtime = time.localtime(st.st_mtime) 583 date_time = mtime[0:6] 584 if not strict_timestamps and date_time[0] < 1980: 585 date_time = (1980, 1, 1, 0, 0, 0) 586 elif not strict_timestamps and date_time[0] > 2107: 587 date_time = (2107, 12, 31, 23, 59, 59) 588 # Create ZipInfo instance to store file information 589 if arcname is None: 590 arcname = filename 591 arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) 592 while arcname[0] in (os.sep, os.altsep): 593 arcname = arcname[1:] 594 if isdir: 595 arcname += '/' 596 zinfo = cls(arcname, date_time) 597 zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes 598 if isdir: 599 zinfo.file_size = 0 600 zinfo.external_attr |= 0x10 # MS-DOS directory flag 601 else: 602 zinfo.file_size = st.st_size 603 604 return zinfo 605 606 def is_dir(self): 607 """Return True if this archive member is a directory.""" 608 if self.filename.endswith('/'): 609 return True 610 # The ZIP format specification requires to use forward slashes 611 # as the directory separator, but in practice some ZIP files 612 # created on Windows can use backward slashes. For compatibility 613 # with the extraction code which already handles this: 614 if os.path.altsep: 615 return self.filename.endswith((os.path.sep, os.path.altsep)) 616 return False 617 618 619# ZIP encryption uses the CRC32 one-byte primitive for scrambling some 620# internal keys. We noticed that a direct implementation is faster than 621# relying on binascii.crc32(). 622 623_crctable = None 624def _gen_crc(crc): 625 for j in range(8): 626 if crc & 1: 627 crc = (crc >> 1) ^ 0xEDB88320 628 else: 629 crc >>= 1 630 return crc 631 632# ZIP supports a password-based form of encryption. Even though known 633# plaintext attacks have been found against it, it is still useful 634# to be able to get data out of such a file. 635# 636# Usage: 637# zd = _ZipDecrypter(mypwd) 638# plain_bytes = zd(cypher_bytes) 639 640def _ZipDecrypter(pwd): 641 key0 = 305419896 642 key1 = 591751049 643 key2 = 878082192 644 645 global _crctable 646 if _crctable is None: 647 _crctable = list(map(_gen_crc, range(256))) 648 crctable = _crctable 649 650 def crc32(ch, crc): 651 """Compute the CRC32 primitive on one byte.""" 652 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] 653 654 def update_keys(c): 655 nonlocal key0, key1, key2 656 key0 = crc32(c, key0) 657 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF 658 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF 659 key2 = crc32(key1 >> 24, key2) 660 661 for p in pwd: 662 update_keys(p) 663 664 def decrypter(data): 665 """Decrypt a bytes object.""" 666 result = bytearray() 667 append = result.append 668 for c in data: 669 k = key2 | 2 670 c ^= ((k * (k^1)) >> 8) & 0xFF 671 update_keys(c) 672 append(c) 673 return bytes(result) 674 675 return decrypter 676 677 678class LZMACompressor: 679 680 def __init__(self): 681 self._comp = None 682 683 def _init(self): 684 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1}) 685 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[ 686 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props) 687 ]) 688 return struct.pack('<BBH', 9, 4, len(props)) + props 689 690 def compress(self, data): 691 if self._comp is None: 692 return self._init() + self._comp.compress(data) 693 return self._comp.compress(data) 694 695 def flush(self): 696 if self._comp is None: 697 return self._init() + self._comp.flush() 698 return self._comp.flush() 699 700 701class LZMADecompressor: 702 703 def __init__(self): 704 self._decomp = None 705 self._unconsumed = b'' 706 self.eof = False 707 708 def decompress(self, data): 709 if self._decomp is None: 710 self._unconsumed += data 711 if len(self._unconsumed) <= 4: 712 return b'' 713 psize, = struct.unpack('<H', self._unconsumed[2:4]) 714 if len(self._unconsumed) <= 4 + psize: 715 return b'' 716 717 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[ 718 lzma._decode_filter_properties(lzma.FILTER_LZMA1, 719 self._unconsumed[4:4 + psize]) 720 ]) 721 data = self._unconsumed[4 + psize:] 722 del self._unconsumed 723 724 result = self._decomp.decompress(data) 725 self.eof = self._decomp.eof 726 return result 727 728 729compressor_names = { 730 0: 'store', 731 1: 'shrink', 732 2: 'reduce', 733 3: 'reduce', 734 4: 'reduce', 735 5: 'reduce', 736 6: 'implode', 737 7: 'tokenize', 738 8: 'deflate', 739 9: 'deflate64', 740 10: 'implode', 741 12: 'bzip2', 742 14: 'lzma', 743 18: 'terse', 744 19: 'lz77', 745 97: 'wavpack', 746 98: 'ppmd', 747} 748 749def _check_compression(compression): 750 if compression == ZIP_STORED: 751 pass 752 elif compression == ZIP_DEFLATED: 753 if not zlib: 754 raise RuntimeError( 755 "Compression requires the (missing) zlib module") 756 elif compression == ZIP_BZIP2: 757 if not bz2: 758 raise RuntimeError( 759 "Compression requires the (missing) bz2 module") 760 elif compression == ZIP_LZMA: 761 if not lzma: 762 raise RuntimeError( 763 "Compression requires the (missing) lzma module") 764 else: 765 raise NotImplementedError("That compression method is not supported") 766 767 768def _get_compressor(compress_type, compresslevel=None): 769 if compress_type == ZIP_DEFLATED: 770 if compresslevel is not None: 771 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15) 772 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) 773 elif compress_type == ZIP_BZIP2: 774 if compresslevel is not None: 775 return bz2.BZ2Compressor(compresslevel) 776 return bz2.BZ2Compressor() 777 # compresslevel is ignored for ZIP_LZMA 778 elif compress_type == ZIP_LZMA: 779 return LZMACompressor() 780 else: 781 return None 782 783 784def _get_decompressor(compress_type): 785 _check_compression(compress_type) 786 if compress_type == ZIP_STORED: 787 return None 788 elif compress_type == ZIP_DEFLATED: 789 return zlib.decompressobj(-15) 790 elif compress_type == ZIP_BZIP2: 791 return bz2.BZ2Decompressor() 792 elif compress_type == ZIP_LZMA: 793 return LZMADecompressor() 794 else: 795 descr = compressor_names.get(compress_type) 796 if descr: 797 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr)) 798 else: 799 raise NotImplementedError("compression type %d" % (compress_type,)) 800 801 802class _SharedFile: 803 def __init__(self, file, pos, close, lock, writing): 804 self._file = file 805 self._pos = pos 806 self._close = close 807 self._lock = lock 808 self._writing = writing 809 self.seekable = file.seekable 810 811 def tell(self): 812 return self._pos 813 814 def seek(self, offset, whence=0): 815 with self._lock: 816 if self._writing(): 817 raise ValueError("Can't reposition in the ZIP file while " 818 "there is an open writing handle on it. " 819 "Close the writing handle before trying to read.") 820 self._file.seek(offset, whence) 821 self._pos = self._file.tell() 822 return self._pos 823 824 def read(self, n=-1): 825 with self._lock: 826 if self._writing(): 827 raise ValueError("Can't read from the ZIP file while there " 828 "is an open writing handle on it. " 829 "Close the writing handle before trying to read.") 830 self._file.seek(self._pos) 831 data = self._file.read(n) 832 self._pos = self._file.tell() 833 return data 834 835 def close(self): 836 if self._file is not None: 837 fileobj = self._file 838 self._file = None 839 self._close(fileobj) 840 841# Provide the tell method for unseekable stream 842class _Tellable: 843 def __init__(self, fp): 844 self.fp = fp 845 self.offset = 0 846 847 def write(self, data): 848 n = self.fp.write(data) 849 self.offset += n 850 return n 851 852 def tell(self): 853 return self.offset 854 855 def flush(self): 856 self.fp.flush() 857 858 def close(self): 859 self.fp.close() 860 861 862class ZipExtFile(io.BufferedIOBase): 863 """File-like object for reading an archive member. 864 Is returned by ZipFile.open(). 865 """ 866 867 # Max size supported by decompressor. 868 MAX_N = 1 << 31 - 1 869 870 # Read from compressed files in 4k blocks. 871 MIN_READ_SIZE = 4096 872 873 # Chunk size to read during seek 874 MAX_SEEK_READ = 1 << 24 875 876 def __init__(self, fileobj, mode, zipinfo, pwd=None, 877 close_fileobj=False): 878 self._fileobj = fileobj 879 self._pwd = pwd 880 self._close_fileobj = close_fileobj 881 882 self._compress_type = zipinfo.compress_type 883 self._compress_left = zipinfo.compress_size 884 self._left = zipinfo.file_size 885 886 self._decompressor = _get_decompressor(self._compress_type) 887 888 self._eof = False 889 self._readbuffer = b'' 890 self._offset = 0 891 892 self.newlines = None 893 894 self.mode = mode 895 self.name = zipinfo.filename 896 897 if hasattr(zipinfo, 'CRC'): 898 self._expected_crc = zipinfo.CRC 899 self._running_crc = crc32(b'') 900 else: 901 self._expected_crc = None 902 903 self._seekable = False 904 try: 905 if fileobj.seekable(): 906 self._orig_compress_start = fileobj.tell() 907 self._orig_compress_size = zipinfo.compress_size 908 self._orig_file_size = zipinfo.file_size 909 self._orig_start_crc = self._running_crc 910 self._orig_crc = self._expected_crc 911 self._seekable = True 912 except AttributeError: 913 pass 914 915 self._decrypter = None 916 if pwd: 917 if zipinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 918 # compare against the file type from extended local headers 919 check_byte = (zipinfo._raw_time >> 8) & 0xff 920 else: 921 # compare against the CRC otherwise 922 check_byte = (zipinfo.CRC >> 24) & 0xff 923 h = self._init_decrypter() 924 if h != check_byte: 925 raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename) 926 927 928 def _init_decrypter(self): 929 self._decrypter = _ZipDecrypter(self._pwd) 930 # The first 12 bytes in the cypher stream is an encryption header 931 # used to strengthen the algorithm. The first 11 bytes are 932 # completely random, while the 12th contains the MSB of the CRC, 933 # or the MSB of the file time depending on the header type 934 # and is used to check the correctness of the password. 935 header = self._fileobj.read(12) 936 self._compress_left -= 12 937 return self._decrypter(header)[11] 938 939 def __repr__(self): 940 result = ['<%s.%s' % (self.__class__.__module__, 941 self.__class__.__qualname__)] 942 if not self.closed: 943 result.append(' name=%r' % (self.name,)) 944 if self._compress_type != ZIP_STORED: 945 result.append(' compress_type=%s' % 946 compressor_names.get(self._compress_type, 947 self._compress_type)) 948 else: 949 result.append(' [closed]') 950 result.append('>') 951 return ''.join(result) 952 953 def readline(self, limit=-1): 954 """Read and return a line from the stream. 955 956 If limit is specified, at most limit bytes will be read. 957 """ 958 959 if limit < 0: 960 # Shortcut common case - newline found in buffer. 961 i = self._readbuffer.find(b'\n', self._offset) + 1 962 if i > 0: 963 line = self._readbuffer[self._offset: i] 964 self._offset = i 965 return line 966 967 return io.BufferedIOBase.readline(self, limit) 968 969 def peek(self, n=1): 970 """Returns buffered bytes without advancing the position.""" 971 if n > len(self._readbuffer) - self._offset: 972 chunk = self.read(n) 973 if len(chunk) > self._offset: 974 self._readbuffer = chunk + self._readbuffer[self._offset:] 975 self._offset = 0 976 else: 977 self._offset -= len(chunk) 978 979 # Return up to 512 bytes to reduce allocation overhead for tight loops. 980 return self._readbuffer[self._offset: self._offset + 512] 981 982 def readable(self): 983 if self.closed: 984 raise ValueError("I/O operation on closed file.") 985 return True 986 987 def read(self, n=-1): 988 """Read and return up to n bytes. 989 If the argument is omitted, None, or negative, data is read and returned until EOF is reached. 990 """ 991 if self.closed: 992 raise ValueError("read from closed file.") 993 if n is None or n < 0: 994 buf = self._readbuffer[self._offset:] 995 self._readbuffer = b'' 996 self._offset = 0 997 while not self._eof: 998 buf += self._read1(self.MAX_N) 999 return buf 1000 1001 end = n + self._offset 1002 if end < len(self._readbuffer): 1003 buf = self._readbuffer[self._offset:end] 1004 self._offset = end 1005 return buf 1006 1007 n = end - len(self._readbuffer) 1008 buf = self._readbuffer[self._offset:] 1009 self._readbuffer = b'' 1010 self._offset = 0 1011 while n > 0 and not self._eof: 1012 data = self._read1(n) 1013 if n < len(data): 1014 self._readbuffer = data 1015 self._offset = n 1016 buf += data[:n] 1017 break 1018 buf += data 1019 n -= len(data) 1020 return buf 1021 1022 def _update_crc(self, newdata): 1023 # Update the CRC using the given data. 1024 if self._expected_crc is None: 1025 # No need to compute the CRC if we don't have a reference value 1026 return 1027 self._running_crc = crc32(newdata, self._running_crc) 1028 # Check the CRC if we're at the end of the file 1029 if self._eof and self._running_crc != self._expected_crc: 1030 raise BadZipFile("Bad CRC-32 for file %r" % self.name) 1031 1032 def read1(self, n): 1033 """Read up to n bytes with at most one read() system call.""" 1034 1035 if n is None or n < 0: 1036 buf = self._readbuffer[self._offset:] 1037 self._readbuffer = b'' 1038 self._offset = 0 1039 while not self._eof: 1040 data = self._read1(self.MAX_N) 1041 if data: 1042 buf += data 1043 break 1044 return buf 1045 1046 end = n + self._offset 1047 if end < len(self._readbuffer): 1048 buf = self._readbuffer[self._offset:end] 1049 self._offset = end 1050 return buf 1051 1052 n = end - len(self._readbuffer) 1053 buf = self._readbuffer[self._offset:] 1054 self._readbuffer = b'' 1055 self._offset = 0 1056 if n > 0: 1057 while not self._eof: 1058 data = self._read1(n) 1059 if n < len(data): 1060 self._readbuffer = data 1061 self._offset = n 1062 buf += data[:n] 1063 break 1064 if data: 1065 buf += data 1066 break 1067 return buf 1068 1069 def _read1(self, n): 1070 # Read up to n compressed bytes with at most one read() system call, 1071 # decrypt and decompress them. 1072 if self._eof or n <= 0: 1073 return b'' 1074 1075 # Read from file. 1076 if self._compress_type == ZIP_DEFLATED: 1077 ## Handle unconsumed data. 1078 data = self._decompressor.unconsumed_tail 1079 if n > len(data): 1080 data += self._read2(n - len(data)) 1081 else: 1082 data = self._read2(n) 1083 1084 if self._compress_type == ZIP_STORED: 1085 self._eof = self._compress_left <= 0 1086 elif self._compress_type == ZIP_DEFLATED: 1087 n = max(n, self.MIN_READ_SIZE) 1088 data = self._decompressor.decompress(data, n) 1089 self._eof = (self._decompressor.eof or 1090 self._compress_left <= 0 and 1091 not self._decompressor.unconsumed_tail) 1092 if self._eof: 1093 data += self._decompressor.flush() 1094 else: 1095 data = self._decompressor.decompress(data) 1096 self._eof = self._decompressor.eof or self._compress_left <= 0 1097 1098 data = data[:self._left] 1099 self._left -= len(data) 1100 if self._left <= 0: 1101 self._eof = True 1102 self._update_crc(data) 1103 return data 1104 1105 def _read2(self, n): 1106 if self._compress_left <= 0: 1107 return b'' 1108 1109 n = max(n, self.MIN_READ_SIZE) 1110 n = min(n, self._compress_left) 1111 1112 data = self._fileobj.read(n) 1113 self._compress_left -= len(data) 1114 if not data: 1115 raise EOFError 1116 1117 if self._decrypter is not None: 1118 data = self._decrypter(data) 1119 return data 1120 1121 def close(self): 1122 try: 1123 if self._close_fileobj: 1124 self._fileobj.close() 1125 finally: 1126 super().close() 1127 1128 def seekable(self): 1129 if self.closed: 1130 raise ValueError("I/O operation on closed file.") 1131 return self._seekable 1132 1133 def seek(self, offset, whence=os.SEEK_SET): 1134 if self.closed: 1135 raise ValueError("seek on closed file.") 1136 if not self._seekable: 1137 raise io.UnsupportedOperation("underlying stream is not seekable") 1138 curr_pos = self.tell() 1139 if whence == os.SEEK_SET: 1140 new_pos = offset 1141 elif whence == os.SEEK_CUR: 1142 new_pos = curr_pos + offset 1143 elif whence == os.SEEK_END: 1144 new_pos = self._orig_file_size + offset 1145 else: 1146 raise ValueError("whence must be os.SEEK_SET (0), " 1147 "os.SEEK_CUR (1), or os.SEEK_END (2)") 1148 1149 if new_pos > self._orig_file_size: 1150 new_pos = self._orig_file_size 1151 1152 if new_pos < 0: 1153 new_pos = 0 1154 1155 read_offset = new_pos - curr_pos 1156 buff_offset = read_offset + self._offset 1157 1158 if buff_offset >= 0 and buff_offset < len(self._readbuffer): 1159 # Just move the _offset index if the new position is in the _readbuffer 1160 self._offset = buff_offset 1161 read_offset = 0 1162 # Fast seek uncompressed unencrypted file 1163 elif self._compress_type == ZIP_STORED and self._decrypter is None and read_offset > 0: 1164 # disable CRC checking after first seeking - it would be invalid 1165 self._expected_crc = None 1166 # seek actual file taking already buffered data into account 1167 read_offset -= len(self._readbuffer) - self._offset 1168 self._fileobj.seek(read_offset, os.SEEK_CUR) 1169 self._left -= read_offset 1170 read_offset = 0 1171 # flush read buffer 1172 self._readbuffer = b'' 1173 self._offset = 0 1174 elif read_offset < 0: 1175 # Position is before the current position. Reset the ZipExtFile 1176 self._fileobj.seek(self._orig_compress_start) 1177 self._running_crc = self._orig_start_crc 1178 self._expected_crc = self._orig_crc 1179 self._compress_left = self._orig_compress_size 1180 self._left = self._orig_file_size 1181 self._readbuffer = b'' 1182 self._offset = 0 1183 self._decompressor = _get_decompressor(self._compress_type) 1184 self._eof = False 1185 read_offset = new_pos 1186 if self._decrypter is not None: 1187 self._init_decrypter() 1188 1189 while read_offset > 0: 1190 read_len = min(self.MAX_SEEK_READ, read_offset) 1191 self.read(read_len) 1192 read_offset -= read_len 1193 1194 return self.tell() 1195 1196 def tell(self): 1197 if self.closed: 1198 raise ValueError("tell on closed file.") 1199 if not self._seekable: 1200 raise io.UnsupportedOperation("underlying stream is not seekable") 1201 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset 1202 return filepos 1203 1204 1205class _ZipWriteFile(io.BufferedIOBase): 1206 def __init__(self, zf, zinfo, zip64): 1207 self._zinfo = zinfo 1208 self._zip64 = zip64 1209 self._zipfile = zf 1210 self._compressor = _get_compressor(zinfo.compress_type, 1211 zinfo.compress_level) 1212 self._file_size = 0 1213 self._compress_size = 0 1214 self._crc = 0 1215 1216 @property 1217 def _fileobj(self): 1218 return self._zipfile.fp 1219 1220 @property 1221 def name(self): 1222 return self._zinfo.filename 1223 1224 @property 1225 def mode(self): 1226 return 'wb' 1227 1228 def writable(self): 1229 return True 1230 1231 def write(self, data): 1232 if self.closed: 1233 raise ValueError('I/O operation on closed file.') 1234 1235 # Accept any data that supports the buffer protocol 1236 if isinstance(data, (bytes, bytearray)): 1237 nbytes = len(data) 1238 else: 1239 data = memoryview(data) 1240 nbytes = data.nbytes 1241 self._file_size += nbytes 1242 1243 self._crc = crc32(data, self._crc) 1244 if self._compressor: 1245 data = self._compressor.compress(data) 1246 self._compress_size += len(data) 1247 self._fileobj.write(data) 1248 return nbytes 1249 1250 def close(self): 1251 if self.closed: 1252 return 1253 try: 1254 super().close() 1255 # Flush any data from the compressor, and update header info 1256 if self._compressor: 1257 buf = self._compressor.flush() 1258 self._compress_size += len(buf) 1259 self._fileobj.write(buf) 1260 self._zinfo.compress_size = self._compress_size 1261 else: 1262 self._zinfo.compress_size = self._file_size 1263 self._zinfo.CRC = self._crc 1264 self._zinfo.file_size = self._file_size 1265 1266 if not self._zip64: 1267 if self._file_size > ZIP64_LIMIT: 1268 raise RuntimeError("File size too large, try using force_zip64") 1269 if self._compress_size > ZIP64_LIMIT: 1270 raise RuntimeError("Compressed size too large, try using force_zip64") 1271 1272 # Write updated header info 1273 if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 1274 # Write CRC and file sizes after the file data 1275 fmt = '<LLQQ' if self._zip64 else '<LLLL' 1276 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC, 1277 self._zinfo.compress_size, self._zinfo.file_size)) 1278 self._zipfile.start_dir = self._fileobj.tell() 1279 else: 1280 # Seek backwards and write file header (which will now include 1281 # correct CRC and file sizes) 1282 1283 # Preserve current position in file 1284 self._zipfile.start_dir = self._fileobj.tell() 1285 self._fileobj.seek(self._zinfo.header_offset) 1286 self._fileobj.write(self._zinfo.FileHeader(self._zip64)) 1287 self._fileobj.seek(self._zipfile.start_dir) 1288 1289 # Successfully written: Add file to our caches 1290 self._zipfile.filelist.append(self._zinfo) 1291 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo 1292 finally: 1293 self._zipfile._writing = False 1294 1295 1296 1297class ZipFile: 1298 """ Class with methods to open, read, write, close, list zip files. 1299 1300 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True, 1301 compresslevel=None) 1302 1303 file: Either the path to the file, or a file-like object. 1304 If it is a path, the file will be opened and closed by ZipFile. 1305 mode: The mode can be either read 'r', write 'w', exclusive create 'x', 1306 or append 'a'. 1307 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), 1308 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). 1309 allowZip64: if True ZipFile will create files with ZIP64 extensions when 1310 needed, otherwise it will raise an exception when this would 1311 be necessary. 1312 compresslevel: None (default for the given compression type) or an integer 1313 specifying the level to pass to the compressor. 1314 When using ZIP_STORED or ZIP_LZMA this keyword has no effect. 1315 When using ZIP_DEFLATED integers 0 through 9 are accepted. 1316 When using ZIP_BZIP2 integers 1 through 9 are accepted. 1317 1318 """ 1319 1320 fp = None # Set here since __del__ checks it 1321 _windows_illegal_name_trans_table = None 1322 1323 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, 1324 compresslevel=None, *, strict_timestamps=True, metadata_encoding=None): 1325 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x', 1326 or append 'a'.""" 1327 if mode not in ('r', 'w', 'x', 'a'): 1328 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'") 1329 1330 _check_compression(compression) 1331 1332 self._allowZip64 = allowZip64 1333 self._didModify = False 1334 self.debug = 0 # Level of printing: 0 through 3 1335 self.NameToInfo = {} # Find file info given name 1336 self.filelist = [] # List of ZipInfo instances for archive 1337 self.compression = compression # Method of compression 1338 self.compresslevel = compresslevel 1339 self.mode = mode 1340 self.pwd = None 1341 self._comment = b'' 1342 self._strict_timestamps = strict_timestamps 1343 self.metadata_encoding = metadata_encoding 1344 1345 # Check that we don't try to write with nonconforming codecs 1346 if self.metadata_encoding and mode != 'r': 1347 raise ValueError( 1348 "metadata_encoding is only supported for reading files") 1349 1350 # Check if we were passed a file-like object 1351 if isinstance(file, os.PathLike): 1352 file = os.fspath(file) 1353 if isinstance(file, str): 1354 # No, it's a filename 1355 self._filePassed = 0 1356 self.filename = file 1357 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b', 1358 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'} 1359 filemode = modeDict[mode] 1360 while True: 1361 try: 1362 self.fp = io.open(file, filemode) 1363 except OSError: 1364 if filemode in modeDict: 1365 filemode = modeDict[filemode] 1366 continue 1367 raise 1368 break 1369 else: 1370 self._filePassed = 1 1371 self.fp = file 1372 self.filename = getattr(file, 'name', None) 1373 self._fileRefCnt = 1 1374 self._lock = threading.RLock() 1375 self._seekable = True 1376 self._writing = False 1377 1378 try: 1379 if mode == 'r': 1380 self._RealGetContents() 1381 elif mode in ('w', 'x'): 1382 # set the modified flag so central directory gets written 1383 # even if no files are added to the archive 1384 self._didModify = True 1385 try: 1386 self.start_dir = self.fp.tell() 1387 except (AttributeError, OSError): 1388 self.fp = _Tellable(self.fp) 1389 self.start_dir = 0 1390 self._seekable = False 1391 else: 1392 # Some file-like objects can provide tell() but not seek() 1393 try: 1394 self.fp.seek(self.start_dir) 1395 except (AttributeError, OSError): 1396 self._seekable = False 1397 elif mode == 'a': 1398 try: 1399 # See if file is a zip file 1400 self._RealGetContents() 1401 # seek to start of directory and overwrite 1402 self.fp.seek(self.start_dir) 1403 except BadZipFile: 1404 # file is not a zip file, just append 1405 self.fp.seek(0, 2) 1406 1407 # set the modified flag so central directory gets written 1408 # even if no files are added to the archive 1409 self._didModify = True 1410 self.start_dir = self.fp.tell() 1411 else: 1412 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'") 1413 except: 1414 fp = self.fp 1415 self.fp = None 1416 self._fpclose(fp) 1417 raise 1418 1419 def __enter__(self): 1420 return self 1421 1422 def __exit__(self, type, value, traceback): 1423 self.close() 1424 1425 def __repr__(self): 1426 result = ['<%s.%s' % (self.__class__.__module__, 1427 self.__class__.__qualname__)] 1428 if self.fp is not None: 1429 if self._filePassed: 1430 result.append(' file=%r' % self.fp) 1431 elif self.filename is not None: 1432 result.append(' filename=%r' % self.filename) 1433 result.append(' mode=%r' % self.mode) 1434 else: 1435 result.append(' [closed]') 1436 result.append('>') 1437 return ''.join(result) 1438 1439 def _RealGetContents(self): 1440 """Read in the table of contents for the ZIP file.""" 1441 fp = self.fp 1442 try: 1443 endrec = _EndRecData(fp) 1444 except OSError: 1445 raise BadZipFile("File is not a zip file") 1446 if not endrec: 1447 raise BadZipFile("File is not a zip file") 1448 if self.debug > 1: 1449 print(endrec) 1450 size_cd = endrec[_ECD_SIZE] # bytes in central directory 1451 offset_cd = endrec[_ECD_OFFSET] # offset of central directory 1452 self._comment = endrec[_ECD_COMMENT] # archive comment 1453 1454 # "concat" is zero, unless zip was concatenated to another file 1455 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd 1456 if endrec[_ECD_SIGNATURE] == stringEndArchive64: 1457 # If Zip64 extension structures are present, account for them 1458 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) 1459 1460 if self.debug > 2: 1461 inferred = concat + offset_cd 1462 print("given, inferred, offset", offset_cd, inferred, concat) 1463 # self.start_dir: Position of start of central directory 1464 self.start_dir = offset_cd + concat 1465 if self.start_dir < 0: 1466 raise BadZipFile("Bad offset for central directory") 1467 fp.seek(self.start_dir, 0) 1468 data = fp.read(size_cd) 1469 fp = io.BytesIO(data) 1470 total = 0 1471 while total < size_cd: 1472 centdir = fp.read(sizeCentralDir) 1473 if len(centdir) != sizeCentralDir: 1474 raise BadZipFile("Truncated central directory") 1475 centdir = struct.unpack(structCentralDir, centdir) 1476 if centdir[_CD_SIGNATURE] != stringCentralDir: 1477 raise BadZipFile("Bad magic number for central directory") 1478 if self.debug > 2: 1479 print(centdir) 1480 filename = fp.read(centdir[_CD_FILENAME_LENGTH]) 1481 orig_filename_crc = crc32(filename) 1482 flags = centdir[_CD_FLAG_BITS] 1483 if flags & _MASK_UTF_FILENAME: 1484 # UTF-8 file names extension 1485 filename = filename.decode('utf-8') 1486 else: 1487 # Historical ZIP filename encoding 1488 filename = filename.decode(self.metadata_encoding or 'cp437') 1489 # Create ZipInfo instance to store file information 1490 x = ZipInfo(filename) 1491 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) 1492 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) 1493 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] 1494 (x.create_version, x.create_system, x.extract_version, x.reserved, 1495 x.flag_bits, x.compress_type, t, d, 1496 x.CRC, x.compress_size, x.file_size) = centdir[1:12] 1497 if x.extract_version > MAX_EXTRACT_VERSION: 1498 raise NotImplementedError("zip file version %.1f" % 1499 (x.extract_version / 10)) 1500 x.volume, x.internal_attr, x.external_attr = centdir[15:18] 1501 # Convert date/time code to (year, month, day, hour, min, sec) 1502 x._raw_time = t 1503 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, 1504 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) 1505 x._decodeExtra(orig_filename_crc) 1506 x.header_offset = x.header_offset + concat 1507 self.filelist.append(x) 1508 self.NameToInfo[x.filename] = x 1509 1510 # update total bytes read from central directory 1511 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] 1512 + centdir[_CD_EXTRA_FIELD_LENGTH] 1513 + centdir[_CD_COMMENT_LENGTH]) 1514 1515 if self.debug > 2: 1516 print("total", total) 1517 1518 end_offset = self.start_dir 1519 for zinfo in sorted(self.filelist, 1520 key=lambda zinfo: zinfo.header_offset, 1521 reverse=True): 1522 zinfo._end_offset = end_offset 1523 end_offset = zinfo.header_offset 1524 1525 def namelist(self): 1526 """Return a list of file names in the archive.""" 1527 return [data.filename for data in self.filelist] 1528 1529 def infolist(self): 1530 """Return a list of class ZipInfo instances for files in the 1531 archive.""" 1532 return self.filelist 1533 1534 def printdir(self, file=None): 1535 """Print a table of contents for the zip file.""" 1536 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"), 1537 file=file) 1538 for zinfo in self.filelist: 1539 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 1540 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size), 1541 file=file) 1542 1543 def testzip(self): 1544 """Read all the files and check the CRC. 1545 1546 Return None if all files could be read successfully, or the name 1547 of the offending file otherwise.""" 1548 chunk_size = 2 ** 20 1549 for zinfo in self.filelist: 1550 try: 1551 # Read by chunks, to avoid an OverflowError or a 1552 # MemoryError with very large embedded files. 1553 with self.open(zinfo.filename, "r") as f: 1554 while f.read(chunk_size): # Check CRC-32 1555 pass 1556 except BadZipFile: 1557 return zinfo.filename 1558 1559 def getinfo(self, name): 1560 """Return the instance of ZipInfo given 'name'.""" 1561 info = self.NameToInfo.get(name) 1562 if info is None: 1563 raise KeyError( 1564 'There is no item named %r in the archive' % name) 1565 1566 return info 1567 1568 def setpassword(self, pwd): 1569 """Set default password for encrypted files.""" 1570 if pwd and not isinstance(pwd, bytes): 1571 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1572 if pwd: 1573 self.pwd = pwd 1574 else: 1575 self.pwd = None 1576 1577 @property 1578 def comment(self): 1579 """The comment text associated with the ZIP file.""" 1580 return self._comment 1581 1582 @comment.setter 1583 def comment(self, comment): 1584 if not isinstance(comment, bytes): 1585 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__) 1586 # check for valid comment length 1587 if len(comment) > ZIP_MAX_COMMENT: 1588 import warnings 1589 warnings.warn('Archive comment is too long; truncating to %d bytes' 1590 % ZIP_MAX_COMMENT, stacklevel=2) 1591 comment = comment[:ZIP_MAX_COMMENT] 1592 self._comment = comment 1593 self._didModify = True 1594 1595 def read(self, name, pwd=None): 1596 """Return file bytes for name. 'pwd' is the password to decrypt 1597 encrypted files.""" 1598 with self.open(name, "r", pwd) as fp: 1599 return fp.read() 1600 1601 def open(self, name, mode="r", pwd=None, *, force_zip64=False): 1602 """Return file-like object for 'name'. 1603 1604 name is a string for the file name within the ZIP file, or a ZipInfo 1605 object. 1606 1607 mode should be 'r' to read a file already in the ZIP file, or 'w' to 1608 write to a file newly added to the archive. 1609 1610 pwd is the password to decrypt files (only used for reading). 1611 1612 When writing, if the file size is not known in advance but may exceed 1613 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large 1614 files. If the size is known in advance, it is best to pass a ZipInfo 1615 instance for name, with zinfo.file_size set. 1616 """ 1617 if mode not in {"r", "w"}: 1618 raise ValueError('open() requires mode "r" or "w"') 1619 if pwd and (mode == "w"): 1620 raise ValueError("pwd is only supported for reading files") 1621 if not self.fp: 1622 raise ValueError( 1623 "Attempt to use ZIP archive that was already closed") 1624 1625 # Make sure we have an info object 1626 if isinstance(name, ZipInfo): 1627 # 'name' is already an info object 1628 zinfo = name 1629 elif mode == 'w': 1630 zinfo = ZipInfo(name) 1631 zinfo.compress_type = self.compression 1632 zinfo.compress_level = self.compresslevel 1633 else: 1634 # Get info object for name 1635 zinfo = self.getinfo(name) 1636 1637 if mode == 'w': 1638 return self._open_to_write(zinfo, force_zip64=force_zip64) 1639 1640 if self._writing: 1641 raise ValueError("Can't read from the ZIP file while there " 1642 "is an open writing handle on it. " 1643 "Close the writing handle before trying to read.") 1644 1645 # Open for reading: 1646 self._fileRefCnt += 1 1647 zef_file = _SharedFile(self.fp, zinfo.header_offset, 1648 self._fpclose, self._lock, lambda: self._writing) 1649 try: 1650 # Skip the file header: 1651 fheader = zef_file.read(sizeFileHeader) 1652 if len(fheader) != sizeFileHeader: 1653 raise BadZipFile("Truncated file header") 1654 fheader = struct.unpack(structFileHeader, fheader) 1655 if fheader[_FH_SIGNATURE] != stringFileHeader: 1656 raise BadZipFile("Bad magic number for file header") 1657 1658 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 1659 if fheader[_FH_EXTRA_FIELD_LENGTH]: 1660 zef_file.seek(fheader[_FH_EXTRA_FIELD_LENGTH], whence=1) 1661 1662 if zinfo.flag_bits & _MASK_COMPRESSED_PATCH: 1663 # Zip 2.7: compressed patched data 1664 raise NotImplementedError("compressed patched data (flag bit 5)") 1665 1666 if zinfo.flag_bits & _MASK_STRONG_ENCRYPTION: 1667 # strong encryption 1668 raise NotImplementedError("strong encryption (flag bit 6)") 1669 1670 if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & _MASK_UTF_FILENAME: 1671 # UTF-8 filename 1672 fname_str = fname.decode("utf-8") 1673 else: 1674 fname_str = fname.decode(self.metadata_encoding or "cp437") 1675 1676 if fname_str != zinfo.orig_filename: 1677 raise BadZipFile( 1678 'File name in directory %r and header %r differ.' 1679 % (zinfo.orig_filename, fname)) 1680 1681 if (zinfo._end_offset is not None and 1682 zef_file.tell() + zinfo.compress_size > zinfo._end_offset): 1683 raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)") 1684 1685 # check for encrypted flag & handle password 1686 is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED 1687 if is_encrypted: 1688 if not pwd: 1689 pwd = self.pwd 1690 if pwd and not isinstance(pwd, bytes): 1691 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1692 if not pwd: 1693 raise RuntimeError("File %r is encrypted, password " 1694 "required for extraction" % name) 1695 else: 1696 pwd = None 1697 1698 return ZipExtFile(zef_file, mode + 'b', zinfo, pwd, True) 1699 except: 1700 zef_file.close() 1701 raise 1702 1703 def _open_to_write(self, zinfo, force_zip64=False): 1704 if force_zip64 and not self._allowZip64: 1705 raise ValueError( 1706 "force_zip64 is True, but allowZip64 was False when opening " 1707 "the ZIP file." 1708 ) 1709 if self._writing: 1710 raise ValueError("Can't write to the ZIP file while there is " 1711 "another write handle open on it. " 1712 "Close the first handle before opening another.") 1713 1714 # Size and CRC are overwritten with correct data after processing the file 1715 zinfo.compress_size = 0 1716 zinfo.CRC = 0 1717 1718 zinfo.flag_bits = 0x00 1719 if zinfo.compress_type == ZIP_LZMA: 1720 # Compressed data includes an end-of-stream (EOS) marker 1721 zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 1722 if not self._seekable: 1723 zinfo.flag_bits |= _MASK_USE_DATA_DESCRIPTOR 1724 1725 if not zinfo.external_attr: 1726 zinfo.external_attr = 0o600 << 16 # permissions: ?rw------- 1727 1728 # Compressed size can be larger than uncompressed size 1729 zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT) 1730 if not self._allowZip64 and zip64: 1731 raise LargeZipFile("Filesize would require ZIP64 extensions") 1732 1733 if self._seekable: 1734 self.fp.seek(self.start_dir) 1735 zinfo.header_offset = self.fp.tell() 1736 1737 self._writecheck(zinfo) 1738 self._didModify = True 1739 1740 self.fp.write(zinfo.FileHeader(zip64)) 1741 1742 self._writing = True 1743 return _ZipWriteFile(self, zinfo, zip64) 1744 1745 def extract(self, member, path=None, pwd=None): 1746 """Extract a member from the archive to the current working directory, 1747 using its full name. Its file information is extracted as accurately 1748 as possible. `member' may be a filename or a ZipInfo object. You can 1749 specify a different directory using `path'. You can specify the 1750 password to decrypt the file using 'pwd'. 1751 """ 1752 if path is None: 1753 path = os.getcwd() 1754 else: 1755 path = os.fspath(path) 1756 1757 return self._extract_member(member, path, pwd) 1758 1759 def extractall(self, path=None, members=None, pwd=None): 1760 """Extract all members from the archive to the current working 1761 directory. `path' specifies a different directory to extract to. 1762 `members' is optional and must be a subset of the list returned 1763 by namelist(). You can specify the password to decrypt all files 1764 using 'pwd'. 1765 """ 1766 if members is None: 1767 members = self.namelist() 1768 1769 if path is None: 1770 path = os.getcwd() 1771 else: 1772 path = os.fspath(path) 1773 1774 for zipinfo in members: 1775 self._extract_member(zipinfo, path, pwd) 1776 1777 @classmethod 1778 def _sanitize_windows_name(cls, arcname, pathsep): 1779 """Replace bad characters and remove trailing dots from parts.""" 1780 table = cls._windows_illegal_name_trans_table 1781 if not table: 1782 illegal = ':<>|"?*' 1783 table = str.maketrans(illegal, '_' * len(illegal)) 1784 cls._windows_illegal_name_trans_table = table 1785 arcname = arcname.translate(table) 1786 # remove trailing dots and spaces 1787 arcname = (x.rstrip(' .') for x in arcname.split(pathsep)) 1788 # rejoin, removing empty parts. 1789 arcname = pathsep.join(x for x in arcname if x) 1790 return arcname 1791 1792 def _extract_member(self, member, targetpath, pwd): 1793 """Extract the ZipInfo object 'member' to a physical 1794 file on the path targetpath. 1795 """ 1796 if not isinstance(member, ZipInfo): 1797 member = self.getinfo(member) 1798 1799 # build the destination pathname, replacing 1800 # forward slashes to platform specific separators. 1801 arcname = member.filename.replace('/', os.path.sep) 1802 1803 if os.path.altsep: 1804 arcname = arcname.replace(os.path.altsep, os.path.sep) 1805 # interpret absolute pathname as relative, remove drive letter or 1806 # UNC path, redundant separators, "." and ".." components. 1807 arcname = os.path.splitdrive(arcname)[1] 1808 invalid_path_parts = ('', os.path.curdir, os.path.pardir) 1809 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) 1810 if x not in invalid_path_parts) 1811 if os.path.sep == '\\': 1812 # filter illegal characters on Windows 1813 arcname = self._sanitize_windows_name(arcname, os.path.sep) 1814 1815 if not arcname and not member.is_dir(): 1816 raise ValueError("Empty filename.") 1817 1818 targetpath = os.path.join(targetpath, arcname) 1819 targetpath = os.path.normpath(targetpath) 1820 1821 # Create all upper directories if necessary. 1822 upperdirs = os.path.dirname(targetpath) 1823 if upperdirs and not os.path.exists(upperdirs): 1824 os.makedirs(upperdirs, exist_ok=True) 1825 1826 if member.is_dir(): 1827 if not os.path.isdir(targetpath): 1828 try: 1829 os.mkdir(targetpath) 1830 except FileExistsError: 1831 if not os.path.isdir(targetpath): 1832 raise 1833 return targetpath 1834 1835 with self.open(member, pwd=pwd) as source, \ 1836 open(targetpath, "wb") as target: 1837 shutil.copyfileobj(source, target) 1838 1839 return targetpath 1840 1841 def _writecheck(self, zinfo): 1842 """Check for errors before writing a file to the archive.""" 1843 if zinfo.filename in self.NameToInfo: 1844 import warnings 1845 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3) 1846 if self.mode not in ('w', 'x', 'a'): 1847 raise ValueError("write() requires mode 'w', 'x', or 'a'") 1848 if not self.fp: 1849 raise ValueError( 1850 "Attempt to write ZIP archive that was already closed") 1851 _check_compression(zinfo.compress_type) 1852 if not self._allowZip64: 1853 requires_zip64 = None 1854 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT: 1855 requires_zip64 = "Files count" 1856 elif zinfo.file_size > ZIP64_LIMIT: 1857 requires_zip64 = "Filesize" 1858 elif zinfo.header_offset > ZIP64_LIMIT: 1859 requires_zip64 = "Zipfile size" 1860 if requires_zip64: 1861 raise LargeZipFile(requires_zip64 + 1862 " would require ZIP64 extensions") 1863 1864 def write(self, filename, arcname=None, 1865 compress_type=None, compresslevel=None): 1866 """Put the bytes from filename into the archive under the name 1867 arcname.""" 1868 if not self.fp: 1869 raise ValueError( 1870 "Attempt to write to ZIP archive that was already closed") 1871 if self._writing: 1872 raise ValueError( 1873 "Can't write to ZIP archive while an open writing handle exists" 1874 ) 1875 1876 zinfo = ZipInfo.from_file(filename, arcname, 1877 strict_timestamps=self._strict_timestamps) 1878 1879 if zinfo.is_dir(): 1880 zinfo.compress_size = 0 1881 zinfo.CRC = 0 1882 self.mkdir(zinfo) 1883 else: 1884 if compress_type is not None: 1885 zinfo.compress_type = compress_type 1886 else: 1887 zinfo.compress_type = self.compression 1888 1889 if compresslevel is not None: 1890 zinfo.compress_level = compresslevel 1891 else: 1892 zinfo.compress_level = self.compresslevel 1893 1894 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: 1895 shutil.copyfileobj(src, dest, 1024*8) 1896 1897 def writestr(self, zinfo_or_arcname, data, 1898 compress_type=None, compresslevel=None): 1899 """Write a file into the archive. The contents is 'data', which 1900 may be either a 'str' or a 'bytes' instance; if it is a 'str', 1901 it is encoded as UTF-8 first. 1902 'zinfo_or_arcname' is either a ZipInfo instance or 1903 the name of the file in the archive.""" 1904 if isinstance(data, str): 1905 data = data.encode("utf-8") 1906 if not isinstance(zinfo_or_arcname, ZipInfo): 1907 zinfo = ZipInfo(filename=zinfo_or_arcname, 1908 date_time=time.localtime(time.time())[:6]) 1909 zinfo.compress_type = self.compression 1910 zinfo.compress_level = self.compresslevel 1911 if zinfo.filename.endswith('/'): 1912 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x 1913 zinfo.external_attr |= 0x10 # MS-DOS directory flag 1914 else: 1915 zinfo.external_attr = 0o600 << 16 # ?rw------- 1916 else: 1917 zinfo = zinfo_or_arcname 1918 1919 if not self.fp: 1920 raise ValueError( 1921 "Attempt to write to ZIP archive that was already closed") 1922 if self._writing: 1923 raise ValueError( 1924 "Can't write to ZIP archive while an open writing handle exists." 1925 ) 1926 1927 if compress_type is not None: 1928 zinfo.compress_type = compress_type 1929 1930 if compresslevel is not None: 1931 zinfo.compress_level = compresslevel 1932 1933 zinfo.file_size = len(data) # Uncompressed size 1934 with self._lock: 1935 with self.open(zinfo, mode='w') as dest: 1936 dest.write(data) 1937 1938 def mkdir(self, zinfo_or_directory_name, mode=511): 1939 """Creates a directory inside the zip archive.""" 1940 if isinstance(zinfo_or_directory_name, ZipInfo): 1941 zinfo = zinfo_or_directory_name 1942 if not zinfo.is_dir(): 1943 raise ValueError("The given ZipInfo does not describe a directory") 1944 elif isinstance(zinfo_or_directory_name, str): 1945 directory_name = zinfo_or_directory_name 1946 if not directory_name.endswith("/"): 1947 directory_name += "/" 1948 zinfo = ZipInfo(directory_name) 1949 zinfo.compress_size = 0 1950 zinfo.CRC = 0 1951 zinfo.external_attr = ((0o40000 | mode) & 0xFFFF) << 16 1952 zinfo.file_size = 0 1953 zinfo.external_attr |= 0x10 1954 else: 1955 raise TypeError("Expected type str or ZipInfo") 1956 1957 with self._lock: 1958 if self._seekable: 1959 self.fp.seek(self.start_dir) 1960 zinfo.header_offset = self.fp.tell() # Start of header bytes 1961 if zinfo.compress_type == ZIP_LZMA: 1962 # Compressed data includes an end-of-stream (EOS) marker 1963 zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 1964 1965 self._writecheck(zinfo) 1966 self._didModify = True 1967 1968 self.filelist.append(zinfo) 1969 self.NameToInfo[zinfo.filename] = zinfo 1970 self.fp.write(zinfo.FileHeader(False)) 1971 self.start_dir = self.fp.tell() 1972 1973 def __del__(self): 1974 """Call the "close()" method in case the user forgot.""" 1975 self.close() 1976 1977 def close(self): 1978 """Close the file, and for mode 'w', 'x' and 'a' write the ending 1979 records.""" 1980 if self.fp is None: 1981 return 1982 1983 if self._writing: 1984 raise ValueError("Can't close the ZIP file while there is " 1985 "an open writing handle on it. " 1986 "Close the writing handle before closing the zip.") 1987 1988 try: 1989 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records 1990 with self._lock: 1991 if self._seekable: 1992 self.fp.seek(self.start_dir) 1993 self._write_end_record() 1994 finally: 1995 fp = self.fp 1996 self.fp = None 1997 self._fpclose(fp) 1998 1999 def _write_end_record(self): 2000 for zinfo in self.filelist: # write central directory 2001 dt = zinfo.date_time 2002 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 2003 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 2004 extra = [] 2005 if zinfo.file_size > ZIP64_LIMIT \ 2006 or zinfo.compress_size > ZIP64_LIMIT: 2007 extra.append(zinfo.file_size) 2008 extra.append(zinfo.compress_size) 2009 file_size = 0xffffffff 2010 compress_size = 0xffffffff 2011 else: 2012 file_size = zinfo.file_size 2013 compress_size = zinfo.compress_size 2014 2015 if zinfo.header_offset > ZIP64_LIMIT: 2016 extra.append(zinfo.header_offset) 2017 header_offset = 0xffffffff 2018 else: 2019 header_offset = zinfo.header_offset 2020 2021 extra_data = zinfo.extra 2022 min_version = 0 2023 if extra: 2024 # Append a ZIP64 field to the extra's 2025 extra_data = _Extra.strip(extra_data, (1,)) 2026 extra_data = struct.pack( 2027 '<HH' + 'Q'*len(extra), 2028 1, 8*len(extra), *extra) + extra_data 2029 2030 min_version = ZIP64_VERSION 2031 2032 if zinfo.compress_type == ZIP_BZIP2: 2033 min_version = max(BZIP2_VERSION, min_version) 2034 elif zinfo.compress_type == ZIP_LZMA: 2035 min_version = max(LZMA_VERSION, min_version) 2036 2037 extract_version = max(min_version, zinfo.extract_version) 2038 create_version = max(min_version, zinfo.create_version) 2039 filename, flag_bits = zinfo._encodeFilenameFlags() 2040 centdir = struct.pack(structCentralDir, 2041 stringCentralDir, create_version, 2042 zinfo.create_system, extract_version, zinfo.reserved, 2043 flag_bits, zinfo.compress_type, dostime, dosdate, 2044 zinfo.CRC, compress_size, file_size, 2045 len(filename), len(extra_data), len(zinfo.comment), 2046 0, zinfo.internal_attr, zinfo.external_attr, 2047 header_offset) 2048 self.fp.write(centdir) 2049 self.fp.write(filename) 2050 self.fp.write(extra_data) 2051 self.fp.write(zinfo.comment) 2052 2053 pos2 = self.fp.tell() 2054 # Write end-of-zip-archive record 2055 centDirCount = len(self.filelist) 2056 centDirSize = pos2 - self.start_dir 2057 centDirOffset = self.start_dir 2058 requires_zip64 = None 2059 if centDirCount > ZIP_FILECOUNT_LIMIT: 2060 requires_zip64 = "Files count" 2061 elif centDirOffset > ZIP64_LIMIT: 2062 requires_zip64 = "Central directory offset" 2063 elif centDirSize > ZIP64_LIMIT: 2064 requires_zip64 = "Central directory size" 2065 if requires_zip64: 2066 # Need to write the ZIP64 end-of-archive records 2067 if not self._allowZip64: 2068 raise LargeZipFile(requires_zip64 + 2069 " would require ZIP64 extensions") 2070 zip64endrec = struct.pack( 2071 structEndArchive64, stringEndArchive64, 2072 44, 45, 45, 0, 0, centDirCount, centDirCount, 2073 centDirSize, centDirOffset) 2074 self.fp.write(zip64endrec) 2075 2076 zip64locrec = struct.pack( 2077 structEndArchive64Locator, 2078 stringEndArchive64Locator, 0, pos2, 1) 2079 self.fp.write(zip64locrec) 2080 centDirCount = min(centDirCount, 0xFFFF) 2081 centDirSize = min(centDirSize, 0xFFFFFFFF) 2082 centDirOffset = min(centDirOffset, 0xFFFFFFFF) 2083 2084 endrec = struct.pack(structEndArchive, stringEndArchive, 2085 0, 0, centDirCount, centDirCount, 2086 centDirSize, centDirOffset, len(self._comment)) 2087 self.fp.write(endrec) 2088 self.fp.write(self._comment) 2089 if self.mode == "a": 2090 self.fp.truncate() 2091 self.fp.flush() 2092 2093 def _fpclose(self, fp): 2094 assert self._fileRefCnt > 0 2095 self._fileRefCnt -= 1 2096 if not self._fileRefCnt and not self._filePassed: 2097 fp.close() 2098 2099 2100class PyZipFile(ZipFile): 2101 """Class to create ZIP archives with Python library files and packages.""" 2102 2103 def __init__(self, file, mode="r", compression=ZIP_STORED, 2104 allowZip64=True, optimize=-1): 2105 ZipFile.__init__(self, file, mode=mode, compression=compression, 2106 allowZip64=allowZip64) 2107 self._optimize = optimize 2108 2109 def writepy(self, pathname, basename="", filterfunc=None): 2110 """Add all files from "pathname" to the ZIP archive. 2111 2112 If pathname is a package directory, search the directory and 2113 all package subdirectories recursively for all *.py and enter 2114 the modules into the archive. If pathname is a plain 2115 directory, listdir *.py and enter all modules. Else, pathname 2116 must be a Python *.py file and the module will be put into the 2117 archive. Added modules are always module.pyc. 2118 This method will compile the module.py into module.pyc if 2119 necessary. 2120 If filterfunc(pathname) is given, it is called with every argument. 2121 When it is False, the file or directory is skipped. 2122 """ 2123 pathname = os.fspath(pathname) 2124 if filterfunc and not filterfunc(pathname): 2125 if self.debug: 2126 label = 'path' if os.path.isdir(pathname) else 'file' 2127 print('%s %r skipped by filterfunc' % (label, pathname)) 2128 return 2129 dir, name = os.path.split(pathname) 2130 if os.path.isdir(pathname): 2131 initname = os.path.join(pathname, "__init__.py") 2132 if os.path.isfile(initname): 2133 # This is a package directory, add it 2134 if basename: 2135 basename = "%s/%s" % (basename, name) 2136 else: 2137 basename = name 2138 if self.debug: 2139 print("Adding package in", pathname, "as", basename) 2140 fname, arcname = self._get_codename(initname[0:-3], basename) 2141 if self.debug: 2142 print("Adding", arcname) 2143 self.write(fname, arcname) 2144 dirlist = sorted(os.listdir(pathname)) 2145 dirlist.remove("__init__.py") 2146 # Add all *.py files and package subdirectories 2147 for filename in dirlist: 2148 path = os.path.join(pathname, filename) 2149 root, ext = os.path.splitext(filename) 2150 if os.path.isdir(path): 2151 if os.path.isfile(os.path.join(path, "__init__.py")): 2152 # This is a package directory, add it 2153 self.writepy(path, basename, 2154 filterfunc=filterfunc) # Recursive call 2155 elif ext == ".py": 2156 if filterfunc and not filterfunc(path): 2157 if self.debug: 2158 print('file %r skipped by filterfunc' % path) 2159 continue 2160 fname, arcname = self._get_codename(path[0:-3], 2161 basename) 2162 if self.debug: 2163 print("Adding", arcname) 2164 self.write(fname, arcname) 2165 else: 2166 # This is NOT a package directory, add its files at top level 2167 if self.debug: 2168 print("Adding files from directory", pathname) 2169 for filename in sorted(os.listdir(pathname)): 2170 path = os.path.join(pathname, filename) 2171 root, ext = os.path.splitext(filename) 2172 if ext == ".py": 2173 if filterfunc and not filterfunc(path): 2174 if self.debug: 2175 print('file %r skipped by filterfunc' % path) 2176 continue 2177 fname, arcname = self._get_codename(path[0:-3], 2178 basename) 2179 if self.debug: 2180 print("Adding", arcname) 2181 self.write(fname, arcname) 2182 else: 2183 if pathname[-3:] != ".py": 2184 raise RuntimeError( 2185 'Files added with writepy() must end with ".py"') 2186 fname, arcname = self._get_codename(pathname[0:-3], basename) 2187 if self.debug: 2188 print("Adding file", arcname) 2189 self.write(fname, arcname) 2190 2191 def _get_codename(self, pathname, basename): 2192 """Return (filename, archivename) for the path. 2193 2194 Given a module name path, return the correct file path and 2195 archive name, compiling if necessary. For example, given 2196 /python/lib/string, return (/python/lib/string.pyc, string). 2197 """ 2198 def _compile(file, optimize=-1): 2199 import py_compile 2200 if self.debug: 2201 print("Compiling", file) 2202 try: 2203 py_compile.compile(file, doraise=True, optimize=optimize) 2204 except py_compile.PyCompileError as err: 2205 print(err.msg) 2206 return False 2207 return True 2208 2209 file_py = pathname + ".py" 2210 file_pyc = pathname + ".pyc" 2211 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='') 2212 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1) 2213 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2) 2214 if self._optimize == -1: 2215 # legacy mode: use whatever file is present 2216 if (os.path.isfile(file_pyc) and 2217 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime): 2218 # Use .pyc file. 2219 arcname = fname = file_pyc 2220 elif (os.path.isfile(pycache_opt0) and 2221 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime): 2222 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2223 # file name in the archive. 2224 fname = pycache_opt0 2225 arcname = file_pyc 2226 elif (os.path.isfile(pycache_opt1) and 2227 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime): 2228 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2229 # file name in the archive. 2230 fname = pycache_opt1 2231 arcname = file_pyc 2232 elif (os.path.isfile(pycache_opt2) and 2233 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime): 2234 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2235 # file name in the archive. 2236 fname = pycache_opt2 2237 arcname = file_pyc 2238 else: 2239 # Compile py into PEP 3147 pyc file. 2240 if _compile(file_py): 2241 if sys.flags.optimize == 0: 2242 fname = pycache_opt0 2243 elif sys.flags.optimize == 1: 2244 fname = pycache_opt1 2245 else: 2246 fname = pycache_opt2 2247 arcname = file_pyc 2248 else: 2249 fname = arcname = file_py 2250 else: 2251 # new mode: use given optimization level 2252 if self._optimize == 0: 2253 fname = pycache_opt0 2254 arcname = file_pyc 2255 else: 2256 arcname = file_pyc 2257 if self._optimize == 1: 2258 fname = pycache_opt1 2259 elif self._optimize == 2: 2260 fname = pycache_opt2 2261 else: 2262 msg = "invalid value for 'optimize': {!r}".format(self._optimize) 2263 raise ValueError(msg) 2264 if not (os.path.isfile(fname) and 2265 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime): 2266 if not _compile(file_py, optimize=self._optimize): 2267 fname = arcname = file_py 2268 archivename = os.path.split(arcname)[1] 2269 if basename: 2270 archivename = "%s/%s" % (basename, archivename) 2271 return (fname, archivename) 2272 2273 2274def main(args=None): 2275 import argparse 2276 2277 description = 'A simple command-line interface for zipfile module.' 2278 parser = argparse.ArgumentParser(description=description) 2279 group = parser.add_mutually_exclusive_group(required=True) 2280 group.add_argument('-l', '--list', metavar='<zipfile>', 2281 help='Show listing of a zipfile') 2282 group.add_argument('-e', '--extract', nargs=2, 2283 metavar=('<zipfile>', '<output_dir>'), 2284 help='Extract zipfile into target dir') 2285 group.add_argument('-c', '--create', nargs='+', 2286 metavar=('<name>', '<file>'), 2287 help='Create zipfile from sources') 2288 group.add_argument('-t', '--test', metavar='<zipfile>', 2289 help='Test if a zipfile is valid') 2290 parser.add_argument('--metadata-encoding', metavar='<encoding>', 2291 help='Specify encoding of member names for -l, -e and -t') 2292 args = parser.parse_args(args) 2293 2294 encoding = args.metadata_encoding 2295 2296 if args.test is not None: 2297 src = args.test 2298 with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 2299 badfile = zf.testzip() 2300 if badfile: 2301 print("The following enclosed file is corrupted: {!r}".format(badfile)) 2302 print("Done testing") 2303 2304 elif args.list is not None: 2305 src = args.list 2306 with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 2307 zf.printdir() 2308 2309 elif args.extract is not None: 2310 src, curdir = args.extract 2311 with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 2312 zf.extractall(curdir) 2313 2314 elif args.create is not None: 2315 if encoding: 2316 print("Non-conforming encodings not supported with -c.", 2317 file=sys.stderr) 2318 sys.exit(1) 2319 2320 zip_name = args.create.pop(0) 2321 files = args.create 2322 2323 def addToZip(zf, path, zippath): 2324 if os.path.isfile(path): 2325 zf.write(path, zippath, ZIP_DEFLATED) 2326 elif os.path.isdir(path): 2327 if zippath: 2328 zf.write(path, zippath) 2329 for nm in sorted(os.listdir(path)): 2330 addToZip(zf, 2331 os.path.join(path, nm), os.path.join(zippath, nm)) 2332 # else: ignore 2333 2334 with ZipFile(zip_name, 'w') as zf: 2335 for path in files: 2336 zippath = os.path.basename(path) 2337 if not zippath: 2338 zippath = os.path.basename(os.path.dirname(path)) 2339 if zippath in ('', os.curdir, os.pardir): 2340 zippath = '' 2341 addToZip(zf, path, zippath) 2342 2343 2344from ._path import ( # noqa: E402 2345 Path, 2346 2347 # used privately for tests 2348 CompleteDirs, # noqa: F401 2349) 2350