1""" 2Read and write ZIP files. 3""" 4import struct, os, time, sys, shutil 5import binascii, cStringIO, stat 6import io 7import re 8 9try: 10 import zlib # We may need its compression method 11 crc32 = zlib.crc32 12except ImportError: 13 zlib = None 14 crc32 = binascii.crc32 15 16__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile", 17 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ] 18 19class BadZipfile(Exception): 20 pass 21 22 23class LargeZipFile(Exception): 24 """ 25 Raised when writing a zipfile, the zipfile requires ZIP64 extensions 26 and those extensions are disabled. 27 """ 28 29error = BadZipfile # The exception raised by this module 30 31ZIP64_LIMIT = (1 << 31) - 1 32ZIP_FILECOUNT_LIMIT = 1 << 16 33ZIP_MAX_COMMENT = (1 << 16) - 1 34 35# constants for Zip file compression methods 36ZIP_STORED = 0 37ZIP_DEFLATED = 8 38# Other ZIP compression methods not supported 39 40# Below are some formats and associated data for reading/writing headers using 41# the struct module. The names and structures of headers/records are those used 42# in the PKWARE description of the ZIP file format: 43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT 44# (URL valid as of January 2008) 45 46# The "end of central directory" structure, magic number, size, and indices 47# (section V.I in the format document) 48structEndArchive = "<4s4H2LH" 49stringEndArchive = "PK\005\006" 50sizeEndCentDir = struct.calcsize(structEndArchive) 51 52_ECD_SIGNATURE = 0 53_ECD_DISK_NUMBER = 1 54_ECD_DISK_START = 2 55_ECD_ENTRIES_THIS_DISK = 3 56_ECD_ENTRIES_TOTAL = 4 57_ECD_SIZE = 5 58_ECD_OFFSET = 6 59_ECD_COMMENT_SIZE = 7 60# These last two indices are not part of the structure as defined in the 61# spec, but they are used internally by this module as a convenience 62_ECD_COMMENT = 8 63_ECD_LOCATION = 9 64 65# The "central directory" structure, magic number, size, and indices 66# of entries in the structure (section V.F in the format document) 67structCentralDir = "<4s4B4HL2L5H2L" 68stringCentralDir = "PK\001\002" 69sizeCentralDir = struct.calcsize(structCentralDir) 70 71# indexes of entries in the central directory structure 72_CD_SIGNATURE = 0 73_CD_CREATE_VERSION = 1 74_CD_CREATE_SYSTEM = 2 75_CD_EXTRACT_VERSION = 3 76_CD_EXTRACT_SYSTEM = 4 77_CD_FLAG_BITS = 5 78_CD_COMPRESS_TYPE = 6 79_CD_TIME = 7 80_CD_DATE = 8 81_CD_CRC = 9 82_CD_COMPRESSED_SIZE = 10 83_CD_UNCOMPRESSED_SIZE = 11 84_CD_FILENAME_LENGTH = 12 85_CD_EXTRA_FIELD_LENGTH = 13 86_CD_COMMENT_LENGTH = 14 87_CD_DISK_NUMBER_START = 15 88_CD_INTERNAL_FILE_ATTRIBUTES = 16 89_CD_EXTERNAL_FILE_ATTRIBUTES = 17 90_CD_LOCAL_HEADER_OFFSET = 18 91 92# The "local file header" structure, magic number, size, and indices 93# (section V.A in the format document) 94structFileHeader = "<4s2B4HL2L2H" 95stringFileHeader = "PK\003\004" 96sizeFileHeader = struct.calcsize(structFileHeader) 97 98_FH_SIGNATURE = 0 99_FH_EXTRACT_VERSION = 1 100_FH_EXTRACT_SYSTEM = 2 101_FH_GENERAL_PURPOSE_FLAG_BITS = 3 102_FH_COMPRESSION_METHOD = 4 103_FH_LAST_MOD_TIME = 5 104_FH_LAST_MOD_DATE = 6 105_FH_CRC = 7 106_FH_COMPRESSED_SIZE = 8 107_FH_UNCOMPRESSED_SIZE = 9 108_FH_FILENAME_LENGTH = 10 109_FH_EXTRA_FIELD_LENGTH = 11 110 111# The "Zip64 end of central directory locator" structure, magic number, and size 112structEndArchive64Locator = "<4sLQL" 113stringEndArchive64Locator = "PK\x06\x07" 114sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) 115 116# The "Zip64 end of central directory" record, magic number, size, and indices 117# (section V.G in the format document) 118structEndArchive64 = "<4sQ2H2L4Q" 119stringEndArchive64 = "PK\x06\x06" 120sizeEndCentDir64 = struct.calcsize(structEndArchive64) 121 122_CD64_SIGNATURE = 0 123_CD64_DIRECTORY_RECSIZE = 1 124_CD64_CREATE_VERSION = 2 125_CD64_EXTRACT_VERSION = 3 126_CD64_DISK_NUMBER = 4 127_CD64_DISK_NUMBER_START = 5 128_CD64_NUMBER_ENTRIES_THIS_DISK = 6 129_CD64_NUMBER_ENTRIES_TOTAL = 7 130_CD64_DIRECTORY_SIZE = 8 131_CD64_OFFSET_START_CENTDIR = 9 132 133def _check_zipfile(fp): 134 try: 135 if _EndRecData(fp): 136 return True # file has correct magic number 137 except IOError: 138 pass 139 return False 140 141def is_zipfile(filename): 142 """Quickly see if a file is a ZIP file by checking the magic number. 143 144 The filename argument may be a file or file-like object too. 145 """ 146 result = False 147 try: 148 if hasattr(filename, "read"): 149 result = _check_zipfile(fp=filename) 150 else: 151 with open(filename, "rb") as fp: 152 result = _check_zipfile(fp) 153 except IOError: 154 pass 155 return result 156 157def _EndRecData64(fpin, offset, endrec): 158 """ 159 Read the ZIP64 end-of-archive records and use that to update endrec 160 """ 161 try: 162 fpin.seek(offset - sizeEndCentDir64Locator, 2) 163 except IOError: 164 # If the seek fails, the file is not large enough to contain a ZIP64 165 # end-of-archive record, so just return the end record we were given. 166 return endrec 167 168 data = fpin.read(sizeEndCentDir64Locator) 169 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) 170 if sig != stringEndArchive64Locator: 171 return endrec 172 173 if diskno != 0 or disks != 1: 174 raise BadZipfile("zipfiles that span multiple disks are not supported") 175 176 # Assume no 'zip64 extensible data' 177 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 178 data = fpin.read(sizeEndCentDir64) 179 sig, sz, create_version, read_version, disk_num, disk_dir, \ 180 dircount, dircount2, dirsize, diroffset = \ 181 struct.unpack(structEndArchive64, data) 182 if sig != stringEndArchive64: 183 return endrec 184 185 # Update the original endrec using data from the ZIP64 record 186 endrec[_ECD_SIGNATURE] = sig 187 endrec[_ECD_DISK_NUMBER] = disk_num 188 endrec[_ECD_DISK_START] = disk_dir 189 endrec[_ECD_ENTRIES_THIS_DISK] = dircount 190 endrec[_ECD_ENTRIES_TOTAL] = dircount2 191 endrec[_ECD_SIZE] = dirsize 192 endrec[_ECD_OFFSET] = diroffset 193 return endrec 194 195 196def _EndRecData(fpin): 197 """Return data from the "End of Central Directory" record, or None. 198 199 The data is a list of the nine items in the ZIP "End of central dir" 200 record followed by a tenth item, the file seek offset of this record.""" 201 202 # Determine file size 203 fpin.seek(0, 2) 204 filesize = fpin.tell() 205 206 # Check to see if this is ZIP file with no archive comment (the 207 # "end of central directory" structure should be the last item in the 208 # file if this is the case). 209 try: 210 fpin.seek(-sizeEndCentDir, 2) 211 except IOError: 212 return None 213 data = fpin.read() 214 if data[0:4] == stringEndArchive and data[-2:] == "\000\000": 215 # the signature is correct and there's no comment, unpack structure 216 endrec = struct.unpack(structEndArchive, data) 217 endrec=list(endrec) 218 219 # Append a blank comment and record start offset 220 endrec.append("") 221 endrec.append(filesize - sizeEndCentDir) 222 223 # Try to read the "Zip64 end of central directory" structure 224 return _EndRecData64(fpin, -sizeEndCentDir, endrec) 225 226 # Either this is not a ZIP file, or it is a ZIP file with an archive 227 # comment. Search the end of the file for the "end of central directory" 228 # record signature. The comment is the last item in the ZIP file and may be 229 # up to 64K long. It is assumed that the "end of central directory" magic 230 # number does not appear in the comment. 231 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) 232 fpin.seek(maxCommentStart, 0) 233 data = fpin.read() 234 start = data.rfind(stringEndArchive) 235 if start >= 0: 236 # found the magic number; attempt to unpack and interpret 237 recData = data[start:start+sizeEndCentDir] 238 endrec = list(struct.unpack(structEndArchive, recData)) 239 comment = data[start+sizeEndCentDir:] 240 # check that comment length is correct 241 if endrec[_ECD_COMMENT_SIZE] == len(comment): 242 # Append the archive comment and start offset 243 endrec.append(comment) 244 endrec.append(maxCommentStart + start) 245 246 # Try to read the "Zip64 end of central directory" structure 247 return _EndRecData64(fpin, maxCommentStart + start - filesize, 248 endrec) 249 250 # Unable to find a valid end of central directory structure 251 return 252 253 254class ZipInfo (object): 255 """Class with attributes describing each file in the ZIP archive.""" 256 257 __slots__ = ( 258 'orig_filename', 259 'filename', 260 'date_time', 261 'compress_type', 262 'comment', 263 'extra', 264 'create_system', 265 'create_version', 266 'extract_version', 267 'reserved', 268 'flag_bits', 269 'volume', 270 'internal_attr', 271 'external_attr', 272 'header_offset', 273 'CRC', 274 'compress_size', 275 'file_size', 276 '_raw_time', 277 ) 278 279 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): 280 self.orig_filename = filename # Original file name in archive 281 282 # Terminate the file name at the first null byte. Null bytes in file 283 # names are used as tricks by viruses in archives. 284 null_byte = filename.find(chr(0)) 285 if null_byte >= 0: 286 filename = filename[0:null_byte] 287 # This is used to ensure paths in generated ZIP files always use 288 # forward slashes as the directory separator, as required by the 289 # ZIP format specification. 290 if os.sep != "/" and os.sep in filename: 291 filename = filename.replace(os.sep, "/") 292 293 self.filename = filename # Normalized file name 294 self.date_time = date_time # year, month, day, hour, min, sec 295 # Standard values: 296 self.compress_type = ZIP_STORED # Type of compression for the file 297 self.comment = "" # Comment for each file 298 self.extra = "" # ZIP extra data 299 if sys.platform == 'win32': 300 self.create_system = 0 # System which created ZIP archive 301 else: 302 # Assume everything else is unix-y 303 self.create_system = 3 # System which created ZIP archive 304 self.create_version = 20 # Version which created ZIP archive 305 self.extract_version = 20 # Version needed to extract archive 306 self.reserved = 0 # Must be zero 307 self.flag_bits = 0 # ZIP flag bits 308 self.volume = 0 # Volume number of file header 309 self.internal_attr = 0 # Internal attributes 310 self.external_attr = 0 # External file attributes 311 # Other attributes are set by class ZipFile: 312 # header_offset Byte offset to the file header 313 # CRC CRC-32 of the uncompressed file 314 # compress_size Size of the compressed file 315 # file_size Size of the uncompressed file 316 317 def FileHeader(self): 318 """Return the per-file header as a string.""" 319 dt = self.date_time 320 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 321 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 322 if self.flag_bits & 0x08: 323 # Set these to zero because we write them after the file data 324 CRC = compress_size = file_size = 0 325 else: 326 CRC = self.CRC 327 compress_size = self.compress_size 328 file_size = self.file_size 329 330 extra = self.extra 331 332 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: 333 # File is larger than what fits into a 4 byte integer, 334 # fall back to the ZIP64 extension 335 fmt = '<HHQQ' 336 extra = extra + struct.pack(fmt, 337 1, struct.calcsize(fmt)-4, file_size, compress_size) 338 file_size = 0xffffffff 339 compress_size = 0xffffffff 340 self.extract_version = max(45, self.extract_version) 341 self.create_version = max(45, self.extract_version) 342 343 filename, flag_bits = self._encodeFilenameFlags() 344 header = struct.pack(structFileHeader, stringFileHeader, 345 self.extract_version, self.reserved, flag_bits, 346 self.compress_type, dostime, dosdate, CRC, 347 compress_size, file_size, 348 len(filename), len(extra)) 349 return header + filename + extra 350 351 def _encodeFilenameFlags(self): 352 if isinstance(self.filename, unicode): 353 try: 354 return self.filename.encode('ascii'), self.flag_bits 355 except UnicodeEncodeError: 356 return self.filename.encode('utf-8'), self.flag_bits | 0x800 357 else: 358 return self.filename, self.flag_bits 359 360 def _decodeFilename(self): 361 if self.flag_bits & 0x800: 362 return self.filename.decode('utf-8') 363 else: 364 return self.filename 365 366 def _decodeExtra(self): 367 # Try to decode the extra field. 368 extra = self.extra 369 unpack = struct.unpack 370 while extra: 371 tp, ln = unpack('<HH', extra[:4]) 372 if tp == 1: 373 if ln >= 24: 374 counts = unpack('<QQQ', extra[4:28]) 375 elif ln == 16: 376 counts = unpack('<QQ', extra[4:20]) 377 elif ln == 8: 378 counts = unpack('<Q', extra[4:12]) 379 elif ln == 0: 380 counts = () 381 else: 382 raise RuntimeError, "Corrupt extra field %s"%(ln,) 383 384 idx = 0 385 386 # ZIP64 extension (large files and/or large archives) 387 if self.file_size in (0xffffffffffffffffL, 0xffffffffL): 388 self.file_size = counts[idx] 389 idx += 1 390 391 if self.compress_size == 0xFFFFFFFFL: 392 self.compress_size = counts[idx] 393 idx += 1 394 395 if self.header_offset == 0xffffffffL: 396 old = self.header_offset 397 self.header_offset = counts[idx] 398 idx+=1 399 400 extra = extra[ln+4:] 401 402 403class _ZipDecrypter: 404 """Class to handle decryption of files stored within a ZIP archive. 405 406 ZIP supports a password-based form of encryption. Even though known 407 plaintext attacks have been found against it, it is still useful 408 to be able to get data out of such a file. 409 410 Usage: 411 zd = _ZipDecrypter(mypwd) 412 plain_char = zd(cypher_char) 413 plain_text = map(zd, cypher_text) 414 """ 415 416 def _GenerateCRCTable(): 417 """Generate a CRC-32 table. 418 419 ZIP encryption uses the CRC32 one-byte primitive for scrambling some 420 internal keys. We noticed that a direct implementation is faster than 421 relying on binascii.crc32(). 422 """ 423 poly = 0xedb88320 424 table = [0] * 256 425 for i in range(256): 426 crc = i 427 for j in range(8): 428 if crc & 1: 429 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly 430 else: 431 crc = ((crc >> 1) & 0x7FFFFFFF) 432 table[i] = crc 433 return table 434 crctable = _GenerateCRCTable() 435 436 def _crc32(self, ch, crc): 437 """Compute the CRC32 primitive on one byte.""" 438 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff] 439 440 def __init__(self, pwd): 441 self.key0 = 305419896 442 self.key1 = 591751049 443 self.key2 = 878082192 444 for p in pwd: 445 self._UpdateKeys(p) 446 447 def _UpdateKeys(self, c): 448 self.key0 = self._crc32(c, self.key0) 449 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 450 self.key1 = (self.key1 * 134775813 + 1) & 4294967295 451 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2) 452 453 def __call__(self, c): 454 """Decrypt a single character.""" 455 c = ord(c) 456 k = self.key2 | 2 457 c = c ^ (((k * (k^1)) >> 8) & 255) 458 c = chr(c) 459 self._UpdateKeys(c) 460 return c 461 462class ZipExtFile(io.BufferedIOBase): 463 """File-like object for reading an archive member. 464 Is returned by ZipFile.open(). 465 """ 466 467 # Max size supported by decompressor. 468 MAX_N = 1 << 31 - 1 469 470 # Read from compressed files in 4k blocks. 471 MIN_READ_SIZE = 4096 472 473 # Search for universal newlines or line chunks. 474 PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)') 475 476 def __init__(self, fileobj, mode, zipinfo, decrypter=None): 477 self._fileobj = fileobj 478 self._decrypter = decrypter 479 480 self._compress_type = zipinfo.compress_type 481 self._compress_size = zipinfo.compress_size 482 self._compress_left = zipinfo.compress_size 483 484 if self._compress_type == ZIP_DEFLATED: 485 self._decompressor = zlib.decompressobj(-15) 486 self._unconsumed = '' 487 488 self._readbuffer = '' 489 self._offset = 0 490 491 self._universal = 'U' in mode 492 self.newlines = None 493 494 # Adjust read size for encrypted files since the first 12 bytes 495 # are for the encryption/password information. 496 if self._decrypter is not None: 497 self._compress_left -= 12 498 499 self.mode = mode 500 self.name = zipinfo.filename 501 502 if hasattr(zipinfo, 'CRC'): 503 self._expected_crc = zipinfo.CRC 504 self._running_crc = crc32(b'') & 0xffffffff 505 else: 506 self._expected_crc = None 507 508 def readline(self, limit=-1): 509 """Read and return a line from the stream. 510 511 If limit is specified, at most limit bytes will be read. 512 """ 513 514 if not self._universal and limit < 0: 515 # Shortcut common case - newline found in buffer. 516 i = self._readbuffer.find('\n', self._offset) + 1 517 if i > 0: 518 line = self._readbuffer[self._offset: i] 519 self._offset = i 520 return line 521 522 if not self._universal: 523 return io.BufferedIOBase.readline(self, limit) 524 525 line = '' 526 while limit < 0 or len(line) < limit: 527 readahead = self.peek(2) 528 if readahead == '': 529 return line 530 531 # 532 # Search for universal newlines or line chunks. 533 # 534 # The pattern returns either a line chunk or a newline, but not 535 # both. Combined with peek(2), we are assured that the sequence 536 # '\r\n' is always retrieved completely and never split into 537 # separate newlines - '\r', '\n' due to coincidental readaheads. 538 # 539 match = self.PATTERN.search(readahead) 540 newline = match.group('newline') 541 if newline is not None: 542 if self.newlines is None: 543 self.newlines = [] 544 if newline not in self.newlines: 545 self.newlines.append(newline) 546 self._offset += len(newline) 547 return line + '\n' 548 549 chunk = match.group('chunk') 550 if limit >= 0: 551 chunk = chunk[: limit - len(line)] 552 553 self._offset += len(chunk) 554 line += chunk 555 556 return line 557 558 def peek(self, n=1): 559 """Returns buffered bytes without advancing the position.""" 560 if n > len(self._readbuffer) - self._offset: 561 chunk = self.read(n) 562 self._offset -= len(chunk) 563 564 # Return up to 512 bytes to reduce allocation overhead for tight loops. 565 return self._readbuffer[self._offset: self._offset + 512] 566 567 def readable(self): 568 return True 569 570 def read(self, n=-1): 571 """Read and return up to n bytes. 572 If the argument is omitted, None, or negative, data is read and returned until EOF is reached.. 573 """ 574 buf = '' 575 if n is None: 576 n = -1 577 while True: 578 if n < 0: 579 data = self.read1(n) 580 elif n > len(buf): 581 data = self.read1(n - len(buf)) 582 else: 583 return buf 584 if len(data) == 0: 585 return buf 586 buf += data 587 588 def _update_crc(self, newdata, eof): 589 # Update the CRC using the given data. 590 if self._expected_crc is None: 591 # No need to compute the CRC if we don't have a reference value 592 return 593 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff 594 # Check the CRC if we're at the end of the file 595 if eof and self._running_crc != self._expected_crc: 596 raise BadZipfile("Bad CRC-32 for file %r" % self.name) 597 598 def read1(self, n): 599 """Read up to n bytes with at most one read() system call.""" 600 601 # Simplify algorithm (branching) by transforming negative n to large n. 602 if n < 0 or n is None: 603 n = self.MAX_N 604 605 # Bytes available in read buffer. 606 len_readbuffer = len(self._readbuffer) - self._offset 607 608 # Read from file. 609 if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed): 610 nbytes = n - len_readbuffer - len(self._unconsumed) 611 nbytes = max(nbytes, self.MIN_READ_SIZE) 612 nbytes = min(nbytes, self._compress_left) 613 614 data = self._fileobj.read(nbytes) 615 self._compress_left -= len(data) 616 617 if data and self._decrypter is not None: 618 data = ''.join(map(self._decrypter, data)) 619 620 if self._compress_type == ZIP_STORED: 621 self._update_crc(data, eof=(self._compress_left==0)) 622 self._readbuffer = self._readbuffer[self._offset:] + data 623 self._offset = 0 624 else: 625 # Prepare deflated bytes for decompression. 626 self._unconsumed += data 627 628 # Handle unconsumed data. 629 if (len(self._unconsumed) > 0 and n > len_readbuffer and 630 self._compress_type == ZIP_DEFLATED): 631 data = self._decompressor.decompress( 632 self._unconsumed, 633 max(n - len_readbuffer, self.MIN_READ_SIZE) 634 ) 635 636 self._unconsumed = self._decompressor.unconsumed_tail 637 eof = len(self._unconsumed) == 0 and self._compress_left == 0 638 if eof: 639 data += self._decompressor.flush() 640 641 self._update_crc(data, eof=eof) 642 self._readbuffer = self._readbuffer[self._offset:] + data 643 self._offset = 0 644 645 # Read from buffer. 646 data = self._readbuffer[self._offset: self._offset + n] 647 self._offset += len(data) 648 return data 649 650 651 652class ZipFile: 653 """ Class with methods to open, read, write, close, list zip files. 654 655 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False) 656 657 file: Either the path to the file, or a file-like object. 658 If it is a path, the file will be opened and closed by ZipFile. 659 mode: The mode can be either read "r", write "w" or append "a". 660 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib). 661 allowZip64: if True ZipFile will create files with ZIP64 extensions when 662 needed, otherwise it will raise an exception when this would 663 be necessary. 664 665 """ 666 667 fp = None # Set here since __del__ checks it 668 669 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False): 670 """Open the ZIP file with mode read "r", write "w" or append "a".""" 671 if mode not in ("r", "w", "a"): 672 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"') 673 674 if compression == ZIP_STORED: 675 pass 676 elif compression == ZIP_DEFLATED: 677 if not zlib: 678 raise RuntimeError,\ 679 "Compression requires the (missing) zlib module" 680 else: 681 raise RuntimeError, "That compression method is not supported" 682 683 self._allowZip64 = allowZip64 684 self._didModify = False 685 self.debug = 0 # Level of printing: 0 through 3 686 self.NameToInfo = {} # Find file info given name 687 self.filelist = [] # List of ZipInfo instances for archive 688 self.compression = compression # Method of compression 689 self.mode = key = mode.replace('b', '')[0] 690 self.pwd = None 691 self.comment = '' 692 693 # Check if we were passed a file-like object 694 if isinstance(file, basestring): 695 self._filePassed = 0 696 self.filename = file 697 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'} 698 try: 699 self.fp = open(file, modeDict[mode]) 700 except IOError: 701 if mode == 'a': 702 mode = key = 'w' 703 self.fp = open(file, modeDict[mode]) 704 else: 705 raise 706 else: 707 self._filePassed = 1 708 self.fp = file 709 self.filename = getattr(file, 'name', None) 710 711 if key == 'r': 712 self._GetContents() 713 elif key == 'w': 714 # set the modified flag so central directory gets written 715 # even if no files are added to the archive 716 self._didModify = True 717 elif key == 'a': 718 try: 719 # See if file is a zip file 720 self._RealGetContents() 721 # seek to start of directory and overwrite 722 self.fp.seek(self.start_dir, 0) 723 except BadZipfile: 724 # file is not a zip file, just append 725 self.fp.seek(0, 2) 726 727 # set the modified flag so central directory gets written 728 # even if no files are added to the archive 729 self._didModify = True 730 else: 731 if not self._filePassed: 732 self.fp.close() 733 self.fp = None 734 raise RuntimeError, 'Mode must be "r", "w" or "a"' 735 736 def __enter__(self): 737 return self 738 739 def __exit__(self, type, value, traceback): 740 self.close() 741 742 def _GetContents(self): 743 """Read the directory, making sure we close the file if the format 744 is bad.""" 745 try: 746 self._RealGetContents() 747 except BadZipfile: 748 if not self._filePassed: 749 self.fp.close() 750 self.fp = None 751 raise 752 753 def _RealGetContents(self): 754 """Read in the table of contents for the ZIP file.""" 755 fp = self.fp 756 try: 757 endrec = _EndRecData(fp) 758 except IOError: 759 raise BadZipfile("File is not a zip file") 760 if not endrec: 761 raise BadZipfile, "File is not a zip file" 762 if self.debug > 1: 763 print endrec 764 size_cd = endrec[_ECD_SIZE] # bytes in central directory 765 offset_cd = endrec[_ECD_OFFSET] # offset of central directory 766 self.comment = endrec[_ECD_COMMENT] # archive comment 767 768 # "concat" is zero, unless zip was concatenated to another file 769 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd 770 if endrec[_ECD_SIGNATURE] == stringEndArchive64: 771 # If Zip64 extension structures are present, account for them 772 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) 773 774 if self.debug > 2: 775 inferred = concat + offset_cd 776 print "given, inferred, offset", offset_cd, inferred, concat 777 # self.start_dir: Position of start of central directory 778 self.start_dir = offset_cd + concat 779 fp.seek(self.start_dir, 0) 780 data = fp.read(size_cd) 781 fp = cStringIO.StringIO(data) 782 total = 0 783 while total < size_cd: 784 centdir = fp.read(sizeCentralDir) 785 if centdir[0:4] != stringCentralDir: 786 raise BadZipfile, "Bad magic number for central directory" 787 centdir = struct.unpack(structCentralDir, centdir) 788 if self.debug > 2: 789 print centdir 790 filename = fp.read(centdir[_CD_FILENAME_LENGTH]) 791 # Create ZipInfo instance to store file information 792 x = ZipInfo(filename) 793 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) 794 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) 795 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] 796 (x.create_version, x.create_system, x.extract_version, x.reserved, 797 x.flag_bits, x.compress_type, t, d, 798 x.CRC, x.compress_size, x.file_size) = centdir[1:12] 799 x.volume, x.internal_attr, x.external_attr = centdir[15:18] 800 # Convert date/time code to (year, month, day, hour, min, sec) 801 x._raw_time = t 802 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, 803 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) 804 805 x._decodeExtra() 806 x.header_offset = x.header_offset + concat 807 x.filename = x._decodeFilename() 808 self.filelist.append(x) 809 self.NameToInfo[x.filename] = x 810 811 # update total bytes read from central directory 812 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] 813 + centdir[_CD_EXTRA_FIELD_LENGTH] 814 + centdir[_CD_COMMENT_LENGTH]) 815 816 if self.debug > 2: 817 print "total", total 818 819 820 def namelist(self): 821 """Return a list of file names in the archive.""" 822 l = [] 823 for data in self.filelist: 824 l.append(data.filename) 825 return l 826 827 def infolist(self): 828 """Return a list of class ZipInfo instances for files in the 829 archive.""" 830 return self.filelist 831 832 def printdir(self): 833 """Print a table of contents for the zip file.""" 834 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size") 835 for zinfo in self.filelist: 836 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 837 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size) 838 839 def testzip(self): 840 """Read all the files and check the CRC.""" 841 chunk_size = 2 ** 20 842 for zinfo in self.filelist: 843 try: 844 # Read by chunks, to avoid an OverflowError or a 845 # MemoryError with very large embedded files. 846 f = self.open(zinfo.filename, "r") 847 while f.read(chunk_size): # Check CRC-32 848 pass 849 except BadZipfile: 850 return zinfo.filename 851 852 def getinfo(self, name): 853 """Return the instance of ZipInfo given 'name'.""" 854 info = self.NameToInfo.get(name) 855 if info is None: 856 raise KeyError( 857 'There is no item named %r in the archive' % name) 858 859 return info 860 861 def setpassword(self, pwd): 862 """Set default password for encrypted files.""" 863 self.pwd = pwd 864 865 def read(self, name, pwd=None): 866 """Return file bytes (as a string) for name.""" 867 return self.open(name, "r", pwd).read() 868 869 def open(self, name, mode="r", pwd=None): 870 """Return file-like object for 'name'.""" 871 if mode not in ("r", "U", "rU"): 872 raise RuntimeError, 'open() requires mode "r", "U", or "rU"' 873 if not self.fp: 874 raise RuntimeError, \ 875 "Attempt to read ZIP archive that was already closed" 876 877 # Only open a new file for instances where we were not 878 # given a file object in the constructor 879 if self._filePassed: 880 zef_file = self.fp 881 else: 882 zef_file = open(self.filename, 'rb') 883 884 # Make sure we have an info object 885 if isinstance(name, ZipInfo): 886 # 'name' is already an info object 887 zinfo = name 888 else: 889 # Get info object for name 890 zinfo = self.getinfo(name) 891 892 zef_file.seek(zinfo.header_offset, 0) 893 894 # Skip the file header: 895 fheader = zef_file.read(sizeFileHeader) 896 if fheader[0:4] != stringFileHeader: 897 raise BadZipfile, "Bad magic number for file header" 898 899 fheader = struct.unpack(structFileHeader, fheader) 900 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 901 if fheader[_FH_EXTRA_FIELD_LENGTH]: 902 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) 903 904 if fname != zinfo.orig_filename: 905 raise BadZipfile, \ 906 'File name in directory "%s" and header "%s" differ.' % ( 907 zinfo.orig_filename, fname) 908 909 # check for encrypted flag & handle password 910 is_encrypted = zinfo.flag_bits & 0x1 911 zd = None 912 if is_encrypted: 913 if not pwd: 914 pwd = self.pwd 915 if not pwd: 916 raise RuntimeError, "File %s is encrypted, " \ 917 "password required for extraction" % name 918 919 zd = _ZipDecrypter(pwd) 920 # The first 12 bytes in the cypher stream is an encryption header 921 # used to strengthen the algorithm. The first 11 bytes are 922 # completely random, while the 12th contains the MSB of the CRC, 923 # or the MSB of the file time depending on the header type 924 # and is used to check the correctness of the password. 925 bytes = zef_file.read(12) 926 h = map(zd, bytes[0:12]) 927 if zinfo.flag_bits & 0x8: 928 # compare against the file type from extended local headers 929 check_byte = (zinfo._raw_time >> 8) & 0xff 930 else: 931 # compare against the CRC otherwise 932 check_byte = (zinfo.CRC >> 24) & 0xff 933 if ord(h[11]) != check_byte: 934 raise RuntimeError("Bad password for file", name) 935 936 return ZipExtFile(zef_file, mode, zinfo, zd) 937 938 def extract(self, member, path=None, pwd=None): 939 """Extract a member from the archive to the current working directory, 940 using its full name. Its file information is extracted as accurately 941 as possible. `member' may be a filename or a ZipInfo object. You can 942 specify a different directory using `path'. 943 """ 944 if not isinstance(member, ZipInfo): 945 member = self.getinfo(member) 946 947 if path is None: 948 path = os.getcwd() 949 950 return self._extract_member(member, path, pwd) 951 952 def extractall(self, path=None, members=None, pwd=None): 953 """Extract all members from the archive to the current working 954 directory. `path' specifies a different directory to extract to. 955 `members' is optional and must be a subset of the list returned 956 by namelist(). 957 """ 958 if members is None: 959 members = self.namelist() 960 961 for zipinfo in members: 962 self.extract(zipinfo, path, pwd) 963 964 def _extract_member(self, member, targetpath, pwd): 965 """Extract the ZipInfo object 'member' to a physical 966 file on the path targetpath. 967 """ 968 # build the destination pathname, replacing 969 # forward slashes to platform specific separators. 970 # Strip trailing path separator, unless it represents the root. 971 if (targetpath[-1:] in (os.path.sep, os.path.altsep) 972 and len(os.path.splitdrive(targetpath)[1]) > 1): 973 targetpath = targetpath[:-1] 974 975 # don't include leading "/" from file name if present 976 if member.filename[0] == '/': 977 targetpath = os.path.join(targetpath, member.filename[1:]) 978 else: 979 targetpath = os.path.join(targetpath, member.filename) 980 981 targetpath = os.path.normpath(targetpath) 982 983 # Create all upper directories if necessary. 984 upperdirs = os.path.dirname(targetpath) 985 if upperdirs and not os.path.exists(upperdirs): 986 os.makedirs(upperdirs) 987 988 if member.filename[-1] == '/': 989 if not os.path.isdir(targetpath): 990 os.mkdir(targetpath) 991 return targetpath 992 993 source = self.open(member, pwd=pwd) 994 target = file(targetpath, "wb") 995 shutil.copyfileobj(source, target) 996 source.close() 997 target.close() 998 999 return targetpath 1000 1001 def _writecheck(self, zinfo): 1002 """Check for errors before writing a file to the archive.""" 1003 if zinfo.filename in self.NameToInfo: 1004 if self.debug: # Warning for duplicate names 1005 print "Duplicate name:", zinfo.filename 1006 if self.mode not in ("w", "a"): 1007 raise RuntimeError, 'write() requires mode "w" or "a"' 1008 if not self.fp: 1009 raise RuntimeError, \ 1010 "Attempt to write ZIP archive that was already closed" 1011 if zinfo.compress_type == ZIP_DEFLATED and not zlib: 1012 raise RuntimeError, \ 1013 "Compression requires the (missing) zlib module" 1014 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED): 1015 raise RuntimeError, \ 1016 "That compression method is not supported" 1017 if zinfo.file_size > ZIP64_LIMIT: 1018 if not self._allowZip64: 1019 raise LargeZipFile("Filesize would require ZIP64 extensions") 1020 if zinfo.header_offset > ZIP64_LIMIT: 1021 if not self._allowZip64: 1022 raise LargeZipFile("Zipfile size would require ZIP64 extensions") 1023 1024 def write(self, filename, arcname=None, compress_type=None): 1025 """Put the bytes from filename into the archive under the name 1026 arcname.""" 1027 if not self.fp: 1028 raise RuntimeError( 1029 "Attempt to write to ZIP archive that was already closed") 1030 1031 st = os.stat(filename) 1032 isdir = stat.S_ISDIR(st.st_mode) 1033 mtime = time.localtime(st.st_mtime) 1034 date_time = mtime[0:6] 1035 # Create ZipInfo instance to store file information 1036 if arcname is None: 1037 arcname = filename 1038 arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) 1039 while arcname[0] in (os.sep, os.altsep): 1040 arcname = arcname[1:] 1041 if isdir: 1042 arcname += '/' 1043 zinfo = ZipInfo(arcname, date_time) 1044 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes 1045 if compress_type is None: 1046 zinfo.compress_type = self.compression 1047 else: 1048 zinfo.compress_type = compress_type 1049 1050 zinfo.file_size = st.st_size 1051 zinfo.flag_bits = 0x00 1052 zinfo.header_offset = self.fp.tell() # Start of header bytes 1053 1054 self._writecheck(zinfo) 1055 self._didModify = True 1056 1057 if isdir: 1058 zinfo.file_size = 0 1059 zinfo.compress_size = 0 1060 zinfo.CRC = 0 1061 self.filelist.append(zinfo) 1062 self.NameToInfo[zinfo.filename] = zinfo 1063 self.fp.write(zinfo.FileHeader()) 1064 return 1065 1066 with open(filename, "rb") as fp: 1067 # Must overwrite CRC and sizes with correct data later 1068 zinfo.CRC = CRC = 0 1069 zinfo.compress_size = compress_size = 0 1070 zinfo.file_size = file_size = 0 1071 self.fp.write(zinfo.FileHeader()) 1072 if zinfo.compress_type == ZIP_DEFLATED: 1073 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, 1074 zlib.DEFLATED, -15) 1075 else: 1076 cmpr = None 1077 while 1: 1078 buf = fp.read(1024 * 8) 1079 if not buf: 1080 break 1081 file_size = file_size + len(buf) 1082 CRC = crc32(buf, CRC) & 0xffffffff 1083 if cmpr: 1084 buf = cmpr.compress(buf) 1085 compress_size = compress_size + len(buf) 1086 self.fp.write(buf) 1087 if cmpr: 1088 buf = cmpr.flush() 1089 compress_size = compress_size + len(buf) 1090 self.fp.write(buf) 1091 zinfo.compress_size = compress_size 1092 else: 1093 zinfo.compress_size = file_size 1094 zinfo.CRC = CRC 1095 zinfo.file_size = file_size 1096 # Seek backwards and write CRC and file sizes 1097 position = self.fp.tell() # Preserve current position in file 1098 self.fp.seek(zinfo.header_offset + 14, 0) 1099 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size, 1100 zinfo.file_size)) 1101 self.fp.seek(position, 0) 1102 self.filelist.append(zinfo) 1103 self.NameToInfo[zinfo.filename] = zinfo 1104 1105 def writestr(self, zinfo_or_arcname, bytes, compress_type=None): 1106 """Write a file into the archive. The contents is the string 1107 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or 1108 the name of the file in the archive.""" 1109 if not isinstance(zinfo_or_arcname, ZipInfo): 1110 zinfo = ZipInfo(filename=zinfo_or_arcname, 1111 date_time=time.localtime(time.time())[:6]) 1112 1113 zinfo.compress_type = self.compression 1114 zinfo.external_attr = 0600 << 16 1115 else: 1116 zinfo = zinfo_or_arcname 1117 1118 if not self.fp: 1119 raise RuntimeError( 1120 "Attempt to write to ZIP archive that was already closed") 1121 1122 if compress_type is not None: 1123 zinfo.compress_type = compress_type 1124 1125 zinfo.file_size = len(bytes) # Uncompressed size 1126 zinfo.header_offset = self.fp.tell() # Start of header bytes 1127 self._writecheck(zinfo) 1128 self._didModify = True 1129 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum 1130 if zinfo.compress_type == ZIP_DEFLATED: 1131 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, 1132 zlib.DEFLATED, -15) 1133 bytes = co.compress(bytes) + co.flush() 1134 zinfo.compress_size = len(bytes) # Compressed size 1135 else: 1136 zinfo.compress_size = zinfo.file_size 1137 zinfo.header_offset = self.fp.tell() # Start of header bytes 1138 self.fp.write(zinfo.FileHeader()) 1139 self.fp.write(bytes) 1140 self.fp.flush() 1141 if zinfo.flag_bits & 0x08: 1142 # Write CRC and file sizes after the file data 1143 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size, 1144 zinfo.file_size)) 1145 self.filelist.append(zinfo) 1146 self.NameToInfo[zinfo.filename] = zinfo 1147 1148 def __del__(self): 1149 """Call the "close()" method in case the user forgot.""" 1150 self.close() 1151 1152 def close(self): 1153 """Close the file, and for mode "w" and "a" write the ending 1154 records.""" 1155 if self.fp is None: 1156 return 1157 1158 if self.mode in ("w", "a") and self._didModify: # write ending records 1159 count = 0 1160 pos1 = self.fp.tell() 1161 for zinfo in self.filelist: # write central directory 1162 count = count + 1 1163 dt = zinfo.date_time 1164 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 1165 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 1166 extra = [] 1167 if zinfo.file_size > ZIP64_LIMIT \ 1168 or zinfo.compress_size > ZIP64_LIMIT: 1169 extra.append(zinfo.file_size) 1170 extra.append(zinfo.compress_size) 1171 file_size = 0xffffffff 1172 compress_size = 0xffffffff 1173 else: 1174 file_size = zinfo.file_size 1175 compress_size = zinfo.compress_size 1176 1177 if zinfo.header_offset > ZIP64_LIMIT: 1178 extra.append(zinfo.header_offset) 1179 header_offset = 0xffffffffL 1180 else: 1181 header_offset = zinfo.header_offset 1182 1183 extra_data = zinfo.extra 1184 if extra: 1185 # Append a ZIP64 field to the extra's 1186 extra_data = struct.pack( 1187 '<HH' + 'Q'*len(extra), 1188 1, 8*len(extra), *extra) + extra_data 1189 1190 extract_version = max(45, zinfo.extract_version) 1191 create_version = max(45, zinfo.create_version) 1192 else: 1193 extract_version = zinfo.extract_version 1194 create_version = zinfo.create_version 1195 1196 try: 1197 filename, flag_bits = zinfo._encodeFilenameFlags() 1198 centdir = struct.pack(structCentralDir, 1199 stringCentralDir, create_version, 1200 zinfo.create_system, extract_version, zinfo.reserved, 1201 flag_bits, zinfo.compress_type, dostime, dosdate, 1202 zinfo.CRC, compress_size, file_size, 1203 len(filename), len(extra_data), len(zinfo.comment), 1204 0, zinfo.internal_attr, zinfo.external_attr, 1205 header_offset) 1206 except DeprecationWarning: 1207 print >>sys.stderr, (structCentralDir, 1208 stringCentralDir, create_version, 1209 zinfo.create_system, extract_version, zinfo.reserved, 1210 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, 1211 zinfo.CRC, compress_size, file_size, 1212 len(zinfo.filename), len(extra_data), len(zinfo.comment), 1213 0, zinfo.internal_attr, zinfo.external_attr, 1214 header_offset) 1215 raise 1216 self.fp.write(centdir) 1217 self.fp.write(filename) 1218 self.fp.write(extra_data) 1219 self.fp.write(zinfo.comment) 1220 1221 pos2 = self.fp.tell() 1222 # Write end-of-zip-archive record 1223 centDirCount = count 1224 centDirSize = pos2 - pos1 1225 centDirOffset = pos1 1226 if (centDirCount >= ZIP_FILECOUNT_LIMIT or 1227 centDirOffset > ZIP64_LIMIT or 1228 centDirSize > ZIP64_LIMIT): 1229 # Need to write the ZIP64 end-of-archive records 1230 zip64endrec = struct.pack( 1231 structEndArchive64, stringEndArchive64, 1232 44, 45, 45, 0, 0, centDirCount, centDirCount, 1233 centDirSize, centDirOffset) 1234 self.fp.write(zip64endrec) 1235 1236 zip64locrec = struct.pack( 1237 structEndArchive64Locator, 1238 stringEndArchive64Locator, 0, pos2, 1) 1239 self.fp.write(zip64locrec) 1240 centDirCount = min(centDirCount, 0xFFFF) 1241 centDirSize = min(centDirSize, 0xFFFFFFFF) 1242 centDirOffset = min(centDirOffset, 0xFFFFFFFF) 1243 1244 # check for valid comment length 1245 if len(self.comment) >= ZIP_MAX_COMMENT: 1246 if self.debug > 0: 1247 msg = 'Archive comment is too long; truncating to %d bytes' \ 1248 % ZIP_MAX_COMMENT 1249 self.comment = self.comment[:ZIP_MAX_COMMENT] 1250 1251 endrec = struct.pack(structEndArchive, stringEndArchive, 1252 0, 0, centDirCount, centDirCount, 1253 centDirSize, centDirOffset, len(self.comment)) 1254 self.fp.write(endrec) 1255 self.fp.write(self.comment) 1256 self.fp.flush() 1257 1258 if not self._filePassed: 1259 self.fp.close() 1260 self.fp = None 1261 1262 1263class PyZipFile(ZipFile): 1264 """Class to create ZIP archives with Python library files and packages.""" 1265 1266 def writepy(self, pathname, basename = ""): 1267 """Add all files from "pathname" to the ZIP archive. 1268 1269 If pathname is a package directory, search the directory and 1270 all package subdirectories recursively for all *.py and enter 1271 the modules into the archive. If pathname is a plain 1272 directory, listdir *.py and enter all modules. Else, pathname 1273 must be a Python *.py file and the module will be put into the 1274 archive. Added modules are always module.pyo or module.pyc. 1275 This method will compile the module.py into module.pyc if 1276 necessary. 1277 """ 1278 dir, name = os.path.split(pathname) 1279 if os.path.isdir(pathname): 1280 initname = os.path.join(pathname, "__init__.py") 1281 if os.path.isfile(initname): 1282 # This is a package directory, add it 1283 if basename: 1284 basename = "%s/%s" % (basename, name) 1285 else: 1286 basename = name 1287 if self.debug: 1288 print "Adding package in", pathname, "as", basename 1289 fname, arcname = self._get_codename(initname[0:-3], basename) 1290 if self.debug: 1291 print "Adding", arcname 1292 self.write(fname, arcname) 1293 dirlist = os.listdir(pathname) 1294 dirlist.remove("__init__.py") 1295 # Add all *.py files and package subdirectories 1296 for filename in dirlist: 1297 path = os.path.join(pathname, filename) 1298 root, ext = os.path.splitext(filename) 1299 if os.path.isdir(path): 1300 if os.path.isfile(os.path.join(path, "__init__.py")): 1301 # This is a package directory, add it 1302 self.writepy(path, basename) # Recursive call 1303 elif ext == ".py": 1304 fname, arcname = self._get_codename(path[0:-3], 1305 basename) 1306 if self.debug: 1307 print "Adding", arcname 1308 self.write(fname, arcname) 1309 else: 1310 # This is NOT a package directory, add its files at top level 1311 if self.debug: 1312 print "Adding files from directory", pathname 1313 for filename in os.listdir(pathname): 1314 path = os.path.join(pathname, filename) 1315 root, ext = os.path.splitext(filename) 1316 if ext == ".py": 1317 fname, arcname = self._get_codename(path[0:-3], 1318 basename) 1319 if self.debug: 1320 print "Adding", arcname 1321 self.write(fname, arcname) 1322 else: 1323 if pathname[-3:] != ".py": 1324 raise RuntimeError, \ 1325 'Files added with writepy() must end with ".py"' 1326 fname, arcname = self._get_codename(pathname[0:-3], basename) 1327 if self.debug: 1328 print "Adding file", arcname 1329 self.write(fname, arcname) 1330 1331 def _get_codename(self, pathname, basename): 1332 """Return (filename, archivename) for the path. 1333 1334 Given a module name path, return the correct file path and 1335 archive name, compiling if necessary. For example, given 1336 /python/lib/string, return (/python/lib/string.pyc, string). 1337 """ 1338 file_py = pathname + ".py" 1339 file_pyc = pathname + ".pyc" 1340 file_pyo = pathname + ".pyo" 1341 if os.path.isfile(file_pyo) and \ 1342 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime: 1343 fname = file_pyo # Use .pyo file 1344 elif not os.path.isfile(file_pyc) or \ 1345 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime: 1346 import py_compile 1347 if self.debug: 1348 print "Compiling", file_py 1349 try: 1350 py_compile.compile(file_py, file_pyc, None, True) 1351 except py_compile.PyCompileError,err: 1352 print err.msg 1353 fname = file_pyc 1354 else: 1355 fname = file_pyc 1356 archivename = os.path.split(fname)[1] 1357 if basename: 1358 archivename = "%s/%s" % (basename, archivename) 1359 return (fname, archivename) 1360 1361 1362def main(args = None): 1363 import textwrap 1364 USAGE=textwrap.dedent("""\ 1365 Usage: 1366 zipfile.py -l zipfile.zip # Show listing of a zipfile 1367 zipfile.py -t zipfile.zip # Test if a zipfile is valid 1368 zipfile.py -e zipfile.zip target # Extract zipfile into target dir 1369 zipfile.py -c zipfile.zip src ... # Create zipfile from sources 1370 """) 1371 if args is None: 1372 args = sys.argv[1:] 1373 1374 if not args or args[0] not in ('-l', '-c', '-e', '-t'): 1375 print USAGE 1376 sys.exit(1) 1377 1378 if args[0] == '-l': 1379 if len(args) != 2: 1380 print USAGE 1381 sys.exit(1) 1382 zf = ZipFile(args[1], 'r') 1383 zf.printdir() 1384 zf.close() 1385 1386 elif args[0] == '-t': 1387 if len(args) != 2: 1388 print USAGE 1389 sys.exit(1) 1390 zf = ZipFile(args[1], 'r') 1391 badfile = zf.testzip() 1392 if badfile: 1393 print("The following enclosed file is corrupted: {!r}".format(badfile)) 1394 print "Done testing" 1395 1396 elif args[0] == '-e': 1397 if len(args) != 3: 1398 print USAGE 1399 sys.exit(1) 1400 1401 zf = ZipFile(args[1], 'r') 1402 out = args[2] 1403 for path in zf.namelist(): 1404 if path.startswith('./'): 1405 tgt = os.path.join(out, path[2:]) 1406 else: 1407 tgt = os.path.join(out, path) 1408 1409 tgtdir = os.path.dirname(tgt) 1410 if not os.path.exists(tgtdir): 1411 os.makedirs(tgtdir) 1412 with open(tgt, 'wb') as fp: 1413 fp.write(zf.read(path)) 1414 zf.close() 1415 1416 elif args[0] == '-c': 1417 if len(args) < 3: 1418 print USAGE 1419 sys.exit(1) 1420 1421 def addToZip(zf, path, zippath): 1422 if os.path.isfile(path): 1423 zf.write(path, zippath, ZIP_DEFLATED) 1424 elif os.path.isdir(path): 1425 for nm in os.listdir(path): 1426 addToZip(zf, 1427 os.path.join(path, nm), os.path.join(zippath, nm)) 1428 # else: ignore 1429 1430 zf = ZipFile(args[1], 'w', allowZip64=True) 1431 for src in args[2:]: 1432 addToZip(zf, src, os.path.basename(src)) 1433 1434 zf.close() 1435 1436if __name__ == "__main__": 1437 main() 1438