• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""
2Read and write ZIP files.
3"""
4import struct, os, time, sys, shutil
5import binascii, cStringIO, stat
6import io
7import re
8
9try:
10    import zlib # We may need its compression method
11    crc32 = zlib.crc32
12except ImportError:
13    zlib = None
14    crc32 = binascii.crc32
15
16__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
17           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
18
class BadZipfile(Exception):
    """Raised by this module when a file cannot be handled as a ZIP archive.

    Used for a missing or malformed directory structure, unsupported
    multi-disk archives and CRC-32 mismatches on member data.
    """
21
22
class LargeZipFile(Exception):
    """Signal that an archive being written needs ZIP64 extensions.

    Raised while writing a zipfile when the zipfile requires ZIP64
    extensions and those extensions are disabled.
    """
28
29error = BadZipfile      # The exception raised by this module
30
# Largest value that fits in a signed 32-bit integer.  ZipInfo.FileHeader
# falls back to the ZIP64 extra field for sizes above this limit.
ZIP64_LIMIT = (1 << 31) - 1
# 65536 -- presumably tied to the 16-bit entry-count fields of the
# "end of central directory" record (TODO confirm at the use site).
ZIP_FILECOUNT_LIMIT = 1 << 16
# 65535: the largest length an unsigned 16-bit comment-size field can hold.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0      # no compression
ZIP_DEFLATED = 8    # deflate; needs the zlib module
# Other ZIP compression methods not supported
39
40# Below are some formats and associated data for reading/writing headers using
41# the struct module.  The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
45
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Layout (little-endian, no padding): 4-byte signature, four unsigned 16-bit
# fields, two unsigned 32-bit fields, one unsigned 16-bit field -- 22 bytes.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Indices into the list produced by unpacking structEndArchive
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment text and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
64
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Layout (little-endian, no padding): 4-byte signature, four unsigned bytes,
# four unsigned 16-bit fields, three unsigned 32-bit fields, five unsigned
# 16-bit fields and two unsigned 32-bit fields -- 46 bytes, 19 items.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
# The three length fields give the sizes of the variable-length file name,
# extra field and comment that follow the fixed-size part of each entry.
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
91
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Layout (little-endian, no padding): 4-byte signature, two unsigned bytes,
# four unsigned 16-bit fields, three unsigned 32-bit fields and two unsigned
# 16-bit fields -- 30 bytes, 12 items.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Indices into the tuple produced by unpacking structFileHeader
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
110
# The "Zip64 end of central directory locator" structure, magic number, and size
# Layout (little-endian): 4-byte signature, unsigned 32-bit, unsigned 64-bit,
# unsigned 32-bit -- 20 bytes.  _EndRecData64 unpacks these as
# (signature, disk number, relative offset, number of disks).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Layout (little-endian): 4-byte signature, unsigned 64-bit, two unsigned
# 16-bit, two unsigned 32-bit and four unsigned 64-bit fields -- 56 bytes.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Indices into the tuple produced by unpacking structEndArchive64
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Return True when an "end of central directory" record is found in fp.

    fp must be a seekable file object.  An IOError raised while probing the
    file is treated the same as "record not found".
    """
    try:
        found = bool(_EndRecData(fp))   # file has correct magic number
    except IOError:
        found = False
    return found
140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or an object with a ``read``
    attribute, which is then treated as an already-open file.  Any IOError
    (for example an unreadable path) yields the answer gathered so far,
    which is False unless the check had already succeeded.
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it is closed again.
            with open(filename, "rb") as fp:
                is_zip = _check_zipfile(fp)
        else:
            is_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return is_zip
156
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin:   seekable file object positioned anywhere.
    offset: position of the classic "end of central directory" record,
            expressed relative to the end of the file (a negative number).
    endrec: list built by _EndRecData; it is updated in place and returned.

    The list is returned unchanged when no ZIP64 locator/record precedes the
    classic record, including when the file is too short to hold one.
    Raises BadZipfile for archives spanning multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: not every file object fails the seek above on a small
        # file, and unpacking fewer bytes would raise struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record: keep the classic values.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
194
195
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    fpin must be a seekable file object.  None is returned when no valid
    record is found, including when the candidate record is truncated.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test guards against file objects that clamp the seek instead
    # of raising: unpacking a short buffer would raise struct.error.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
252
253
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for *filename* last modified at *date_time*.

        filename:  member name; truncated at the first null byte and
                   normalised to use "/" as the directory separator.
        date_time: (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When either size exceeds ZIP64_LIMIT
        a ZIP64 extra record is appended, the 32-bit size fields are set to
        0xffffffff and extract_version/create_version are raised to at
        least 45 on this instance.
        """
        dt = self.date_time
        # Pack the date and time into the 16-bit DOS fields (2-second
        # resolution for the seconds).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        A unicode name is stored as ASCII when possible; otherwise it is
        encoded as UTF-8 and flag bit 0x800 is set.  Byte-string names are
        passed through unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the file name, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this instance.

        The extra field is a sequence of (type, length, payload) records.
        For a type 1 (ZIP64) record the 64-bit values replace, in order,
        file_size, compress_size and header_offset -- but only for those
        attributes currently holding the 0xffffffff placeholder.

        Raises RuntimeError for a ZIP64 record with an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Each record needs a 4-byte header; trailing padding shorter than
        # that is ignored instead of making struct.unpack fail.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Skip this record's header and payload.
            extra = extra[ln+4:]
401
402
403class _ZipDecrypter:
404    """Class to handle decryption of files stored within a ZIP archive.
405
406    ZIP supports a password-based form of encryption. Even though known
407    plaintext attacks have been found against it, it is still useful
408    to be able to get data out of such a file.
409
410    Usage:
411        zd = _ZipDecrypter(mypwd)
412        plain_char = zd(cypher_char)
413        plain_text = map(zd, cypher_text)
414    """
415
416    def _GenerateCRCTable():
417        """Generate a CRC-32 table.
418
419        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
420        internal keys. We noticed that a direct implementation is faster than
421        relying on binascii.crc32().
422        """
423        poly = 0xedb88320
424        table = [0] * 256
425        for i in range(256):
426            crc = i
427            for j in range(8):
428                if crc & 1:
429                    crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
430                else:
431                    crc = ((crc >> 1) & 0x7FFFFFFF)
432            table[i] = crc
433        return table
434    crctable = _GenerateCRCTable()
435
436    def _crc32(self, ch, crc):
437        """Compute the CRC32 primitive on one byte."""
438        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
439
440    def __init__(self, pwd):
441        self.key0 = 305419896
442        self.key1 = 591751049
443        self.key2 = 878082192
444        for p in pwd:
445            self._UpdateKeys(p)
446
447    def _UpdateKeys(self, c):
448        self.key0 = self._crc32(c, self.key0)
449        self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
450        self.key1 = (self.key1 * 134775813 + 1) & 4294967295
451        self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
452
453    def __call__(self, c):
454        """Decrypt a single character."""
455        c = ord(c)
456        k = self.key2 | 2
457        c = c ^ (((k * (k^1)) >> 8) & 255)
458        c = chr(c)
459        self._UpdateKeys(c)
460        return c
461
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().

       Decrypted/decompressed data is accumulated in ``_readbuffer`` and
       ``_offset`` marks how much of it has already been handed out.
    """

    # Max size supported by decompressor.  Parenthesised on purpose:
    # "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap *fileobj*, positioned at the start of the member's data.

        fileobj:   file object to read the raw member bytes from.
        mode:      open mode; a 'U' in it enables universal newlines.
        zipinfo:   ZipInfo describing the member (sizes, compression, CRC).
        decrypter: optional callable applied to every raw byte read.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            # Negative window size: raw deflate stream without zlib header.
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                # read() needed several read1() calls, each of which drops
                # the consumed part of the buffer; simply rewinding would
                # make the offset negative, so put the chunk back instead.
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Report that this stream supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC; verify it when eof is true.

        Raises BadZipfile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                # Drop what was already consumed and append the new data.
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
649
650
651
652class ZipFile:
653    """ Class with methods to open, read, write, close, list zip files.
654
655    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
656
657    file: Either the path to the file, or a file-like object.
658          If it is a path, the file will be opened and closed by ZipFile.
659    mode: The mode can be either read "r", write "w" or append "a".
660    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
661    allowZip64: if True ZipFile will create files with ZIP64 extensions when
662                needed, otherwise it will raise an exception when this would
663                be necessary.
664
665    """
666
667    fp = None                   # Set here since __del__ checks it
668
669    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
670        """Open the ZIP file with mode read "r", write "w" or append "a"."""
671        if mode not in ("r", "w", "a"):
672            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
673
674        if compression == ZIP_STORED:
675            pass
676        elif compression == ZIP_DEFLATED:
677            if not zlib:
678                raise RuntimeError,\
679                      "Compression requires the (missing) zlib module"
680        else:
681            raise RuntimeError, "That compression method is not supported"
682
683        self._allowZip64 = allowZip64
684        self._didModify = False
685        self.debug = 0  # Level of printing: 0 through 3
686        self.NameToInfo = {}    # Find file info given name
687        self.filelist = []      # List of ZipInfo instances for archive
688        self.compression = compression  # Method of compression
689        self.mode = key = mode.replace('b', '')[0]
690        self.pwd = None
691        self.comment = ''
692
693        # Check if we were passed a file-like object
694        if isinstance(file, basestring):
695            self._filePassed = 0
696            self.filename = file
697            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
698            try:
699                self.fp = open(file, modeDict[mode])
700            except IOError:
701                if mode == 'a':
702                    mode = key = 'w'
703                    self.fp = open(file, modeDict[mode])
704                else:
705                    raise
706        else:
707            self._filePassed = 1
708            self.fp = file
709            self.filename = getattr(file, 'name', None)
710
711        if key == 'r':
712            self._GetContents()
713        elif key == 'w':
714            # set the modified flag so central directory gets written
715            # even if no files are added to the archive
716            self._didModify = True
717        elif key == 'a':
718            try:
719                # See if file is a zip file
720                self._RealGetContents()
721                # seek to start of directory and overwrite
722                self.fp.seek(self.start_dir, 0)
723            except BadZipfile:
724                # file is not a zip file, just append
725                self.fp.seek(0, 2)
726
727                # set the modified flag so central directory gets written
728                # even if no files are added to the archive
729                self._didModify = True
730        else:
731            if not self._filePassed:
732                self.fp.close()
733                self.fp = None
734            raise RuntimeError, 'Mode must be "r", "w" or "a"'
735
736    def __enter__(self):
737        return self
738
739    def __exit__(self, type, value, traceback):
740        self.close()
741
742    def _GetContents(self):
743        """Read the directory, making sure we close the file if the format
744        is bad."""
745        try:
746            self._RealGetContents()
747        except BadZipfile:
748            if not self._filePassed:
749                self.fp.close()
750                self.fp = None
751            raise
752
753    def _RealGetContents(self):
754        """Read in the table of contents for the ZIP file."""
755        fp = self.fp
756        try:
757            endrec = _EndRecData(fp)
758        except IOError:
759            raise BadZipfile("File is not a zip file")
760        if not endrec:
761            raise BadZipfile, "File is not a zip file"
762        if self.debug > 1:
763            print endrec
764        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
765        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
766        self.comment = endrec[_ECD_COMMENT]     # archive comment
767
768        # "concat" is zero, unless zip was concatenated to another file
769        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
770        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
771            # If Zip64 extension structures are present, account for them
772            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
773
774        if self.debug > 2:
775            inferred = concat + offset_cd
776            print "given, inferred, offset", offset_cd, inferred, concat
777        # self.start_dir:  Position of start of central directory
778        self.start_dir = offset_cd + concat
779        fp.seek(self.start_dir, 0)
780        data = fp.read(size_cd)
781        fp = cStringIO.StringIO(data)
782        total = 0
783        while total < size_cd:
784            centdir = fp.read(sizeCentralDir)
785            if centdir[0:4] != stringCentralDir:
786                raise BadZipfile, "Bad magic number for central directory"
787            centdir = struct.unpack(structCentralDir, centdir)
788            if self.debug > 2:
789                print centdir
790            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
791            # Create ZipInfo instance to store file information
792            x = ZipInfo(filename)
793            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
794            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
795            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
796            (x.create_version, x.create_system, x.extract_version, x.reserved,
797                x.flag_bits, x.compress_type, t, d,
798                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
799            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
800            # Convert date/time code to (year, month, day, hour, min, sec)
801            x._raw_time = t
802            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
803                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
804
805            x._decodeExtra()
806            x.header_offset = x.header_offset + concat
807            x.filename = x._decodeFilename()
808            self.filelist.append(x)
809            self.NameToInfo[x.filename] = x
810
811            # update total bytes read from central directory
812            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
813                     + centdir[_CD_EXTRA_FIELD_LENGTH]
814                     + centdir[_CD_COMMENT_LENGTH])
815
816            if self.debug > 2:
817                print "total", total
818
819
820    def namelist(self):
821        """Return a list of file names in the archive."""
822        l = []
823        for data in self.filelist:
824            l.append(data.filename)
825        return l
826
827    def infolist(self):
828        """Return a list of class ZipInfo instances for files in the
829        archive."""
830        return self.filelist
831
832    def printdir(self):
833        """Print a table of contents for the zip file."""
834        print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
835        for zinfo in self.filelist:
836            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
837            print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
838
839    def testzip(self):
840        """Read all the files and check the CRC."""
841        chunk_size = 2 ** 20
842        for zinfo in self.filelist:
843            try:
844                # Read by chunks, to avoid an OverflowError or a
845                # MemoryError with very large embedded files.
846                f = self.open(zinfo.filename, "r")
847                while f.read(chunk_size):     # Check CRC-32
848                    pass
849            except BadZipfile:
850                return zinfo.filename
851
852    def getinfo(self, name):
853        """Return the instance of ZipInfo given 'name'."""
854        info = self.NameToInfo.get(name)
855        if info is None:
856            raise KeyError(
857                'There is no item named %r in the archive' % name)
858
859        return info
860
861    def setpassword(self, pwd):
862        """Set default password for encrypted files."""
863        self.pwd = pwd
864
865    def read(self, name, pwd=None):
866        """Return file bytes (as a string) for name."""
867        return self.open(name, "r", pwd).read()
868
869    def open(self, name, mode="r", pwd=None):
870        """Return file-like object for 'name'."""
871        if mode not in ("r", "U", "rU"):
872            raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
873        if not self.fp:
874            raise RuntimeError, \
875                  "Attempt to read ZIP archive that was already closed"
876
877        # Only open a new file for instances where we were not
878        # given a file object in the constructor
879        if self._filePassed:
880            zef_file = self.fp
881        else:
882            zef_file = open(self.filename, 'rb')
883
884        # Make sure we have an info object
885        if isinstance(name, ZipInfo):
886            # 'name' is already an info object
887            zinfo = name
888        else:
889            # Get info object for name
890            zinfo = self.getinfo(name)
891
892        zef_file.seek(zinfo.header_offset, 0)
893
894        # Skip the file header:
895        fheader = zef_file.read(sizeFileHeader)
896        if fheader[0:4] != stringFileHeader:
897            raise BadZipfile, "Bad magic number for file header"
898
899        fheader = struct.unpack(structFileHeader, fheader)
900        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
901        if fheader[_FH_EXTRA_FIELD_LENGTH]:
902            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
903
904        if fname != zinfo.orig_filename:
905            raise BadZipfile, \
906                      'File name in directory "%s" and header "%s" differ.' % (
907                          zinfo.orig_filename, fname)
908
909        # check for encrypted flag & handle password
910        is_encrypted = zinfo.flag_bits & 0x1
911        zd = None
912        if is_encrypted:
913            if not pwd:
914                pwd = self.pwd
915            if not pwd:
916                raise RuntimeError, "File %s is encrypted, " \
917                      "password required for extraction" % name
918
919            zd = _ZipDecrypter(pwd)
920            # The first 12 bytes in the cypher stream is an encryption header
921            #  used to strengthen the algorithm. The first 11 bytes are
922            #  completely random, while the 12th contains the MSB of the CRC,
923            #  or the MSB of the file time depending on the header type
924            #  and is used to check the correctness of the password.
925            bytes = zef_file.read(12)
926            h = map(zd, bytes[0:12])
927            if zinfo.flag_bits & 0x8:
928                # compare against the file type from extended local headers
929                check_byte = (zinfo._raw_time >> 8) & 0xff
930            else:
931                # compare against the CRC otherwise
932                check_byte = (zinfo.CRC >> 24) & 0xff
933            if ord(h[11]) != check_byte:
934                raise RuntimeError("Bad password for file", name)
935
936        return  ZipExtFile(zef_file, mode, zinfo, zd)
937
938    def extract(self, member, path=None, pwd=None):
939        """Extract a member from the archive to the current working directory,
940           using its full name. Its file information is extracted as accurately
941           as possible. `member' may be a filename or a ZipInfo object. You can
942           specify a different directory using `path'.
943        """
944        if not isinstance(member, ZipInfo):
945            member = self.getinfo(member)
946
947        if path is None:
948            path = os.getcwd()
949
950        return self._extract_member(member, path, pwd)
951
952    def extractall(self, path=None, members=None, pwd=None):
953        """Extract all members from the archive to the current working
954           directory. `path' specifies a different directory to extract to.
955           `members' is optional and must be a subset of the list returned
956           by namelist().
957        """
958        if members is None:
959            members = self.namelist()
960
961        for zipinfo in members:
962            self.extract(zipinfo, path, pwd)
963
964    def _extract_member(self, member, targetpath, pwd):
965        """Extract the ZipInfo object 'member' to a physical
966           file on the path targetpath.
967        """
968        # build the destination pathname, replacing
969        # forward slashes to platform specific separators.
970        # Strip trailing path separator, unless it represents the root.
971        if (targetpath[-1:] in (os.path.sep, os.path.altsep)
972            and len(os.path.splitdrive(targetpath)[1]) > 1):
973            targetpath = targetpath[:-1]
974
975        # don't include leading "/" from file name if present
976        if member.filename[0] == '/':
977            targetpath = os.path.join(targetpath, member.filename[1:])
978        else:
979            targetpath = os.path.join(targetpath, member.filename)
980
981        targetpath = os.path.normpath(targetpath)
982
983        # Create all upper directories if necessary.
984        upperdirs = os.path.dirname(targetpath)
985        if upperdirs and not os.path.exists(upperdirs):
986            os.makedirs(upperdirs)
987
988        if member.filename[-1] == '/':
989            if not os.path.isdir(targetpath):
990                os.mkdir(targetpath)
991            return targetpath
992
993        source = self.open(member, pwd=pwd)
994        target = file(targetpath, "wb")
995        shutil.copyfileobj(source, target)
996        source.close()
997        target.close()
998
999        return targetpath
1000
1001    def _writecheck(self, zinfo):
1002        """Check for errors before writing a file to the archive."""
1003        if zinfo.filename in self.NameToInfo:
1004            if self.debug:      # Warning for duplicate names
1005                print "Duplicate name:", zinfo.filename
1006        if self.mode not in ("w", "a"):
1007            raise RuntimeError, 'write() requires mode "w" or "a"'
1008        if not self.fp:
1009            raise RuntimeError, \
1010                  "Attempt to write ZIP archive that was already closed"
1011        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1012            raise RuntimeError, \
1013                  "Compression requires the (missing) zlib module"
1014        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1015            raise RuntimeError, \
1016                  "That compression method is not supported"
1017        if zinfo.file_size > ZIP64_LIMIT:
1018            if not self._allowZip64:
1019                raise LargeZipFile("Filesize would require ZIP64 extensions")
1020        if zinfo.header_offset > ZIP64_LIMIT:
1021            if not self._allowZip64:
1022                raise LargeZipFile("Zipfile size would require ZIP64 extensions")
1023
1024    def write(self, filename, arcname=None, compress_type=None):
1025        """Put the bytes from filename into the archive under the name
1026        arcname."""
1027        if not self.fp:
1028            raise RuntimeError(
1029                  "Attempt to write to ZIP archive that was already closed")
1030
1031        st = os.stat(filename)
1032        isdir = stat.S_ISDIR(st.st_mode)
1033        mtime = time.localtime(st.st_mtime)
1034        date_time = mtime[0:6]
1035        # Create ZipInfo instance to store file information
1036        if arcname is None:
1037            arcname = filename
1038        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1039        while arcname[0] in (os.sep, os.altsep):
1040            arcname = arcname[1:]
1041        if isdir:
1042            arcname += '/'
1043        zinfo = ZipInfo(arcname, date_time)
1044        zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
1045        if compress_type is None:
1046            zinfo.compress_type = self.compression
1047        else:
1048            zinfo.compress_type = compress_type
1049
1050        zinfo.file_size = st.st_size
1051        zinfo.flag_bits = 0x00
1052        zinfo.header_offset = self.fp.tell()    # Start of header bytes
1053
1054        self._writecheck(zinfo)
1055        self._didModify = True
1056
1057        if isdir:
1058            zinfo.file_size = 0
1059            zinfo.compress_size = 0
1060            zinfo.CRC = 0
1061            self.filelist.append(zinfo)
1062            self.NameToInfo[zinfo.filename] = zinfo
1063            self.fp.write(zinfo.FileHeader())
1064            return
1065
1066        with open(filename, "rb") as fp:
1067            # Must overwrite CRC and sizes with correct data later
1068            zinfo.CRC = CRC = 0
1069            zinfo.compress_size = compress_size = 0
1070            zinfo.file_size = file_size = 0
1071            self.fp.write(zinfo.FileHeader())
1072            if zinfo.compress_type == ZIP_DEFLATED:
1073                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1074                     zlib.DEFLATED, -15)
1075            else:
1076                cmpr = None
1077            while 1:
1078                buf = fp.read(1024 * 8)
1079                if not buf:
1080                    break
1081                file_size = file_size + len(buf)
1082                CRC = crc32(buf, CRC) & 0xffffffff
1083                if cmpr:
1084                    buf = cmpr.compress(buf)
1085                    compress_size = compress_size + len(buf)
1086                self.fp.write(buf)
1087        if cmpr:
1088            buf = cmpr.flush()
1089            compress_size = compress_size + len(buf)
1090            self.fp.write(buf)
1091            zinfo.compress_size = compress_size
1092        else:
1093            zinfo.compress_size = file_size
1094        zinfo.CRC = CRC
1095        zinfo.file_size = file_size
1096        # Seek backwards and write CRC and file sizes
1097        position = self.fp.tell()       # Preserve current position in file
1098        self.fp.seek(zinfo.header_offset + 14, 0)
1099        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
1100              zinfo.file_size))
1101        self.fp.seek(position, 0)
1102        self.filelist.append(zinfo)
1103        self.NameToInfo[zinfo.filename] = zinfo
1104
1105    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
1106        """Write a file into the archive.  The contents is the string
1107        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
1108        the name of the file in the archive."""
1109        if not isinstance(zinfo_or_arcname, ZipInfo):
1110            zinfo = ZipInfo(filename=zinfo_or_arcname,
1111                            date_time=time.localtime(time.time())[:6])
1112
1113            zinfo.compress_type = self.compression
1114            zinfo.external_attr = 0600 << 16
1115        else:
1116            zinfo = zinfo_or_arcname
1117
1118        if not self.fp:
1119            raise RuntimeError(
1120                  "Attempt to write to ZIP archive that was already closed")
1121
1122        if compress_type is not None:
1123            zinfo.compress_type = compress_type
1124
1125        zinfo.file_size = len(bytes)            # Uncompressed size
1126        zinfo.header_offset = self.fp.tell()    # Start of header bytes
1127        self._writecheck(zinfo)
1128        self._didModify = True
1129        zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
1130        if zinfo.compress_type == ZIP_DEFLATED:
1131            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1132                 zlib.DEFLATED, -15)
1133            bytes = co.compress(bytes) + co.flush()
1134            zinfo.compress_size = len(bytes)    # Compressed size
1135        else:
1136            zinfo.compress_size = zinfo.file_size
1137        zinfo.header_offset = self.fp.tell()    # Start of header bytes
1138        self.fp.write(zinfo.FileHeader())
1139        self.fp.write(bytes)
1140        self.fp.flush()
1141        if zinfo.flag_bits & 0x08:
1142            # Write CRC and file sizes after the file data
1143            self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
1144                  zinfo.file_size))
1145        self.filelist.append(zinfo)
1146        self.NameToInfo[zinfo.filename] = zinfo
1147
1148    def __del__(self):
1149        """Call the "close()" method in case the user forgot."""
1150        self.close()
1151
1152    def close(self):
1153        """Close the file, and for mode "w" and "a" write the ending
1154        records."""
1155        if self.fp is None:
1156            return
1157
1158        if self.mode in ("w", "a") and self._didModify: # write ending records
1159            count = 0
1160            pos1 = self.fp.tell()
1161            for zinfo in self.filelist:         # write central directory
1162                count = count + 1
1163                dt = zinfo.date_time
1164                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1165                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1166                extra = []
1167                if zinfo.file_size > ZIP64_LIMIT \
1168                        or zinfo.compress_size > ZIP64_LIMIT:
1169                    extra.append(zinfo.file_size)
1170                    extra.append(zinfo.compress_size)
1171                    file_size = 0xffffffff
1172                    compress_size = 0xffffffff
1173                else:
1174                    file_size = zinfo.file_size
1175                    compress_size = zinfo.compress_size
1176
1177                if zinfo.header_offset > ZIP64_LIMIT:
1178                    extra.append(zinfo.header_offset)
1179                    header_offset = 0xffffffffL
1180                else:
1181                    header_offset = zinfo.header_offset
1182
1183                extra_data = zinfo.extra
1184                if extra:
1185                    # Append a ZIP64 field to the extra's
1186                    extra_data = struct.pack(
1187                            '<HH' + 'Q'*len(extra),
1188                            1, 8*len(extra), *extra) + extra_data
1189
1190                    extract_version = max(45, zinfo.extract_version)
1191                    create_version = max(45, zinfo.create_version)
1192                else:
1193                    extract_version = zinfo.extract_version
1194                    create_version = zinfo.create_version
1195
1196                try:
1197                    filename, flag_bits = zinfo._encodeFilenameFlags()
1198                    centdir = struct.pack(structCentralDir,
1199                     stringCentralDir, create_version,
1200                     zinfo.create_system, extract_version, zinfo.reserved,
1201                     flag_bits, zinfo.compress_type, dostime, dosdate,
1202                     zinfo.CRC, compress_size, file_size,
1203                     len(filename), len(extra_data), len(zinfo.comment),
1204                     0, zinfo.internal_attr, zinfo.external_attr,
1205                     header_offset)
1206                except DeprecationWarning:
1207                    print >>sys.stderr, (structCentralDir,
1208                     stringCentralDir, create_version,
1209                     zinfo.create_system, extract_version, zinfo.reserved,
1210                     zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1211                     zinfo.CRC, compress_size, file_size,
1212                     len(zinfo.filename), len(extra_data), len(zinfo.comment),
1213                     0, zinfo.internal_attr, zinfo.external_attr,
1214                     header_offset)
1215                    raise
1216                self.fp.write(centdir)
1217                self.fp.write(filename)
1218                self.fp.write(extra_data)
1219                self.fp.write(zinfo.comment)
1220
1221            pos2 = self.fp.tell()
1222            # Write end-of-zip-archive record
1223            centDirCount = count
1224            centDirSize = pos2 - pos1
1225            centDirOffset = pos1
1226            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1227                centDirOffset > ZIP64_LIMIT or
1228                centDirSize > ZIP64_LIMIT):
1229                # Need to write the ZIP64 end-of-archive records
1230                zip64endrec = struct.pack(
1231                        structEndArchive64, stringEndArchive64,
1232                        44, 45, 45, 0, 0, centDirCount, centDirCount,
1233                        centDirSize, centDirOffset)
1234                self.fp.write(zip64endrec)
1235
1236                zip64locrec = struct.pack(
1237                        structEndArchive64Locator,
1238                        stringEndArchive64Locator, 0, pos2, 1)
1239                self.fp.write(zip64locrec)
1240                centDirCount = min(centDirCount, 0xFFFF)
1241                centDirSize = min(centDirSize, 0xFFFFFFFF)
1242                centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1243
1244            # check for valid comment length
1245            if len(self.comment) >= ZIP_MAX_COMMENT:
1246                if self.debug > 0:
1247                    msg = 'Archive comment is too long; truncating to %d bytes' \
1248                          % ZIP_MAX_COMMENT
1249                self.comment = self.comment[:ZIP_MAX_COMMENT]
1250
1251            endrec = struct.pack(structEndArchive, stringEndArchive,
1252                                 0, 0, centDirCount, centDirCount,
1253                                 centDirSize, centDirOffset, len(self.comment))
1254            self.fp.write(endrec)
1255            self.fp.write(self.comment)
1256            self.fp.flush()
1257
1258        if not self._filePassed:
1259            self.fp.close()
1260        self.fp = None
1261
1262
1263class PyZipFile(ZipFile):
1264    """Class to create ZIP archives with Python library files and packages."""
1265
1266    def writepy(self, pathname, basename = ""):
1267        """Add all files from "pathname" to the ZIP archive.
1268
1269        If pathname is a package directory, search the directory and
1270        all package subdirectories recursively for all *.py and enter
1271        the modules into the archive.  If pathname is a plain
1272        directory, listdir *.py and enter all modules.  Else, pathname
1273        must be a Python *.py file and the module will be put into the
1274        archive.  Added modules are always module.pyo or module.pyc.
1275        This method will compile the module.py into module.pyc if
1276        necessary.
1277        """
1278        dir, name = os.path.split(pathname)
1279        if os.path.isdir(pathname):
1280            initname = os.path.join(pathname, "__init__.py")
1281            if os.path.isfile(initname):
1282                # This is a package directory, add it
1283                if basename:
1284                    basename = "%s/%s" % (basename, name)
1285                else:
1286                    basename = name
1287                if self.debug:
1288                    print "Adding package in", pathname, "as", basename
1289                fname, arcname = self._get_codename(initname[0:-3], basename)
1290                if self.debug:
1291                    print "Adding", arcname
1292                self.write(fname, arcname)
1293                dirlist = os.listdir(pathname)
1294                dirlist.remove("__init__.py")
1295                # Add all *.py files and package subdirectories
1296                for filename in dirlist:
1297                    path = os.path.join(pathname, filename)
1298                    root, ext = os.path.splitext(filename)
1299                    if os.path.isdir(path):
1300                        if os.path.isfile(os.path.join(path, "__init__.py")):
1301                            # This is a package directory, add it
1302                            self.writepy(path, basename)  # Recursive call
1303                    elif ext == ".py":
1304                        fname, arcname = self._get_codename(path[0:-3],
1305                                         basename)
1306                        if self.debug:
1307                            print "Adding", arcname
1308                        self.write(fname, arcname)
1309            else:
1310                # This is NOT a package directory, add its files at top level
1311                if self.debug:
1312                    print "Adding files from directory", pathname
1313                for filename in os.listdir(pathname):
1314                    path = os.path.join(pathname, filename)
1315                    root, ext = os.path.splitext(filename)
1316                    if ext == ".py":
1317                        fname, arcname = self._get_codename(path[0:-3],
1318                                         basename)
1319                        if self.debug:
1320                            print "Adding", arcname
1321                        self.write(fname, arcname)
1322        else:
1323            if pathname[-3:] != ".py":
1324                raise RuntimeError, \
1325                      'Files added with writepy() must end with ".py"'
1326            fname, arcname = self._get_codename(pathname[0:-3], basename)
1327            if self.debug:
1328                print "Adding file", arcname
1329            self.write(fname, arcname)
1330
1331    def _get_codename(self, pathname, basename):
1332        """Return (filename, archivename) for the path.
1333
1334        Given a module name path, return the correct file path and
1335        archive name, compiling if necessary.  For example, given
1336        /python/lib/string, return (/python/lib/string.pyc, string).
1337        """
1338        file_py  = pathname + ".py"
1339        file_pyc = pathname + ".pyc"
1340        file_pyo = pathname + ".pyo"
1341        if os.path.isfile(file_pyo) and \
1342                            os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
1343            fname = file_pyo    # Use .pyo file
1344        elif not os.path.isfile(file_pyc) or \
1345             os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
1346            import py_compile
1347            if self.debug:
1348                print "Compiling", file_py
1349            try:
1350                py_compile.compile(file_py, file_pyc, None, True)
1351            except py_compile.PyCompileError,err:
1352                print err.msg
1353            fname = file_pyc
1354        else:
1355            fname = file_pyc
1356        archivename = os.path.split(fname)[1]
1357        if basename:
1358            archivename = "%s/%s" % (basename, archivename)
1359        return (fname, archivename)
1360
1361
def main(args = None):
    """Command-line interface: list (-l), test (-t), extract (-e) or
    create (-c) a zip file.

    'args' is the argument list without the program name; sys.argv[1:] is
    used when it is None.  A missing or unknown option, or a wrong number
    of arguments, prints the usage text and exits with status 1.  The
    archive is closed on every path, including when an operation raises.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: there is no data to write, and
                    # opening the path as a file would fail.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the contents of a directory
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1435
# Run the command-line interface when this file is executed as a script;
# main() then takes its arguments from sys.argv.
if __name__ == "__main__":
    main()
1438