• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""
2Read and write ZIP files.
3"""
4import struct, os, time, sys, shutil
5import binascii, cStringIO, stat
6import io
7import re
8import string
9
10try:
11    import zlib # We may need its compression method
12    crc32 = zlib.crc32
13except ImportError:
14    zlib = None
15    crc32 = binascii.crc32
16
# Public names exported by a star-import of this module.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
19
class BadZipfile(Exception):
    """Raised when a file is not a ZIP archive or its contents are corrupt."""


class LargeZipFile(Exception):
    """
    Raised while writing when the archive would need the ZIP64 extensions
    but they have not been enabled.
    """


# Historical alias: the exception raised by this module.
error = BadZipfile
31
# Largest member size written without the ZIP64 extensions
# (compared against in ZipInfo.FileHeader).
ZIP64_LIMIT = 2 ** 31 - 1
ZIP_FILECOUNT_LIMIT = 2 ** 16 - 1
ZIP_MAX_COMMENT = 2 ** 16 - 1

# constants for Zip file compression methods
# (other ZIP compression methods are not supported)
ZIP_STORED, ZIP_DEFLATED = 0, 8
40
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
46
# "End of central directory" record (section V.I in the format document):
# struct layout, magic number and packed size.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the unpacked record.  The final two entries
# are not part of the structure as defined in the spec; they are extra
# slots used internally by this module as a convenience.
(
    _ECD_SIGNATURE,
    _ECD_DISK_NUMBER,
    _ECD_DISK_START,
    _ECD_ENTRIES_THIS_DISK,
    _ECD_ENTRIES_TOTAL,
    _ECD_SIZE,
    _ECD_OFFSET,
    _ECD_COMMENT_SIZE,
    _ECD_COMMENT,
    _ECD_LOCATION,
) = range(10)
65
# "Central directory" entry (section V.F in the format document):
# struct layout, magic number and packed size.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Positions of the fields in an unpacked central directory entry.
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)
92
# "Local file header" (section V.A in the format document):
# struct layout, magic number and packed size.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in an unpacked local file header.
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)
111
# "Zip64 end of central directory locator": struct layout, magic number
# and packed size.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# "Zip64 end of central directory" record (section V.G in the format
# document): struct layout, magic number and packed size.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in an unpacked Zip64 end-of-central-directory
# record.
(
    _CD64_SIGNATURE,
    _CD64_DIRECTORY_RECSIZE,
    _CD64_CREATE_VERSION,
    _CD64_EXTRACT_VERSION,
    _CD64_DISK_NUMBER,
    _CD64_DISK_NUMBER_START,
    _CD64_NUMBER_ENTRIES_THIS_DISK,
    _CD64_NUMBER_ENTRIES_TOTAL,
    _CD64_DIRECTORY_SIZE,
    _CD64_OFFSET_START_CENTDIR,
) = range(10)
133
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    *fp* is handed to _EndRecData(); an IOError raised while it reads the
    file is treated the same as "no record found".
    """
    try:
        endrec = _EndRecData(fp)
    except IOError:
        return False
    # A truthy result means the file has the correct magic number.
    return bool(endrec)
141
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or an already open file or file-like object
    (anything with a ``read`` attribute).  Returns False, rather than
    raising, when the file cannot be opened or read.
    """
    result = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it gets closed again.
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
        else:
            result = _check_zipfile(fp=filename)
    except IOError:
        pass
    return result
157
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin:   open archive file.
    offset: position of the classic end-of-central-directory record,
            relative to the end of the file (so zero or negative).
    endrec: list built by _EndRecData(); it is updated in place and returned.

    endrec is returned unchanged when no ZIP64 locator/record is found.
    Raises BadZipfile for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                                 locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the record sits directly in front
    # of the locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    (magic, recsize, create_version, read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = \
        struct.unpack(structEndArchive64, record)
    if magic != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    replacements = (
        (_ECD_SIGNATURE, magic),
        (_ECD_DISK_NUMBER, disk_num),
        (_ECD_DISK_START, disk_dir),
        (_ECD_ENTRIES_THIS_DISK, dircount),
        (_ECD_ENTRIES_TOTAL, dircount2),
        (_ECD_SIZE, dirsize),
        (_ECD_OFFSET, diroffset),
    )
    for index, value in replacements:
        endrec[index] = value
    return endrec
199
200
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list: the eight unpacked fields of the record, then the
    archive comment, then the file offset at which the record starts.  The
    list is passed through _EndRecData64(), which may overwrite entries with
    values from a ZIP64 record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # First try the cheap case: an archive without a comment, where the
    # record is the very last thing in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    tail = fpin.read()
    has_plain_record = (len(tail) == sizeEndCentDir and
                        tail[0:4] == stringEndArchive and
                        tail[-2:] == b"\000\000")
    if has_plain_record:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec += ["", filesize - sizeEndCentDir]

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record_end = start + sizeEndCentDir
    recData = data[start:record_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(data[record_end:record_end + commentSize])
    endrec.append(maxCommentStart + start)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + start - filesize,
                         endrec)
260
261
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for *filename* stamped with *date_time*.

        date_time is a (year, month, day, hour, min, sec) tuple.
        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a string.

        zip64: True appends a ZIP64 extra record unconditionally; False
               never appends one; None (the default) appends one only when
               a size exceeds ZIP64_LIMIT.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT but zip64 is
        false.  When the ZIP64 record is required, extract_version and
        create_version are raised to at least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        needs_zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64 is None:
            zip64 = needs_zip64
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if needs_zip64:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Bump create_version from its own previous value, so a higher
            # existing create_version is never lowered to extract_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode filename is encoded as ASCII when possible; otherwise it
        is encoded as UTF-8 and bit 0x800 is set in the returned flags.
        A non-unicode filename is returned untouched.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this entry.

        Walks the (type, length, payload) records in self.extra.  For a
        record of type 1 the 64-bit values replace, in order, file_size,
        compress_size and header_offset, but only for those attributes that
        currently hold the 0xffffffff placeholder.

        Raises RuntimeError when a type 1 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xffffffff:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Skip this record's 4-byte header and its payload.
            extra = extra[ln+4:]
418
419
class _ZipDecrypter:
    """Decrypter for the password-based encryption of ZIP archive members.

    Known plaintext attacks exist against this scheme, but it is still
    useful to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            crc = byte
            for _ in range(8):
                low_bit = crc & 1
                crc = (crc >> 1) & 0x7FFFFFFF
                if low_bit:
                    crc ^= poly
            table.append(crc)
        return table
    # Built once, while the class body executes; shared by all instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ord(ch)) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Set the three keys to their start values and mix in *pwd*."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for char in pwd:
            self._UpdateKeys(char)

    def _UpdateKeys(self, c):
        """Advance the three keys using the single character *c*."""
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 255)) & 0xFFFFFFFF
        self.key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        top_byte = chr((self.key1 >> 24) & 255)
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        mask = ((k * (k ^ 1)) >> 8) & 255
        plain = chr(ord(c) ^ mask)
        # The keys advance on the decrypted character, not the cipher one.
        self._UpdateKeys(plain)
        return plain
478
479
# Names for ZIP compression method codes, keyed by the numeric code.
# ZipExtFile.__init__ looks a code up here to describe an unsupported
# compression type in its NotImplementedError message.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
499
500
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter:
    # "1 << 31 - 1" would parse as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
            close_fileobj=False):
        """Wrap *fileobj*, positioned at the start of a member's data.

        fileobj:       file object the (compressed) bytes are read from.
        mode:          mode string; a 'U' in it enables universal newlines.
        zipinfo:       entry describing the member (compress_type,
                       compress_size, filename and, optionally, CRC).
        decrypter:     optional callable applied to every byte read.
        close_fileobj: if true, close() also closes fileobj.

        Raises NotImplementedError for compression types other than
        ZIP_STORED and ZIP_DEFLATED.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        elif self._compress_type != ZIP_STORED:
            descr = compressor_names.get(self._compress_type)
            if descr:
                raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
            else:
                raise NotImplementedError("compression type %d" % (self._compress_type,))
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # read() advanced the position; put the data back in front of
            # whatever is still unread so the position is unchanged.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Always True."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC and verify it at end of file.

        Raises BadZipfile when *eof* is true and the CRC does not match the
        expected value.  Does nothing when no expected CRC is known.
        """
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close this object, and the underlying file if it is owned."""
        try :
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super(ZipExtFile, self).close()
707
708
709class ZipFile(object):
710    """ Class with methods to open, read, write, close, list zip files.
711
712    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
713
714    file: Either the path to the file, or a file-like object.
715          If it is a path, the file will be opened and closed by ZipFile.
716    mode: The mode can be either read "r", write "w" or append "a".
717    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
718    allowZip64: if True ZipFile will create files with ZIP64 extensions when
719                needed, otherwise it will raise an exception when this would
720                be necessary.
721
722    """
723
724    fp = None                   # Set here since __del__ checks it
725
726    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
727        """Open the ZIP file with mode read "r", write "w" or append "a"."""
728        if mode not in ("r", "w", "a"):
729            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
730
731        if compression == ZIP_STORED:
732            pass
733        elif compression == ZIP_DEFLATED:
734            if not zlib:
735                raise RuntimeError,\
736                      "Compression requires the (missing) zlib module"
737        else:
738            raise RuntimeError, "That compression method is not supported"
739
740        self._allowZip64 = allowZip64
741        self._didModify = False
742        self.debug = 0  # Level of printing: 0 through 3
743        self.NameToInfo = {}    # Find file info given name
744        self.filelist = []      # List of ZipInfo instances for archive
745        self.compression = compression  # Method of compression
746        self.mode = key = mode.replace('b', '')[0]
747        self.pwd = None
748        self._comment = ''
749
750        # Check if we were passed a file-like object
751        if isinstance(file, basestring):
752            self._filePassed = 0
753            self.filename = file
754            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
755            try:
756                self.fp = open(file, modeDict[mode])
757            except IOError:
758                if mode == 'a':
759                    mode = key = 'w'
760                    self.fp = open(file, modeDict[mode])
761                else:
762                    raise
763        else:
764            self._filePassed = 1
765            self.fp = file
766            self.filename = getattr(file, 'name', None)
767
768        try:
769            if key == 'r':
770                self._RealGetContents()
771            elif key == 'w':
772                # set the modified flag so central directory gets written
773                # even if no files are added to the archive
774                self._didModify = True
775                self._start_disk = self.fp.tell()
776            elif key == 'a':
777                try:
778                    # See if file is a zip file
779                    self._RealGetContents()
780                    # seek to start of directory and overwrite
781                    self.fp.seek(self.start_dir, 0)
782                except BadZipfile:
783                    # file is not a zip file, just append
784                    self.fp.seek(0, 2)
785
786                    # set the modified flag so central directory gets written
787                    # even if no files are added to the archive
788                    self._didModify = True
789                    self._start_disk = self.fp.tell()
790            else:
791                raise RuntimeError('Mode must be "r", "w" or "a"')
792        except:
793            fp = self.fp
794            self.fp = None
795            if not self._filePassed:
796                fp.close()
797            raise
798
799    def __enter__(self):
800        return self
801
802    def __exit__(self, type, value, traceback):
803        self.close()
804
805    def _RealGetContents(self):
806        """Read in the table of contents for the ZIP file."""
807        fp = self.fp
808        try:
809            endrec = _EndRecData(fp)
810        except IOError:
811            raise BadZipfile("File is not a zip file")
812        if not endrec:
813            raise BadZipfile, "File is not a zip file"
814        if self.debug > 1:
815            print endrec
816        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
817        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
818        self._comment = endrec[_ECD_COMMENT]    # archive comment
819
820        # self._start_disk:  Position of the start of ZIP archive
821        # It is zero, unless ZIP was concatenated to another file
822        self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd
823        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
824            # If Zip64 extension structures are present, account for them
825            self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
826
827        if self.debug > 2:
828            inferred = self._start_disk + offset_cd
829            print "given, inferred, offset", offset_cd, inferred, self._start_disk
830        # self.start_dir:  Position of start of central directory
831        self.start_dir = offset_cd + self._start_disk
832        fp.seek(self.start_dir, 0)
833        data = fp.read(size_cd)
834        fp = cStringIO.StringIO(data)
835        total = 0
836        while total < size_cd:
837            centdir = fp.read(sizeCentralDir)
838            if len(centdir) != sizeCentralDir:
839                raise BadZipfile("Truncated central directory")
840            centdir = struct.unpack(structCentralDir, centdir)
841            if centdir[_CD_SIGNATURE] != stringCentralDir:
842                raise BadZipfile("Bad magic number for central directory")
843            if self.debug > 2:
844                print centdir
845            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
846            # Create ZipInfo instance to store file information
847            x = ZipInfo(filename)
848            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
849            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
850            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
851            (x.create_version, x.create_system, x.extract_version, x.reserved,
852                x.flag_bits, x.compress_type, t, d,
853                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
854            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
855            # Convert date/time code to (year, month, day, hour, min, sec)
856            x._raw_time = t
857            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
858                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
859
860            x._decodeExtra()
861            x.header_offset = x.header_offset + self._start_disk
862            x.filename = x._decodeFilename()
863            self.filelist.append(x)
864            self.NameToInfo[x.filename] = x
865
866            # update total bytes read from central directory
867            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
868                     + centdir[_CD_EXTRA_FIELD_LENGTH]
869                     + centdir[_CD_COMMENT_LENGTH])
870
871            if self.debug > 2:
872                print "total", total
873
874
875    def namelist(self):
876        """Return a list of file names in the archive."""
877        l = []
878        for data in self.filelist:
879            l.append(data.filename)
880        return l
881
    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive.

        The internal self.filelist object itself is returned, not a copy.
        """
        return self.filelist
886
887    def printdir(self):
888        """Print a table of contents for the zip file."""
889        print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
890        for zinfo in self.filelist:
891            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
892            print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
893
894    def testzip(self):
895        """Read all the files and check the CRC."""
896        chunk_size = 2 ** 20
897        for zinfo in self.filelist:
898            try:
899                # Read by chunks, to avoid an OverflowError or a
900                # MemoryError with very large embedded files.
901                with self.open(zinfo.filename, "r") as f:
902                    while f.read(chunk_size):     # Check CRC-32
903                        pass
904            except BadZipfile:
905                return zinfo.filename
906
907    def getinfo(self, name):
908        """Return the instance of ZipInfo given 'name'."""
909        info = self.NameToInfo.get(name)
910        if info is None:
911            raise KeyError(
912                'There is no item named %r in the archive' % name)
913
914        return info
915
    def setpassword(self, pwd):
        """Set default password for encrypted files.

        The value is stored in self.pwd, which open() falls back to when it
        is called without an explicit password.
        """
        self.pwd = pwd
919
920    @property
921    def comment(self):
922        """The comment text associated with the ZIP file."""
923        return self._comment
924
925    @comment.setter
926    def comment(self, comment):
927        # check for valid comment length
928        if len(comment) > ZIP_MAX_COMMENT:
929            import warnings
930            warnings.warn('Archive comment is too long; truncating to %d bytes'
931                          % ZIP_MAX_COMMENT, stacklevel=2)
932            comment = comment[:ZIP_MAX_COMMENT]
933        self._comment = comment
934        self._didModify = True
935
936    def read(self, name, pwd=None):
937        """Return file bytes (as a string) for name."""
938        return self.open(name, "r", pwd).read()
939
940    def open(self, name, mode="r", pwd=None):
941        """Return file-like object for 'name'."""
942        if mode not in ("r", "U", "rU"):
943            raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
944        if not self.fp:
945            raise RuntimeError, \
946                  "Attempt to read ZIP archive that was already closed"
947
948        # Only open a new file for instances where we were not
949        # given a file object in the constructor
950        if self._filePassed:
951            zef_file = self.fp
952            should_close = False
953        else:
954            zef_file = open(self.filename, 'rb')
955            should_close = True
956
957        try:
958            # Make sure we have an info object
959            if isinstance(name, ZipInfo):
960                # 'name' is already an info object
961                zinfo = name
962            else:
963                # Get info object for name
964                zinfo = self.getinfo(name)
965
966            zef_file.seek(zinfo.header_offset, 0)
967
968            # Skip the file header:
969            fheader = zef_file.read(sizeFileHeader)
970            if len(fheader) != sizeFileHeader:
971                raise BadZipfile("Truncated file header")
972            fheader = struct.unpack(structFileHeader, fheader)
973            if fheader[_FH_SIGNATURE] != stringFileHeader:
974                raise BadZipfile("Bad magic number for file header")
975
976            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
977            if fheader[_FH_EXTRA_FIELD_LENGTH]:
978                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
979
980            if fname != zinfo.orig_filename:
981                raise BadZipfile, \
982                        'File name in directory "%s" and header "%s" differ.' % (
983                            zinfo.orig_filename, fname)
984
985            # check for encrypted flag & handle password
986            is_encrypted = zinfo.flag_bits & 0x1
987            zd = None
988            if is_encrypted:
989                if not pwd:
990                    pwd = self.pwd
991                if not pwd:
992                    raise RuntimeError, "File %s is encrypted, " \
993                        "password required for extraction" % name
994
995                zd = _ZipDecrypter(pwd)
996                # The first 12 bytes in the cypher stream is an encryption header
997                #  used to strengthen the algorithm. The first 11 bytes are
998                #  completely random, while the 12th contains the MSB of the CRC,
999                #  or the MSB of the file time depending on the header type
1000                #  and is used to check the correctness of the password.
1001                bytes = zef_file.read(12)
1002                h = map(zd, bytes[0:12])
1003                if zinfo.flag_bits & 0x8:
1004                    # compare against the file type from extended local headers
1005                    check_byte = (zinfo._raw_time >> 8) & 0xff
1006                else:
1007                    # compare against the CRC otherwise
1008                    check_byte = (zinfo.CRC >> 24) & 0xff
1009                if ord(h[11]) != check_byte:
1010                    raise RuntimeError("Bad password for file", name)
1011
1012            return ZipExtFile(zef_file, mode, zinfo, zd,
1013                    close_fileobj=should_close)
1014        except:
1015            if should_close:
1016                zef_file.close()
1017            raise
1018
1019    def extract(self, member, path=None, pwd=None):
1020        """Extract a member from the archive to the current working directory,
1021           using its full name. Its file information is extracted as accurately
1022           as possible. `member' may be a filename or a ZipInfo object. You can
1023           specify a different directory using `path'.
1024        """
1025        if not isinstance(member, ZipInfo):
1026            member = self.getinfo(member)
1027
1028        if path is None:
1029            path = os.getcwd()
1030
1031        return self._extract_member(member, path, pwd)
1032
1033    def extractall(self, path=None, members=None, pwd=None):
1034        """Extract all members from the archive to the current working
1035           directory. `path' specifies a different directory to extract to.
1036           `members' is optional and must be a subset of the list returned
1037           by namelist().
1038        """
1039        if members is None:
1040            members = self.namelist()
1041
1042        for zipinfo in members:
1043            self.extract(zipinfo, path, pwd)
1044
1045    def _extract_member(self, member, targetpath, pwd):
1046        """Extract the ZipInfo object 'member' to a physical
1047           file on the path targetpath.
1048        """
1049        # build the destination pathname, replacing
1050        # forward slashes to platform specific separators.
1051        arcname = member.filename.replace('/', os.path.sep)
1052
1053        if os.path.altsep:
1054            arcname = arcname.replace(os.path.altsep, os.path.sep)
1055        # interpret absolute pathname as relative, remove drive letter or
1056        # UNC path, redundant separators, "." and ".." components.
1057        arcname = os.path.splitdrive(arcname)[1]
1058        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1059                    if x not in ('', os.path.curdir, os.path.pardir))
1060        if os.path.sep == '\\':
1061            # filter illegal characters on Windows
1062            illegal = ':<>|"?*'
1063            if isinstance(arcname, unicode):
1064                table = {ord(c): ord('_') for c in illegal}
1065            else:
1066                table = string.maketrans(illegal, '_' * len(illegal))
1067            arcname = arcname.translate(table)
1068            # remove trailing dots
1069            arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
1070            arcname = os.path.sep.join(x for x in arcname if x)
1071
1072        targetpath = os.path.join(targetpath, arcname)
1073        targetpath = os.path.normpath(targetpath)
1074
1075        # Create all upper directories if necessary.
1076        upperdirs = os.path.dirname(targetpath)
1077        if upperdirs and not os.path.exists(upperdirs):
1078            os.makedirs(upperdirs)
1079
1080        if member.filename[-1] == '/':
1081            if not os.path.isdir(targetpath):
1082                os.mkdir(targetpath)
1083            return targetpath
1084
1085        with self.open(member, pwd=pwd) as source, \
1086             file(targetpath, "wb") as target:
1087            shutil.copyfileobj(source, target)
1088
1089        return targetpath
1090
1091    def _writecheck(self, zinfo):
1092        """Check for errors before writing a file to the archive."""
1093        if zinfo.filename in self.NameToInfo:
1094            import warnings
1095            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1096        if self.mode not in ("w", "a"):
1097            raise RuntimeError, 'write() requires mode "w" or "a"'
1098        if not self.fp:
1099            raise RuntimeError, \
1100                  "Attempt to write ZIP archive that was already closed"
1101        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1102            raise RuntimeError, \
1103                  "Compression requires the (missing) zlib module"
1104        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1105            raise RuntimeError, \
1106                  "That compression method is not supported"
1107        if not self._allowZip64:
1108            requires_zip64 = None
1109            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1110                requires_zip64 = "Files count"
1111            elif zinfo.file_size > ZIP64_LIMIT:
1112                requires_zip64 = "Filesize"
1113            elif zinfo.header_offset > ZIP64_LIMIT:
1114                requires_zip64 = "Zipfile size"
1115            if requires_zip64:
1116                raise LargeZipFile(requires_zip64 +
1117                                   " would require ZIP64 extensions")
1118
    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        'arcname' defaults to 'filename'; the drive part and leading path
        separators are stripped.  A directory is stored as an empty entry
        whose name ends in '/'.  'compress_type' overrides the archive's
        default compression for this entry.

        Raises RuntimeError if the archive is closed or the data outgrows
        the sizes the already-written header allows for; _writecheck() may
        raise RuntimeError or LargeZipFile.
        """
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write to ZIP archive that was already closed")

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        # NOTE(review): arcname[0] raises IndexError once arcname has been
        # reduced to an empty string (e.g. a bare root path) -- confirm
        # whether callers can pass such a name.
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
        if isdir:
            zinfo.compress_type = ZIP_STORED
        elif compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            # Directory entries carry no data: register the entry, write
            # its header and stop.
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader(False))
            return

        with open(filename, "rb") as fp:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            # Compressed size can be larger than uncompressed size
            zip64 = self._allowZip64 and \
                    zinfo.file_size * 1.05 > ZIP64_LIMIT
            # Provisional header; it is rewritten below once the real CRC
            # and sizes are known.
            self.fp.write(zinfo.FileHeader(zip64))
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            file_size = 0
            # Copy the file in 8 KiB chunks, updating the CRC and sizes.
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        if cmpr:
            # Emit whatever the compressor still holds.
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # The provisional header was written without ZIP64 fields, so the
        # final sizes must still fit within ZIP64_LIMIT.
        if not zip64 and self._allowZip64:
            if file_size > ZIP64_LIMIT:
                raise RuntimeError('File size has increased during compressing')
            if compress_size > ZIP64_LIMIT:
                raise RuntimeError('Compressed size larger than uncompressed size')
        # Seek backwards and write file header (which will now include
        # correct CRC and file sizes)
        position = self.fp.tell() # Preserve current position in file
        self.fp.seek(zinfo.header_offset, 0)
        self.fp.write(zinfo.FileHeader(zip64))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
1210
1211    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
1212        """Write a file into the archive.  The contents is the string
1213        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
1214        the name of the file in the archive."""
1215        if not isinstance(zinfo_or_arcname, ZipInfo):
1216            zinfo = ZipInfo(filename=zinfo_or_arcname,
1217                            date_time=time.localtime(time.time())[:6])
1218
1219            zinfo.compress_type = self.compression
1220            if zinfo.filename[-1] == '/':
1221                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1222                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1223            else:
1224                zinfo.external_attr = 0o600 << 16     # ?rw-------
1225        else:
1226            zinfo = zinfo_or_arcname
1227
1228        if not self.fp:
1229            raise RuntimeError(
1230                  "Attempt to write to ZIP archive that was already closed")
1231
1232        if compress_type is not None:
1233            zinfo.compress_type = compress_type
1234
1235        zinfo.file_size = len(bytes)            # Uncompressed size
1236        zinfo.header_offset = self.fp.tell()    # Start of header bytes
1237        self._writecheck(zinfo)
1238        self._didModify = True
1239        zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
1240        if zinfo.compress_type == ZIP_DEFLATED:
1241            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1242                 zlib.DEFLATED, -15)
1243            bytes = co.compress(bytes) + co.flush()
1244            zinfo.compress_size = len(bytes)    # Compressed size
1245        else:
1246            zinfo.compress_size = zinfo.file_size
1247        zip64 = zinfo.file_size > ZIP64_LIMIT or \
1248                zinfo.compress_size > ZIP64_LIMIT
1249        if zip64 and not self._allowZip64:
1250            raise LargeZipFile("Filesize would require ZIP64 extensions")
1251        self.fp.write(zinfo.FileHeader(zip64))
1252        self.fp.write(bytes)
1253        if zinfo.flag_bits & 0x08:
1254            # Write CRC and file sizes after the file data
1255            fmt = '<LQQ' if zip64 else '<LLL'
1256            self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
1257                  zinfo.file_size))
1258        self.fp.flush()
1259        self.filelist.append(zinfo)
1260        self.NameToInfo[zinfo.filename] = zinfo
1261
    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        # close() returns immediately when self.fp is already None, so an
        # archive that was closed explicitly is not written a second time.
        self.close()
1265
    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Does nothing if the archive is already closed.  When the archive
        was modified, the central directory and the end-of-archive record
        (plus the ZIP64 records when needed) are written first.  The
        underlying file is closed only if this object opened it.

        Raises LargeZipFile if ZIP64 end records are required while ZIP64
        is disabled.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    # Pack the timestamp into the 16-bit DOS date and time
                    # fields (seconds are stored halved).
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # Values too large for the 32-bit fields are collected
                    # here and moved into a ZIP64 extra field below.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    # Offsets are stored relative to the archive start.
                    header_offset = zinfo.header_offset - self._start_disk
                    if header_offset > ZIP64_LIMIT:
                        extra.append(header_offset)
                        header_offset = 0xffffffffL

                    extra_data = zinfo.extra
                    if extra:
                        # Append a ZIP64 field to the extra's
                        extra_data = struct.pack(
                                '<HH' + 'Q'*len(extra),
                                1, 8*len(extra), *extra) + extra_data

                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    except DeprecationWarning:
                        # Dump the values being packed to stderr for
                        # diagnosis, then re-raise.
                        print >>sys.stderr, (structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                        raise
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = len(self.filelist)
                centDirSize = pos2 - pos1
                centDirOffset = pos1 - self._start_disk
                requires_zip64 = None
                if centDirCount > ZIP_FILECOUNT_LIMIT:
                    requires_zip64 = "Files count"
                elif centDirOffset > ZIP64_LIMIT:
                    requires_zip64 = "Central directory offset"
                elif centDirSize > ZIP64_LIMIT:
                    requires_zip64 = "Central directory size"
                if requires_zip64:
                    # Need to write the ZIP64 end-of-archive records
                    if not self._allowZip64:
                        raise LargeZipFile(requires_zip64 +
                                           " would require ZIP64 extensions")
                    zip64endrec = struct.pack(
                            structEndArchive64, stringEndArchive64,
                            44, 45, 45, 0, 0, centDirCount, centDirCount,
                            centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    zip64locrec = struct.pack(
                            structEndArchive64Locator,
                            stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # Clamp to the field maxima for the classic end record
                    # written below.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                    0, 0, centDirCount, centDirCount,
                                    centDirSize, centDirOffset, len(self._comment))
                self.fp.write(endrec)
                self.fp.write(self._comment)
                self.fp.flush()
        finally:
            # Mark the archive closed even if writing the records failed;
            # a file object passed in by the caller is left open.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
1375
1376
1377class PyZipFile(ZipFile):
1378    """Class to create ZIP archives with Python library files and packages."""
1379
1380    def writepy(self, pathname, basename = ""):
1381        """Add all files from "pathname" to the ZIP archive.
1382
1383        If pathname is a package directory, search the directory and
1384        all package subdirectories recursively for all *.py and enter
1385        the modules into the archive.  If pathname is a plain
1386        directory, listdir *.py and enter all modules.  Else, pathname
1387        must be a Python *.py file and the module will be put into the
1388        archive.  Added modules are always module.pyo or module.pyc.
1389        This method will compile the module.py into module.pyc if
1390        necessary.
1391        """
1392        dir, name = os.path.split(pathname)
1393        if os.path.isdir(pathname):
1394            initname = os.path.join(pathname, "__init__.py")
1395            if os.path.isfile(initname):
1396                # This is a package directory, add it
1397                if basename:
1398                    basename = "%s/%s" % (basename, name)
1399                else:
1400                    basename = name
1401                if self.debug:
1402                    print "Adding package in", pathname, "as", basename
1403                fname, arcname = self._get_codename(initname[0:-3], basename)
1404                if self.debug:
1405                    print "Adding", arcname
1406                self.write(fname, arcname)
1407                dirlist = os.listdir(pathname)
1408                dirlist.remove("__init__.py")
1409                # Add all *.py files and package subdirectories
1410                for filename in dirlist:
1411                    path = os.path.join(pathname, filename)
1412                    root, ext = os.path.splitext(filename)
1413                    if os.path.isdir(path):
1414                        if os.path.isfile(os.path.join(path, "__init__.py")):
1415                            # This is a package directory, add it
1416                            self.writepy(path, basename)  # Recursive call
1417                    elif ext == ".py":
1418                        fname, arcname = self._get_codename(path[0:-3],
1419                                         basename)
1420                        if self.debug:
1421                            print "Adding", arcname
1422                        self.write(fname, arcname)
1423            else:
1424                # This is NOT a package directory, add its files at top level
1425                if self.debug:
1426                    print "Adding files from directory", pathname
1427                for filename in os.listdir(pathname):
1428                    path = os.path.join(pathname, filename)
1429                    root, ext = os.path.splitext(filename)
1430                    if ext == ".py":
1431                        fname, arcname = self._get_codename(path[0:-3],
1432                                         basename)
1433                        if self.debug:
1434                            print "Adding", arcname
1435                        self.write(fname, arcname)
1436        else:
1437            if pathname[-3:] != ".py":
1438                raise RuntimeError, \
1439                      'Files added with writepy() must end with ".py"'
1440            fname, arcname = self._get_codename(pathname[0:-3], basename)
1441            if self.debug:
1442                print "Adding file", arcname
1443            self.write(fname, arcname)
1444
1445    def _get_codename(self, pathname, basename):
1446        """Return (filename, archivename) for the path.
1447
1448        Given a module name path, return the correct file path and
1449        archive name, compiling if necessary.  For example, given
1450        /python/lib/string, return (/python/lib/string.pyc, string).
1451        """
1452        file_py  = pathname + ".py"
1453        file_pyc = pathname + ".pyc"
1454        file_pyo = pathname + ".pyo"
1455        if os.path.isfile(file_pyo) and \
1456                            os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
1457            fname = file_pyo    # Use .pyo file
1458        elif not os.path.isfile(file_pyc) or \
1459             os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
1460            import py_compile
1461            if self.debug:
1462                print "Compiling", file_py
1463            try:
1464                py_compile.compile(file_py, file_pyc, None, True)
1465            except py_compile.PyCompileError,err:
1466                print err.msg
1467            fname = file_pyc
1468        else:
1469            fname = file_pyc
1470        archivename = os.path.split(fname)[1]
1471        if basename:
1472            archivename = "%s/%s" % (basename, archivename)
1473        return (fname, archivename)
1474
1475
def main(args = None):
    """Command-line entry point: list, test, extract or create a zipfile.

    'args' is the argument list without the program name and defaults to
    sys.argv[1:].  An unknown option or a wrong number of arguments prints
    the usage text and exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_error():
        # Show the usage text and terminate with a failure status.
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_error()

    option = args[0]

    if option == '-l':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif option == '-t':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif option == '-e':
        if len(args) != 3:
            usage_error()

        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif option == '-c':
        if len(args) < 3:
            usage_error()

        def addToZip(zf, path, zippath):
            # Files are stored deflated; directories are stored as an entry
            # (when they have an archive name) and then walked recursively.
            # Anything that is neither is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
                return
            if not os.path.isdir(path):
                return
            if zippath:
                zf.write(path, zippath)
            for nm in os.listdir(path):
                addToZip(zf,
                        os.path.join(path, nm), os.path.join(zippath, nm))

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for path in args[2:]:
                # Use the last path component as the archive name; for a
                # path ending in a separator, use the component before it.
                zippath = (os.path.basename(path) or
                           os.path.basename(os.path.dirname(path)))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, path, zippath)
1541
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
1544