1# -*- coding: iso-8859-1 -*- 2#------------------------------------------------------------------- 3# tarfile.py 4#------------------------------------------------------------------- 5# Copyright (C) 2002 Lars Gust�bel <lars@gustaebel.de> 6# All rights reserved. 7# 8# Permission is hereby granted, free of charge, to any person 9# obtaining a copy of this software and associated documentation 10# files (the "Software"), to deal in the Software without 11# restriction, including without limitation the rights to use, 12# copy, modify, merge, publish, distribute, sublicense, and/or sell 13# copies of the Software, and to permit persons to whom the 14# Software is furnished to do so, subject to the following 15# conditions: 16# 17# The above copyright notice and this permission notice shall be 18# included in all copies or substantial portions of the Software. 19# 20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 27# OTHER DEALINGS IN THE SOFTWARE. 28# 29"""Read from and write to tar format archives. 30""" 31 32__version__ = "$Revision: 85213 $" 33# $Source$ 34 35version = "0.9.0" 36__author__ = "Lars Gust�bel (lars@gustaebel.de)" 37__date__ = "$Date$" 38__cvsid__ = "$Id$" 39__credits__ = "Gustavo Niemeyer, Niels Gust�bel, Richard Townsend." 40 41#--------- 42# Imports 43#--------- 44from __builtin__ import open as bltn_open 45import sys 46import os 47import shutil 48import stat 49import errno 50import time 51import struct 52import copy 53import re 54import operator 55 56try: 57 import grp, pwd 58except ImportError: 59 grp = pwd = None 60 61# from tarfile import * 62__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] 63 64#--------------------------------------------------------- 65# tar constants 66#--------------------------------------------------------- 67NUL = "\0" # the null character 68BLOCKSIZE = 512 # length of processing blocks 69RECORDSIZE = BLOCKSIZE * 20 # length of records 70GNU_MAGIC = "ustar \0" # magic gnu tar string 71POSIX_MAGIC = "ustar\x0000" # magic posix tar string 72 73LENGTH_NAME = 100 # maximum length of a filename 74LENGTH_LINK = 100 # maximum length of a linkname 75LENGTH_PREFIX = 155 # maximum length of the prefix field 76 77REGTYPE = "0" # regular file 78AREGTYPE = "\0" # regular file 79LNKTYPE = "1" # link (inside tarfile) 80SYMTYPE = "2" # symbolic link 81CHRTYPE = "3" # character special device 82BLKTYPE = "4" # block special device 83DIRTYPE = "5" # directory 84FIFOTYPE = "6" # fifo special device 85CONTTYPE = "7" # contiguous file 86 87GNUTYPE_LONGNAME = "L" # GNU tar longname 88GNUTYPE_LONGLINK = "K" # GNU tar longlink 89GNUTYPE_SPARSE = "S" # GNU tar sparse file 90 91XHDTYPE = "x" # POSIX.1-2001 extended header 92XGLTYPE = "g" # POSIX.1-2001 global header 93SOLARIS_XHDTYPE = "X" # Solaris extended header 94 95USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format 96GNU_FORMAT = 1 # GNU tar format 97PAX_FORMAT = 2 # POSIX.1-2001 (pax) format 98DEFAULT_FORMAT = GNU_FORMAT 99 100#--------------------------------------------------------- 101# tarfile constants 102#--------------------------------------------------------- 103# File types that tarfile supports: 104SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, 105 SYMTYPE, DIRTYPE, FIFOTYPE, 106 CONTTYPE, CHRTYPE, BLKTYPE, 107 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, 108 GNUTYPE_SPARSE) 109 110# File types that will be treated as a regular file. 111REGULAR_TYPES = (REGTYPE, AREGTYPE, 112 CONTTYPE, GNUTYPE_SPARSE) 113 114# File types that are part of the GNU tar format. 115GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, 116 GNUTYPE_SPARSE) 117 118# Fields from a pax header that override a TarInfo attribute. 119PAX_FIELDS = ("path", "linkpath", "size", "mtime", 120 "uid", "gid", "uname", "gname") 121 122# Fields in a pax header that are numbers, all other fields 123# are treated as strings. 124PAX_NUMBER_FIELDS = { 125 "atime": float, 126 "ctime": float, 127 "mtime": float, 128 "uid": int, 129 "gid": int, 130 "size": int 131} 132 133#--------------------------------------------------------- 134# Bits used in the mode field, values in octal. 135#--------------------------------------------------------- 136S_IFLNK = 0120000 # symbolic link 137S_IFREG = 0100000 # regular file 138S_IFBLK = 0060000 # block device 139S_IFDIR = 0040000 # directory 140S_IFCHR = 0020000 # character device 141S_IFIFO = 0010000 # fifo 142 143TSUID = 04000 # set UID on execution 144TSGID = 02000 # set GID on execution 145TSVTX = 01000 # reserved 146 147TUREAD = 0400 # read by owner 148TUWRITE = 0200 # write by owner 149TUEXEC = 0100 # execute/search by owner 150TGREAD = 0040 # read by group 151TGWRITE = 0020 # write by group 152TGEXEC = 0010 # execute/search by group 153TOREAD = 0004 # read by other 154TOWRITE = 0002 # write by other 155TOEXEC = 0001 # execute/search by other 156 157#--------------------------------------------------------- 158# initialization 159#--------------------------------------------------------- 160ENCODING = sys.getfilesystemencoding() 161if ENCODING is None: 162 ENCODING = sys.getdefaultencoding() 163 164#--------------------------------------------------------- 165# Some useful functions 166#--------------------------------------------------------- 167 168def stn(s, length): 169 """Convert a python string to a null-terminated string buffer. 170 """ 171 return s[:length] + (length - len(s)) * NUL 172 173def nts(s): 174 """Convert a null-terminated string field to a python string. 175 """ 176 # Use the string up to the first null char. 177 p = s.find("\0") 178 if p == -1: 179 return s 180 return s[:p] 181 182def nti(s): 183 """Convert a number field to a python number. 184 """ 185 # There are two possible encodings for a number field, see 186 # itn() below. 187 if s[0] != chr(0200): 188 try: 189 n = int(nts(s).strip() or "0", 8) 190 except ValueError: 191 raise InvalidHeaderError("invalid header") 192 else: 193 n = 0L 194 for i in xrange(len(s) - 1): 195 n <<= 8 196 n += ord(s[i + 1]) 197 return n 198 199def itn(n, digits=8, format=DEFAULT_FORMAT): 200 """Convert a python number to a number field. 201 """ 202 # POSIX 1003.1-1988 requires numbers to be encoded as a string of 203 # octal digits followed by a null-byte, this allows values up to 204 # (8**(digits-1))-1. GNU tar allows storing numbers greater than 205 # that if necessary. A leading 0200 byte indicates this particular 206 # encoding, the following digits-1 bytes are a big-endian 207 # representation. This allows values up to (256**(digits-1))-1. 208 if 0 <= n < 8 ** (digits - 1): 209 s = "%0*o" % (digits - 1, n) + NUL 210 else: 211 if format != GNU_FORMAT or n >= 256 ** (digits - 1): 212 raise ValueError("overflow in number field") 213 214 if n < 0: 215 # XXX We mimic GNU tar's behaviour with negative numbers, 216 # this could raise OverflowError. 217 n = struct.unpack("L", struct.pack("l", n))[0] 218 219 s = "" 220 for i in xrange(digits - 1): 221 s = chr(n & 0377) + s 222 n >>= 8 223 s = chr(0200) + s 224 return s 225 226def uts(s, encoding, errors): 227 """Convert a unicode object to a string. 228 """ 229 if errors == "utf-8": 230 # An extra error handler similar to the -o invalid=UTF-8 option 231 # in POSIX.1-2001. Replace untranslatable characters with their 232 # UTF-8 representation. 233 try: 234 return s.encode(encoding, "strict") 235 except UnicodeEncodeError: 236 x = [] 237 for c in s: 238 try: 239 x.append(c.encode(encoding, "strict")) 240 except UnicodeEncodeError: 241 x.append(c.encode("utf8")) 242 return "".join(x) 243 else: 244 return s.encode(encoding, errors) 245 246def calc_chksums(buf): 247 """Calculate the checksum for a member's header by summing up all 248 characters except for the chksum field which is treated as if 249 it was filled with spaces. According to the GNU tar sources, 250 some tars (Sun and NeXT) calculate chksum with signed char, 251 which will be different if there are chars in the buffer with 252 the high bit set. So we calculate two checksums, unsigned and 253 signed. 254 """ 255 unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512])) 256 signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512])) 257 return unsigned_chksum, signed_chksum 258 259def copyfileobj(src, dst, length=None): 260 """Copy length bytes from fileobj src to fileobj dst. 261 If length is None, copy the entire content. 262 """ 263 if length == 0: 264 return 265 if length is None: 266 shutil.copyfileobj(src, dst) 267 return 268 269 BUFSIZE = 16 * 1024 270 blocks, remainder = divmod(length, BUFSIZE) 271 for b in xrange(blocks): 272 buf = src.read(BUFSIZE) 273 if len(buf) < BUFSIZE: 274 raise IOError("end of file reached") 275 dst.write(buf) 276 277 if remainder != 0: 278 buf = src.read(remainder) 279 if len(buf) < remainder: 280 raise IOError("end of file reached") 281 dst.write(buf) 282 return 283 284filemode_table = ( 285 ((S_IFLNK, "l"), 286 (S_IFREG, "-"), 287 (S_IFBLK, "b"), 288 (S_IFDIR, "d"), 289 (S_IFCHR, "c"), 290 (S_IFIFO, "p")), 291 292 ((TUREAD, "r"),), 293 ((TUWRITE, "w"),), 294 ((TUEXEC|TSUID, "s"), 295 (TSUID, "S"), 296 (TUEXEC, "x")), 297 298 ((TGREAD, "r"),), 299 ((TGWRITE, "w"),), 300 ((TGEXEC|TSGID, "s"), 301 (TSGID, "S"), 302 (TGEXEC, "x")), 303 304 ((TOREAD, "r"),), 305 ((TOWRITE, "w"),), 306 ((TOEXEC|TSVTX, "t"), 307 (TSVTX, "T"), 308 (TOEXEC, "x")) 309) 310 311def filemode(mode): 312 """Convert a file's mode to a string of the form 313 -rwxrwxrwx. 314 Used by TarFile.list() 315 """ 316 perm = [] 317 for table in filemode_table: 318 for bit, char in table: 319 if mode & bit == bit: 320 perm.append(char) 321 break 322 else: 323 perm.append("-") 324 return "".join(perm) 325 326class TarError(Exception): 327 """Base exception.""" 328 pass 329class ExtractError(TarError): 330 """General exception for extract errors.""" 331 pass 332class ReadError(TarError): 333 """Exception for unreadable tar archives.""" 334 pass 335class CompressionError(TarError): 336 """Exception for unavailable compression methods.""" 337 pass 338class StreamError(TarError): 339 """Exception for unsupported operations on stream-like TarFiles.""" 340 pass 341class HeaderError(TarError): 342 """Base exception for header errors.""" 343 pass 344class EmptyHeaderError(HeaderError): 345 """Exception for empty headers.""" 346 pass 347class TruncatedHeaderError(HeaderError): 348 """Exception for truncated headers.""" 349 pass 350class EOFHeaderError(HeaderError): 351 """Exception for end of file headers.""" 352 pass 353class InvalidHeaderError(HeaderError): 354 """Exception for invalid headers.""" 355 pass 356class SubsequentHeaderError(HeaderError): 357 """Exception for missing and invalid extended headers.""" 358 pass 359 360#--------------------------- 361# internal stream interface 362#--------------------------- 363class _LowLevelFile: 364 """Low-level file object. Supports reading and writing. 365 It is used instead of a regular file object for streaming 366 access. 367 """ 368 369 def __init__(self, name, mode): 370 mode = { 371 "r": os.O_RDONLY, 372 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 373 }[mode] 374 if hasattr(os, "O_BINARY"): 375 mode |= os.O_BINARY 376 self.fd = os.open(name, mode, 0666) 377 378 def close(self): 379 os.close(self.fd) 380 381 def read(self, size): 382 return os.read(self.fd, size) 383 384 def write(self, s): 385 os.write(self.fd, s) 386 387class _Stream: 388 """Class that serves as an adapter between TarFile and 389 a stream-like object. The stream-like object only 390 needs to have a read() or write() method and is accessed 391 blockwise. Use of gzip or bzip2 compression is possible. 392 A stream-like object could be for example: sys.stdin, 393 sys.stdout, a socket, a tape device etc. 394 395 _Stream is intended to be used only internally. 396 """ 397 398 def __init__(self, name, mode, comptype, fileobj, bufsize): 399 """Construct a _Stream object. 400 """ 401 self._extfileobj = True 402 if fileobj is None: 403 fileobj = _LowLevelFile(name, mode) 404 self._extfileobj = False 405 406 if comptype == '*': 407 # Enable transparent compression detection for the 408 # stream interface 409 fileobj = _StreamProxy(fileobj) 410 comptype = fileobj.getcomptype() 411 412 self.name = name or "" 413 self.mode = mode 414 self.comptype = comptype 415 self.fileobj = fileobj 416 self.bufsize = bufsize 417 self.buf = "" 418 self.pos = 0L 419 self.closed = False 420 421 try: 422 if comptype == "gz": 423 try: 424 import zlib 425 except ImportError: 426 raise CompressionError("zlib module is not available") 427 self.zlib = zlib 428 self.crc = zlib.crc32("") & 0xffffffffL 429 if mode == "r": 430 self._init_read_gz() 431 else: 432 self._init_write_gz() 433 434 elif comptype == "bz2": 435 try: 436 import bz2 437 except ImportError: 438 raise CompressionError("bz2 module is not available") 439 if mode == "r": 440 self.dbuf = "" 441 self.cmp = bz2.BZ2Decompressor() 442 else: 443 self.cmp = bz2.BZ2Compressor() 444 except: 445 if not self._extfileobj: 446 self.fileobj.close() 447 self.closed = True 448 raise 449 450 def __del__(self): 451 if hasattr(self, "closed") and not self.closed: 452 self.close() 453 454 def _init_write_gz(self): 455 """Initialize for writing with gzip compression. 456 """ 457 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, 458 -self.zlib.MAX_WBITS, 459 self.zlib.DEF_MEM_LEVEL, 460 0) 461 timestamp = struct.pack("<L", long(time.time())) 462 self.__write("\037\213\010\010%s\002\377" % timestamp) 463 if type(self.name) is unicode: 464 self.name = self.name.encode("iso-8859-1", "replace") 465 if self.name.endswith(".gz"): 466 self.name = self.name[:-3] 467 self.__write(self.name + NUL) 468 469 def write(self, s): 470 """Write string s to the stream. 471 """ 472 if self.comptype == "gz": 473 self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL 474 self.pos += len(s) 475 if self.comptype != "tar": 476 s = self.cmp.compress(s) 477 self.__write(s) 478 479 def __write(self, s): 480 """Write string s to the stream if a whole new block 481 is ready to be written. 482 """ 483 self.buf += s 484 while len(self.buf) > self.bufsize: 485 self.fileobj.write(self.buf[:self.bufsize]) 486 self.buf = self.buf[self.bufsize:] 487 488 def close(self): 489 """Close the _Stream object. No operation should be 490 done on it afterwards. 491 """ 492 if self.closed: 493 return 494 495 self.closed = True 496 try: 497 if self.mode == "w" and self.comptype != "tar": 498 self.buf += self.cmp.flush() 499 500 if self.mode == "w" and self.buf: 501 self.fileobj.write(self.buf) 502 self.buf = "" 503 if self.comptype == "gz": 504 # The native zlib crc is an unsigned 32-bit integer, but 505 # the Python wrapper implicitly casts that to a signed C 506 # long. So, on a 32-bit box self.crc may "look negative", 507 # while the same crc on a 64-bit box may "look positive". 508 # To avoid irksome warnings from the `struct` module, force 509 # it to look positive on all boxes. 510 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL)) 511 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL)) 512 finally: 513 if not self._extfileobj: 514 self.fileobj.close() 515 516 def _init_read_gz(self): 517 """Initialize for reading a gzip compressed fileobj. 518 """ 519 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS) 520 self.dbuf = "" 521 522 # taken from gzip.GzipFile with some alterations 523 if self.__read(2) != "\037\213": 524 raise ReadError("not a gzip file") 525 if self.__read(1) != "\010": 526 raise CompressionError("unsupported compression method") 527 528 flag = ord(self.__read(1)) 529 self.__read(6) 530 531 if flag & 4: 532 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1)) 533 self.read(xlen) 534 if flag & 8: 535 while True: 536 s = self.__read(1) 537 if not s or s == NUL: 538 break 539 if flag & 16: 540 while True: 541 s = self.__read(1) 542 if not s or s == NUL: 543 break 544 if flag & 2: 545 self.__read(2) 546 547 def tell(self): 548 """Return the stream's file pointer position. 549 """ 550 return self.pos 551 552 def seek(self, pos=0): 553 """Set the stream's file pointer to pos. Negative seeking 554 is forbidden. 555 """ 556 if pos - self.pos >= 0: 557 blocks, remainder = divmod(pos - self.pos, self.bufsize) 558 for i in xrange(blocks): 559 self.read(self.bufsize) 560 self.read(remainder) 561 else: 562 raise StreamError("seeking backwards is not allowed") 563 return self.pos 564 565 def read(self, size=None): 566 """Return the next size number of bytes from the stream. 567 If size is not defined, return all bytes of the stream 568 up to EOF. 569 """ 570 if size is None: 571 t = [] 572 while True: 573 buf = self._read(self.bufsize) 574 if not buf: 575 break 576 t.append(buf) 577 buf = "".join(t) 578 else: 579 buf = self._read(size) 580 self.pos += len(buf) 581 return buf 582 583 def _read(self, size): 584 """Return size bytes from the stream. 585 """ 586 if self.comptype == "tar": 587 return self.__read(size) 588 589 c = len(self.dbuf) 590 t = [self.dbuf] 591 while c < size: 592 buf = self.__read(self.bufsize) 593 if not buf: 594 break 595 try: 596 buf = self.cmp.decompress(buf) 597 except IOError: 598 raise ReadError("invalid compressed data") 599 t.append(buf) 600 c += len(buf) 601 t = "".join(t) 602 self.dbuf = t[size:] 603 return t[:size] 604 605 def __read(self, size): 606 """Return size bytes from stream. If internal buffer is empty, 607 read another block from the stream. 608 """ 609 c = len(self.buf) 610 t = [self.buf] 611 while c < size: 612 buf = self.fileobj.read(self.bufsize) 613 if not buf: 614 break 615 t.append(buf) 616 c += len(buf) 617 t = "".join(t) 618 self.buf = t[size:] 619 return t[:size] 620# class _Stream 621 622class _StreamProxy(object): 623 """Small proxy class that enables transparent compression 624 detection for the Stream interface (mode 'r|*'). 625 """ 626 627 def __init__(self, fileobj): 628 self.fileobj = fileobj 629 self.buf = self.fileobj.read(BLOCKSIZE) 630 631 def read(self, size): 632 self.read = self.fileobj.read 633 return self.buf 634 635 def getcomptype(self): 636 if self.buf.startswith("\037\213\010"): 637 return "gz" 638 if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY": 639 return "bz2" 640 return "tar" 641 642 def close(self): 643 self.fileobj.close() 644# class StreamProxy 645 646class _BZ2Proxy(object): 647 """Small proxy class that enables external file object 648 support for "r:bz2" and "w:bz2" modes. This is actually 649 a workaround for a limitation in bz2 module's BZ2File 650 class which (unlike gzip.GzipFile) has no support for 651 a file object argument. 652 """ 653 654 blocksize = 16 * 1024 655 656 def __init__(self, fileobj, mode): 657 self.fileobj = fileobj 658 self.mode = mode 659 self.name = getattr(self.fileobj, "name", None) 660 self.init() 661 662 def init(self): 663 import bz2 664 self.pos = 0 665 if self.mode == "r": 666 self.bz2obj = bz2.BZ2Decompressor() 667 self.fileobj.seek(0) 668 self.buf = "" 669 else: 670 self.bz2obj = bz2.BZ2Compressor() 671 672 def read(self, size): 673 b = [self.buf] 674 x = len(self.buf) 675 while x < size: 676 raw = self.fileobj.read(self.blocksize) 677 if not raw: 678 break 679 data = self.bz2obj.decompress(raw) 680 b.append(data) 681 x += len(data) 682 self.buf = "".join(b) 683 684 buf = self.buf[:size] 685 self.buf = self.buf[size:] 686 self.pos += len(buf) 687 return buf 688 689 def seek(self, pos): 690 if pos < self.pos: 691 self.init() 692 self.read(pos - self.pos) 693 694 def tell(self): 695 return self.pos 696 697 def write(self, data): 698 self.pos += len(data) 699 raw = self.bz2obj.compress(data) 700 self.fileobj.write(raw) 701 702 def close(self): 703 if self.mode == "w": 704 raw = self.bz2obj.flush() 705 self.fileobj.write(raw) 706# class _BZ2Proxy 707 708#------------------------ 709# Extraction file object 710#------------------------ 711class _FileInFile(object): 712 """A thin wrapper around an existing file object that 713 provides a part of its data as an individual file 714 object. 715 """ 716 717 def __init__(self, fileobj, offset, size, sparse=None): 718 self.fileobj = fileobj 719 self.offset = offset 720 self.size = size 721 self.sparse = sparse 722 self.position = 0 723 724 def tell(self): 725 """Return the current file position. 726 """ 727 return self.position 728 729 def seek(self, position): 730 """Seek to a position in the file. 731 """ 732 self.position = position 733 734 def read(self, size=None): 735 """Read data from the file. 736 """ 737 if size is None: 738 size = self.size - self.position 739 else: 740 size = min(size, self.size - self.position) 741 742 if self.sparse is None: 743 return self.readnormal(size) 744 else: 745 return self.readsparse(size) 746 747 def __read(self, size): 748 buf = self.fileobj.read(size) 749 if len(buf) != size: 750 raise ReadError("unexpected end of data") 751 return buf 752 753 def readnormal(self, size): 754 """Read operation for regular files. 755 """ 756 self.fileobj.seek(self.offset + self.position) 757 self.position += size 758 return self.__read(size) 759 760 def readsparse(self, size): 761 """Read operation for sparse files. 762 """ 763 data = [] 764 while size > 0: 765 buf = self.readsparsesection(size) 766 if not buf: 767 break 768 size -= len(buf) 769 data.append(buf) 770 return "".join(data) 771 772 def readsparsesection(self, size): 773 """Read a single section of a sparse file. 774 """ 775 section = self.sparse.find(self.position) 776 777 if section is None: 778 return "" 779 780 size = min(size, section.offset + section.size - self.position) 781 782 if isinstance(section, _data): 783 realpos = section.realpos + self.position - section.offset 784 self.fileobj.seek(self.offset + realpos) 785 self.position += size 786 return self.__read(size) 787 else: 788 self.position += size 789 return NUL * size 790#class _FileInFile 791 792 793class ExFileObject(object): 794 """File-like object for reading an archive member. 795 Is returned by TarFile.extractfile(). 796 """ 797 blocksize = 1024 798 799 def __init__(self, tarfile, tarinfo): 800 self.fileobj = _FileInFile(tarfile.fileobj, 801 tarinfo.offset_data, 802 tarinfo.size, 803 getattr(tarinfo, "sparse", None)) 804 self.name = tarinfo.name 805 self.mode = "r" 806 self.closed = False 807 self.size = tarinfo.size 808 809 self.position = 0 810 self.buffer = "" 811 812 def read(self, size=None): 813 """Read at most size bytes from the file. If size is not 814 present or None, read all data until EOF is reached. 815 """ 816 if self.closed: 817 raise ValueError("I/O operation on closed file") 818 819 buf = "" 820 if self.buffer: 821 if size is None: 822 buf = self.buffer 823 self.buffer = "" 824 else: 825 buf = self.buffer[:size] 826 self.buffer = self.buffer[size:] 827 828 if size is None: 829 buf += self.fileobj.read() 830 else: 831 buf += self.fileobj.read(size - len(buf)) 832 833 self.position += len(buf) 834 return buf 835 836 def readline(self, size=-1): 837 """Read one entire line from the file. If size is present 838 and non-negative, return a string with at most that 839 size, which may be an incomplete line. 840 """ 841 if self.closed: 842 raise ValueError("I/O operation on closed file") 843 844 if "\n" in self.buffer: 845 pos = self.buffer.find("\n") + 1 846 else: 847 buffers = [self.buffer] 848 while True: 849 buf = self.fileobj.read(self.blocksize) 850 buffers.append(buf) 851 if not buf or "\n" in buf: 852 self.buffer = "".join(buffers) 853 pos = self.buffer.find("\n") + 1 854 if pos == 0: 855 # no newline found. 856 pos = len(self.buffer) 857 break 858 859 if size != -1: 860 pos = min(size, pos) 861 862 buf = self.buffer[:pos] 863 self.buffer = self.buffer[pos:] 864 self.position += len(buf) 865 return buf 866 867 def readlines(self): 868 """Return a list with all remaining lines. 869 """ 870 result = [] 871 while True: 872 line = self.readline() 873 if not line: break 874 result.append(line) 875 return result 876 877 def tell(self): 878 """Return the current file position. 879 """ 880 if self.closed: 881 raise ValueError("I/O operation on closed file") 882 883 return self.position 884 885 def seek(self, pos, whence=os.SEEK_SET): 886 """Seek to a position in the file. 887 """ 888 if self.closed: 889 raise ValueError("I/O operation on closed file") 890 891 if whence == os.SEEK_SET: 892 self.position = min(max(pos, 0), self.size) 893 elif whence == os.SEEK_CUR: 894 if pos < 0: 895 self.position = max(self.position + pos, 0) 896 else: 897 self.position = min(self.position + pos, self.size) 898 elif whence == os.SEEK_END: 899 self.position = max(min(self.size + pos, self.size), 0) 900 else: 901 raise ValueError("Invalid argument") 902 903 self.buffer = "" 904 self.fileobj.seek(self.position) 905 906 def close(self): 907 """Close the file object. 908 """ 909 self.closed = True 910 911 def __iter__(self): 912 """Get an iterator over the file's lines. 913 """ 914 while True: 915 line = self.readline() 916 if not line: 917 break 918 yield line 919#class ExFileObject 920 921#------------------ 922# Exported Classes 923#------------------ 924class TarInfo(object): 925 """Informational class which holds the details about an 926 archive member given by a tar header block. 927 TarInfo objects are returned by TarFile.getmember(), 928 TarFile.getmembers() and TarFile.gettarinfo() and are 929 usually created internally. 930 """ 931 932 def __init__(self, name=""): 933 """Construct a TarInfo object. name is the optional name 934 of the member. 935 """ 936 self.name = name # member name 937 self.mode = 0644 # file permissions 938 self.uid = 0 # user id 939 self.gid = 0 # group id 940 self.size = 0 # file size 941 self.mtime = 0 # modification time 942 self.chksum = 0 # header checksum 943 self.type = REGTYPE # member type 944 self.linkname = "" # link name 945 self.uname = "" # user name 946 self.gname = "" # group name 947 self.devmajor = 0 # device major number 948 self.devminor = 0 # device minor number 949 950 self.offset = 0 # the tar header starts here 951 self.offset_data = 0 # the file's data starts here 952 953 self.pax_headers = {} # pax header information 954 955 # In pax headers the "name" and "linkname" field are called 956 # "path" and "linkpath". 957 def _getpath(self): 958 return self.name 959 def _setpath(self, name): 960 self.name = name 961 path = property(_getpath, _setpath) 962 963 def _getlinkpath(self): 964 return self.linkname 965 def _setlinkpath(self, linkname): 966 self.linkname = linkname 967 linkpath = property(_getlinkpath, _setlinkpath) 968 969 def __repr__(self): 970 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) 971 972 def get_info(self, encoding, errors): 973 """Return the TarInfo's attributes as a dictionary. 974 """ 975 info = { 976 "name": self.name, 977 "mode": self.mode & 07777, 978 "uid": self.uid, 979 "gid": self.gid, 980 "size": self.size, 981 "mtime": self.mtime, 982 "chksum": self.chksum, 983 "type": self.type, 984 "linkname": self.linkname, 985 "uname": self.uname, 986 "gname": self.gname, 987 "devmajor": self.devmajor, 988 "devminor": self.devminor 989 } 990 991 if info["type"] == DIRTYPE and not info["name"].endswith("/"): 992 info["name"] += "/" 993 994 for key in ("name", "linkname", "uname", "gname"): 995 if type(info[key]) is unicode: 996 info[key] = info[key].encode(encoding, errors) 997 998 return info 999 1000 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"): 1001 """Return a tar header as a string of 512 byte blocks. 1002 """ 1003 info = self.get_info(encoding, errors) 1004 1005 if format == USTAR_FORMAT: 1006 return self.create_ustar_header(info) 1007 elif format == GNU_FORMAT: 1008 return self.create_gnu_header(info) 1009 elif format == PAX_FORMAT: 1010 return self.create_pax_header(info, encoding, errors) 1011 else: 1012 raise ValueError("invalid format") 1013 1014 def create_ustar_header(self, info): 1015 """Return the object as a ustar header block. 1016 """ 1017 info["magic"] = POSIX_MAGIC 1018 1019 if len(info["linkname"]) > LENGTH_LINK: 1020 raise ValueError("linkname is too long") 1021 1022 if len(info["name"]) > LENGTH_NAME: 1023 info["prefix"], info["name"] = self._posix_split_name(info["name"]) 1024 1025 return self._create_header(info, USTAR_FORMAT) 1026 1027 def create_gnu_header(self, info): 1028 """Return the object as a GNU header block sequence. 1029 """ 1030 info["magic"] = GNU_MAGIC 1031 1032 buf = "" 1033 if len(info["linkname"]) > LENGTH_LINK: 1034 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK) 1035 1036 if len(info["name"]) > LENGTH_NAME: 1037 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME) 1038 1039 return buf + self._create_header(info, GNU_FORMAT) 1040 1041 def create_pax_header(self, info, encoding, errors): 1042 """Return the object as a ustar header block. If it cannot be 1043 represented this way, prepend a pax extended header sequence 1044 with supplement information. 1045 """ 1046 info["magic"] = POSIX_MAGIC 1047 pax_headers = self.pax_headers.copy() 1048 1049 # Test string fields for values that exceed the field length or cannot 1050 # be represented in ASCII encoding. 1051 for name, hname, length in ( 1052 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK), 1053 ("uname", "uname", 32), ("gname", "gname", 32)): 1054 1055 if hname in pax_headers: 1056 # The pax header has priority. 1057 continue 1058 1059 val = info[name].decode(encoding, errors) 1060 1061 # Try to encode the string as ASCII. 1062 try: 1063 val.encode("ascii") 1064 except UnicodeEncodeError: 1065 pax_headers[hname] = val 1066 continue 1067 1068 if len(info[name]) > length: 1069 pax_headers[hname] = val 1070 1071 # Test number fields for values that exceed the field limit or values 1072 # that like to be stored as float. 1073 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): 1074 if name in pax_headers: 1075 # The pax header has priority. Avoid overflow. 1076 info[name] = 0 1077 continue 1078 1079 val = info[name] 1080 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): 1081 pax_headers[name] = unicode(val) 1082 info[name] = 0 1083 1084 # Create a pax extended header if necessary. 1085 if pax_headers: 1086 buf = self._create_pax_generic_header(pax_headers) 1087 else: 1088 buf = "" 1089 1090 return buf + self._create_header(info, USTAR_FORMAT) 1091 1092 @classmethod 1093 def create_pax_global_header(cls, pax_headers): 1094 """Return the object as a pax global header block sequence. 1095 """ 1096 return cls._create_pax_generic_header(pax_headers, type=XGLTYPE) 1097 1098 def _posix_split_name(self, name): 1099 """Split a name longer than 100 chars into a prefix 1100 and a name part. 1101 """ 1102 prefix = name[:LENGTH_PREFIX + 1] 1103 while prefix and prefix[-1] != "/": 1104 prefix = prefix[:-1] 1105 1106 name = name[len(prefix):] 1107 prefix = prefix[:-1] 1108 1109 if not prefix or len(name) > LENGTH_NAME: 1110 raise ValueError("name is too long") 1111 return prefix, name 1112 1113 @staticmethod 1114 def _create_header(info, format): 1115 """Return a header block. info is a dictionary with file 1116 information, format must be one of the *_FORMAT constants. 1117 """ 1118 parts = [ 1119 stn(info.get("name", ""), 100), 1120 itn(info.get("mode", 0) & 07777, 8, format), 1121 itn(info.get("uid", 0), 8, format), 1122 itn(info.get("gid", 0), 8, format), 1123 itn(info.get("size", 0), 12, format), 1124 itn(info.get("mtime", 0), 12, format), 1125 " ", # checksum field 1126 info.get("type", REGTYPE), 1127 stn(info.get("linkname", ""), 100), 1128 stn(info.get("magic", POSIX_MAGIC), 8), 1129 stn(info.get("uname", ""), 32), 1130 stn(info.get("gname", ""), 32), 1131 itn(info.get("devmajor", 0), 8, format), 1132 itn(info.get("devminor", 0), 8, format), 1133 stn(info.get("prefix", ""), 155) 1134 ] 1135 1136 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts)) 1137 chksum = calc_chksums(buf[-BLOCKSIZE:])[0] 1138 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:] 1139 return buf 1140 1141 @staticmethod 1142 def _create_payload(payload): 1143 """Return the string payload filled with zero bytes 1144 up to the next 512 byte border. 1145 """ 1146 blocks, remainder = divmod(len(payload), BLOCKSIZE) 1147 if remainder > 0: 1148 payload += (BLOCKSIZE - remainder) * NUL 1149 return payload 1150 1151 @classmethod 1152 def _create_gnu_long_header(cls, name, type): 1153 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence 1154 for name. 1155 """ 1156 name += NUL 1157 1158 info = {} 1159 info["name"] = "././@LongLink" 1160 info["type"] = type 1161 info["size"] = len(name) 1162 info["magic"] = GNU_MAGIC 1163 1164 # create extended header + name blocks. 1165 return cls._create_header(info, USTAR_FORMAT) + \ 1166 cls._create_payload(name) 1167 1168 @classmethod 1169 def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE): 1170 """Return a POSIX.1-2001 extended or global header sequence 1171 that contains a list of keyword, value pairs. The values 1172 must be unicode objects. 1173 """ 1174 records = [] 1175 for keyword, value in pax_headers.iteritems(): 1176 keyword = keyword.encode("utf8") 1177 value = value.encode("utf8") 1178 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' 1179 n = p = 0 1180 while True: 1181 n = l + len(str(p)) 1182 if n == p: 1183 break 1184 p = n 1185 records.append("%d %s=%s\n" % (p, keyword, value)) 1186 records = "".join(records) 1187 1188 # We use a hardcoded "././@PaxHeader" name like star does 1189 # instead of the one that POSIX recommends. 1190 info = {} 1191 info["name"] = "././@PaxHeader" 1192 info["type"] = type 1193 info["size"] = len(records) 1194 info["magic"] = POSIX_MAGIC 1195 1196 # Create pax header + record blocks. 1197 return cls._create_header(info, USTAR_FORMAT) + \ 1198 cls._create_payload(records) 1199 1200 @classmethod 1201 def frombuf(cls, buf): 1202 """Construct a TarInfo object from a 512 byte string buffer. 1203 """ 1204 if len(buf) == 0: 1205 raise EmptyHeaderError("empty header") 1206 if len(buf) != BLOCKSIZE: 1207 raise TruncatedHeaderError("truncated header") 1208 if buf.count(NUL) == BLOCKSIZE: 1209 raise EOFHeaderError("end of file header") 1210 1211 chksum = nti(buf[148:156]) 1212 if chksum not in calc_chksums(buf): 1213 raise InvalidHeaderError("bad checksum") 1214 1215 obj = cls() 1216 obj.buf = buf 1217 obj.name = nts(buf[0:100]) 1218 obj.mode = nti(buf[100:108]) 1219 obj.uid = nti(buf[108:116]) 1220 obj.gid = nti(buf[116:124]) 1221 obj.size = nti(buf[124:136]) 1222 obj.mtime = nti(buf[136:148]) 1223 obj.chksum = chksum 1224 obj.type = buf[156:157] 1225 obj.linkname = nts(buf[157:257]) 1226 obj.uname = nts(buf[265:297]) 1227 obj.gname = nts(buf[297:329]) 1228 obj.devmajor = nti(buf[329:337]) 1229 obj.devminor = nti(buf[337:345]) 1230 prefix = nts(buf[345:500]) 1231 1232 # Old V7 tar format represents a directory as a regular 1233 # file with a trailing slash. 1234 if obj.type == AREGTYPE and obj.name.endswith("/"): 1235 obj.type = DIRTYPE 1236 1237 # Remove redundant slashes from directories. 1238 if obj.isdir(): 1239 obj.name = obj.name.rstrip("/") 1240 1241 # Reconstruct a ustar longname. 1242 if prefix and obj.type not in GNU_TYPES: 1243 obj.name = prefix + "/" + obj.name 1244 return obj 1245 1246 @classmethod 1247 def fromtarfile(cls, tarfile): 1248 """Return the next TarInfo object from TarFile object 1249 tarfile. 1250 """ 1251 buf = tarfile.fileobj.read(BLOCKSIZE) 1252 obj = cls.frombuf(buf) 1253 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE 1254 return obj._proc_member(tarfile) 1255 1256 #-------------------------------------------------------------------------- 1257 # The following are methods that are called depending on the type of a 1258 # member. The entry point is _proc_member() which can be overridden in a 1259 # subclass to add custom _proc_*() methods. A _proc_*() method MUST 1260 # implement the following 1261 # operations: 1262 # 1. Set self.offset_data to the position where the data blocks begin, 1263 # if there is data that follows. 1264 # 2. Set tarfile.offset to the position where the next member's header will 1265 # begin. 1266 # 3. Return self or another valid TarInfo object. 1267 def _proc_member(self, tarfile): 1268 """Choose the right processing method depending on 1269 the type and call it. 1270 """ 1271 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): 1272 return self._proc_gnulong(tarfile) 1273 elif self.type == GNUTYPE_SPARSE: 1274 return self._proc_sparse(tarfile) 1275 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE): 1276 return self._proc_pax(tarfile) 1277 else: 1278 return self._proc_builtin(tarfile) 1279 1280 def _proc_builtin(self, tarfile): 1281 """Process a builtin type or an unknown type which 1282 will be treated as a regular file. 1283 """ 1284 self.offset_data = tarfile.fileobj.tell() 1285 offset = self.offset_data 1286 if self.isreg() or self.type not in SUPPORTED_TYPES: 1287 # Skip the following data blocks. 1288 offset += self._block(self.size) 1289 tarfile.offset = offset 1290 1291 # Patch the TarInfo object with saved global 1292 # header information. 1293 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors) 1294 1295 return self 1296 1297 def _proc_gnulong(self, tarfile): 1298 """Process the blocks that hold a GNU longname 1299 or longlink member. 1300 """ 1301 buf = tarfile.fileobj.read(self._block(self.size)) 1302 1303 # Fetch the next header and process it. 1304 try: 1305 next = self.fromtarfile(tarfile) 1306 except HeaderError: 1307 raise SubsequentHeaderError("missing or bad subsequent header") 1308 1309 # Patch the TarInfo object from the next header with 1310 # the longname information. 1311 next.offset = self.offset 1312 if self.type == GNUTYPE_LONGNAME: 1313 next.name = nts(buf) 1314 elif self.type == GNUTYPE_LONGLINK: 1315 next.linkname = nts(buf) 1316 1317 return next 1318 1319 def _proc_sparse(self, tarfile): 1320 """Process a GNU sparse header plus extra headers. 1321 """ 1322 buf = self.buf 1323 sp = _ringbuffer() 1324 pos = 386 1325 lastpos = 0L 1326 realpos = 0L 1327 # There are 4 possible sparse structs in the 1328 # first header. 1329 for i in xrange(4): 1330 try: 1331 offset = nti(buf[pos:pos + 12]) 1332 numbytes = nti(buf[pos + 12:pos + 24]) 1333 except ValueError: 1334 break 1335 if offset > lastpos: 1336 sp.append(_hole(lastpos, offset - lastpos)) 1337 sp.append(_data(offset, numbytes, realpos)) 1338 realpos += numbytes 1339 lastpos = offset + numbytes 1340 pos += 24 1341 1342 isextended = ord(buf[482]) 1343 origsize = nti(buf[483:495]) 1344 1345 # If the isextended flag is given, 1346 # there are extra headers to process. 1347 while isextended == 1: 1348 buf = tarfile.fileobj.read(BLOCKSIZE) 1349 pos = 0 1350 for i in xrange(21): 1351 try: 1352 offset = nti(buf[pos:pos + 12]) 1353 numbytes = nti(buf[pos + 12:pos + 24]) 1354 except ValueError: 1355 break 1356 if offset > lastpos: 1357 sp.append(_hole(lastpos, offset - lastpos)) 1358 sp.append(_data(offset, numbytes, realpos)) 1359 realpos += numbytes 1360 lastpos = offset + numbytes 1361 pos += 24 1362 isextended = ord(buf[504]) 1363 1364 if lastpos < origsize: 1365 sp.append(_hole(lastpos, origsize - lastpos)) 1366 1367 self.sparse = sp 1368 1369 self.offset_data = tarfile.fileobj.tell() 1370 tarfile.offset = self.offset_data + self._block(self.size) 1371 self.size = origsize 1372 1373 return self 1374 1375 def _proc_pax(self, tarfile): 1376 """Process an extended or global header as described in 1377 POSIX.1-2001. 1378 """ 1379 # Read the header information. 1380 buf = tarfile.fileobj.read(self._block(self.size)) 1381 1382 # A pax header stores supplemental information for either 1383 # the following file (extended) or all following files 1384 # (global). 1385 if self.type == XGLTYPE: 1386 pax_headers = tarfile.pax_headers 1387 else: 1388 pax_headers = tarfile.pax_headers.copy() 1389 1390 # Parse pax header information. A record looks like that: 1391 # "%d %s=%s\n" % (length, keyword, value). length is the size 1392 # of the complete record including the length field itself and 1393 # the newline. keyword and value are both UTF-8 encoded strings. 1394 regex = re.compile(r"(\d+) ([^=]+)=", re.U) 1395 pos = 0 1396 while True: 1397 match = regex.match(buf, pos) 1398 if not match: 1399 break 1400 1401 length, keyword = match.groups() 1402 length = int(length) 1403 value = buf[match.end(2) + 1:match.start(1) + length - 1] 1404 1405 keyword = keyword.decode("utf8") 1406 value = value.decode("utf8") 1407 1408 pax_headers[keyword] = value 1409 pos += length 1410 1411 # Fetch the next header. 1412 try: 1413 next = self.fromtarfile(tarfile) 1414 except HeaderError: 1415 raise SubsequentHeaderError("missing or bad subsequent header") 1416 1417 if self.type in (XHDTYPE, SOLARIS_XHDTYPE): 1418 # Patch the TarInfo object with the extended header info. 1419 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors) 1420 next.offset = self.offset 1421 1422 if "size" in pax_headers: 1423 # If the extended header replaces the size field, 1424 # we need to recalculate the offset where the next 1425 # header starts. 1426 offset = next.offset_data 1427 if next.isreg() or next.type not in SUPPORTED_TYPES: 1428 offset += next._block(next.size) 1429 tarfile.offset = offset 1430 1431 return next 1432 1433 def _apply_pax_info(self, pax_headers, encoding, errors): 1434 """Replace fields with supplemental information from a previous 1435 pax extended or global header. 1436 """ 1437 for keyword, value in pax_headers.iteritems(): 1438 if keyword not in PAX_FIELDS: 1439 continue 1440 1441 if keyword == "path": 1442 value = value.rstrip("/") 1443 1444 if keyword in PAX_NUMBER_FIELDS: 1445 try: 1446 value = PAX_NUMBER_FIELDS[keyword](value) 1447 except ValueError: 1448 value = 0 1449 else: 1450 value = uts(value, encoding, errors) 1451 1452 setattr(self, keyword, value) 1453 1454 self.pax_headers = pax_headers.copy() 1455 1456 def _block(self, count): 1457 """Round up a byte count by BLOCKSIZE and return it, 1458 e.g. _block(834) => 1024. 1459 """ 1460 blocks, remainder = divmod(count, BLOCKSIZE) 1461 if remainder: 1462 blocks += 1 1463 return blocks * BLOCKSIZE 1464 1465 def isreg(self): 1466 return self.type in REGULAR_TYPES 1467 def isfile(self): 1468 return self.isreg() 1469 def isdir(self): 1470 return self.type == DIRTYPE 1471 def issym(self): 1472 return self.type == SYMTYPE 1473 def islnk(self): 1474 return self.type == LNKTYPE 1475 def ischr(self): 1476 return self.type == CHRTYPE 1477 def isblk(self): 1478 return self.type == BLKTYPE 1479 def isfifo(self): 1480 return self.type == FIFOTYPE 1481 def issparse(self): 1482 return self.type == GNUTYPE_SPARSE 1483 def isdev(self): 1484 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) 1485# class TarInfo 1486 1487class TarFile(object): 1488 """The TarFile Class provides an interface to tar archives. 1489 """ 1490 1491 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) 1492 1493 dereference = False # If true, add content of linked file to the 1494 # tar file, else the link. 1495 1496 ignore_zeros = False # If true, skips empty or invalid blocks and 1497 # continues processing. 1498 1499 errorlevel = 1 # If 0, fatal errors only appear in debug 1500 # messages (if debug >= 0). If > 0, errors 1501 # are passed to the caller as exceptions. 1502 1503 format = DEFAULT_FORMAT # The format to use when creating an archive. 1504 1505 encoding = ENCODING # Encoding for 8-bit character strings. 1506 1507 errors = None # Error handler for unicode conversion. 1508 1509 tarinfo = TarInfo # The default TarInfo class to use. 1510 1511 fileobject = ExFileObject # The default ExFileObject class to use. 1512 1513 def __init__(self, name=None, mode="r", fileobj=None, format=None, 1514 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, 1515 errors=None, pax_headers=None, debug=None, errorlevel=None): 1516 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to 1517 read from an existing archive, 'a' to append data to an existing 1518 file or 'w' to create a new file overwriting an existing one. `mode' 1519 defaults to 'r'. 1520 If `fileobj' is given, it is used for reading or writing data. If it 1521 can be determined, `mode' is overridden by `fileobj's mode. 1522 `fileobj' is not closed, when TarFile is closed. 1523 """ 1524 modes = {"r": "rb", "a": "r+b", "w": "wb"} 1525 if mode not in modes: 1526 raise ValueError("mode must be 'r', 'a' or 'w'") 1527 self.mode = mode 1528 self._mode = modes[mode] 1529 1530 if not fileobj: 1531 if self.mode == "a" and not os.path.exists(name): 1532 # Create nonexistent files in append mode. 1533 self.mode = "w" 1534 self._mode = "wb" 1535 fileobj = bltn_open(name, self._mode) 1536 self._extfileobj = False 1537 else: 1538 if name is None and hasattr(fileobj, "name"): 1539 name = fileobj.name 1540 if hasattr(fileobj, "mode"): 1541 self._mode = fileobj.mode 1542 self._extfileobj = True 1543 self.name = os.path.abspath(name) if name else None 1544 self.fileobj = fileobj 1545 1546 # Init attributes. 1547 if format is not None: 1548 self.format = format 1549 if tarinfo is not None: 1550 self.tarinfo = tarinfo 1551 if dereference is not None: 1552 self.dereference = dereference 1553 if ignore_zeros is not None: 1554 self.ignore_zeros = ignore_zeros 1555 if encoding is not None: 1556 self.encoding = encoding 1557 1558 if errors is not None: 1559 self.errors = errors 1560 elif mode == "r": 1561 self.errors = "utf-8" 1562 else: 1563 self.errors = "strict" 1564 1565 if pax_headers is not None and self.format == PAX_FORMAT: 1566 self.pax_headers = pax_headers 1567 else: 1568 self.pax_headers = {} 1569 1570 if debug is not None: 1571 self.debug = debug 1572 if errorlevel is not None: 1573 self.errorlevel = errorlevel 1574 1575 # Init datastructures. 1576 self.closed = False 1577 self.members = [] # list of members as TarInfo objects 1578 self._loaded = False # flag if all members have been read 1579 self.offset = self.fileobj.tell() 1580 # current position in the archive file 1581 self.inodes = {} # dictionary caching the inodes of 1582 # archive members already added 1583 1584 try: 1585 if self.mode == "r": 1586 self.firstmember = None 1587 self.firstmember = self.next() 1588 1589 if self.mode == "a": 1590 # Move to the end of the archive, 1591 # before the first empty block. 1592 while True: 1593 self.fileobj.seek(self.offset) 1594 try: 1595 tarinfo = self.tarinfo.fromtarfile(self) 1596 self.members.append(tarinfo) 1597 except EOFHeaderError: 1598 self.fileobj.seek(self.offset) 1599 break 1600 except HeaderError, e: 1601 raise ReadError(str(e)) 1602 1603 if self.mode in "aw": 1604 self._loaded = True 1605 1606 if self.pax_headers: 1607 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) 1608 self.fileobj.write(buf) 1609 self.offset += len(buf) 1610 except: 1611 if not self._extfileobj: 1612 self.fileobj.close() 1613 self.closed = True 1614 raise 1615 1616 def _getposix(self): 1617 return self.format == USTAR_FORMAT 1618 def _setposix(self, value): 1619 import warnings 1620 warnings.warn("use the format attribute instead", DeprecationWarning, 1621 2) 1622 if value: 1623 self.format = USTAR_FORMAT 1624 else: 1625 self.format = GNU_FORMAT 1626 posix = property(_getposix, _setposix) 1627 1628 #-------------------------------------------------------------------------- 1629 # Below are the classmethods which act as alternate constructors to the 1630 # TarFile class. The open() method is the only one that is needed for 1631 # public use; it is the "super"-constructor and is able to select an 1632 # adequate "sub"-constructor for a particular compression using the mapping 1633 # from OPEN_METH. 1634 # 1635 # This concept allows one to subclass TarFile without losing the comfort of 1636 # the super-constructor. A sub-constructor is registered and made available 1637 # by adding it to the mapping in OPEN_METH. 1638 1639 @classmethod 1640 def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs): 1641 """Open a tar archive for reading, writing or appending. Return 1642 an appropriate TarFile class. 1643 1644 mode: 1645 'r' or 'r:*' open for reading with transparent compression 1646 'r:' open for reading exclusively uncompressed 1647 'r:gz' open for reading with gzip compression 1648 'r:bz2' open for reading with bzip2 compression 1649 'a' or 'a:' open for appending, creating the file if necessary 1650 'w' or 'w:' open for writing without compression 1651 'w:gz' open for writing with gzip compression 1652 'w:bz2' open for writing with bzip2 compression 1653 1654 'r|*' open a stream of tar blocks with transparent compression 1655 'r|' open an uncompressed stream of tar blocks for reading 1656 'r|gz' open a gzip compressed stream of tar blocks 1657 'r|bz2' open a bzip2 compressed stream of tar blocks 1658 'w|' open an uncompressed stream for writing 1659 'w|gz' open a gzip compressed stream for writing 1660 'w|bz2' open a bzip2 compressed stream for writing 1661 """ 1662 1663 if not name and not fileobj: 1664 raise ValueError("nothing to open") 1665 1666 if mode in ("r", "r:*"): 1667 # Find out which *open() is appropriate for opening the file. 1668 def not_compressed(comptype): 1669 return cls.OPEN_METH[comptype] == 'taropen' 1670 for comptype in sorted(cls.OPEN_METH, key=not_compressed): 1671 func = getattr(cls, cls.OPEN_METH[comptype]) 1672 if fileobj is not None: 1673 saved_pos = fileobj.tell() 1674 try: 1675 return func(name, "r", fileobj, **kwargs) 1676 except (ReadError, CompressionError), e: 1677 if fileobj is not None: 1678 fileobj.seek(saved_pos) 1679 continue 1680 raise ReadError("file could not be opened successfully") 1681 1682 elif ":" in mode: 1683 filemode, comptype = mode.split(":", 1) 1684 filemode = filemode or "r" 1685 comptype = comptype or "tar" 1686 1687 # Select the *open() function according to 1688 # given compression. 1689 if comptype in cls.OPEN_METH: 1690 func = getattr(cls, cls.OPEN_METH[comptype]) 1691 else: 1692 raise CompressionError("unknown compression type %r" % comptype) 1693 return func(name, filemode, fileobj, **kwargs) 1694 1695 elif "|" in mode: 1696 filemode, comptype = mode.split("|", 1) 1697 filemode = filemode or "r" 1698 comptype = comptype or "tar" 1699 1700 if filemode not in ("r", "w"): 1701 raise ValueError("mode must be 'r' or 'w'") 1702 1703 stream = _Stream(name, filemode, comptype, fileobj, bufsize) 1704 try: 1705 t = cls(name, filemode, stream, **kwargs) 1706 except: 1707 stream.close() 1708 raise 1709 t._extfileobj = False 1710 return t 1711 1712 elif mode in ("a", "w"): 1713 return cls.taropen(name, mode, fileobj, **kwargs) 1714 1715 raise ValueError("undiscernible mode") 1716 1717 @classmethod 1718 def taropen(cls, name, mode="r", fileobj=None, **kwargs): 1719 """Open uncompressed tar archive name for reading or writing. 1720 """ 1721 if mode not in ("r", "a", "w"): 1722 raise ValueError("mode must be 'r', 'a' or 'w'") 1723 return cls(name, mode, fileobj, **kwargs) 1724 1725 @classmethod 1726 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): 1727 """Open gzip compressed tar archive name for reading or writing. 1728 Appending is not allowed. 1729 """ 1730 if mode not in ("r", "w"): 1731 raise ValueError("mode must be 'r' or 'w'") 1732 1733 try: 1734 import gzip 1735 gzip.GzipFile 1736 except (ImportError, AttributeError): 1737 raise CompressionError("gzip module is not available") 1738 1739 try: 1740 fileobj = gzip.GzipFile(name, mode, compresslevel, fileobj) 1741 except OSError: 1742 if fileobj is not None and mode == 'r': 1743 raise ReadError("not a gzip file") 1744 raise 1745 1746 try: 1747 t = cls.taropen(name, mode, fileobj, **kwargs) 1748 except IOError: 1749 fileobj.close() 1750 if mode == 'r': 1751 raise ReadError("not a gzip file") 1752 raise 1753 except: 1754 fileobj.close() 1755 raise 1756 t._extfileobj = False 1757 return t 1758 1759 @classmethod 1760 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): 1761 """Open bzip2 compressed tar archive name for reading or writing. 1762 Appending is not allowed. 1763 """ 1764 if mode not in ("r", "w"): 1765 raise ValueError("mode must be 'r' or 'w'.") 1766 1767 try: 1768 import bz2 1769 except ImportError: 1770 raise CompressionError("bz2 module is not available") 1771 1772 if fileobj is not None: 1773 fileobj = _BZ2Proxy(fileobj, mode) 1774 else: 1775 fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) 1776 1777 try: 1778 t = cls.taropen(name, mode, fileobj, **kwargs) 1779 except (IOError, EOFError): 1780 fileobj.close() 1781 if mode == 'r': 1782 raise ReadError("not a bzip2 file") 1783 raise 1784 except: 1785 fileobj.close() 1786 raise 1787 t._extfileobj = False 1788 return t 1789 1790 # All *open() methods are registered here. 1791 OPEN_METH = { 1792 "tar": "taropen", # uncompressed tar 1793 "gz": "gzopen", # gzip compressed tar 1794 "bz2": "bz2open" # bzip2 compressed tar 1795 } 1796 1797 #-------------------------------------------------------------------------- 1798 # The public methods which TarFile provides: 1799 1800 def close(self): 1801 """Close the TarFile. In write-mode, two finishing zero blocks are 1802 appended to the archive. 1803 """ 1804 if self.closed: 1805 return 1806 1807 self.closed = True 1808 try: 1809 if self.mode in "aw": 1810 self.fileobj.write(NUL * (BLOCKSIZE * 2)) 1811 self.offset += (BLOCKSIZE * 2) 1812 # fill up the end with zero-blocks 1813 # (like option -b20 for tar does) 1814 blocks, remainder = divmod(self.offset, RECORDSIZE) 1815 if remainder > 0: 1816 self.fileobj.write(NUL * (RECORDSIZE - remainder)) 1817 finally: 1818 if not self._extfileobj: 1819 self.fileobj.close() 1820 1821 def getmember(self, name): 1822 """Return a TarInfo object for member `name'. If `name' can not be 1823 found in the archive, KeyError is raised. If a member occurs more 1824 than once in the archive, its last occurrence is assumed to be the 1825 most up-to-date version. 1826 """ 1827 tarinfo = self._getmember(name) 1828 if tarinfo is None: 1829 raise KeyError("filename %r not found" % name) 1830 return tarinfo 1831 1832 def getmembers(self): 1833 """Return the members of the archive as a list of TarInfo objects. The 1834 list has the same order as the members in the archive. 1835 """ 1836 self._check() 1837 if not self._loaded: # if we want to obtain a list of 1838 self._load() # all members, we first have to 1839 # scan the whole archive. 1840 return self.members 1841 1842 def getnames(self): 1843 """Return the members of the archive as a list of their names. It has 1844 the same order as the list returned by getmembers(). 1845 """ 1846 return [tarinfo.name for tarinfo in self.getmembers()] 1847 1848 def gettarinfo(self, name=None, arcname=None, fileobj=None): 1849 """Create a TarInfo object from the result of os.stat or equivalent 1850 on an existing file. The file is either named by `name', or 1851 specified as a file object `fileobj' with a file descriptor. If 1852 given, `arcname' specifies an alternative name for the file in the 1853 archive, otherwise, the name is taken from the 'name' attribute of 1854 'fileobj', or the 'name' argument. 1855 """ 1856 self._check("aw") 1857 1858 # When fileobj is given, replace name by 1859 # fileobj's real name. 1860 if fileobj is not None: 1861 name = fileobj.name 1862 1863 # Building the name of the member in the archive. 1864 # Backward slashes are converted to forward slashes, 1865 # Absolute paths are turned to relative paths. 1866 if arcname is None: 1867 arcname = name 1868 drv, arcname = os.path.splitdrive(arcname) 1869 arcname = arcname.replace(os.sep, "/") 1870 arcname = arcname.lstrip("/") 1871 1872 # Now, fill the TarInfo object with 1873 # information specific for the file. 1874 tarinfo = self.tarinfo() 1875 tarinfo.tarfile = self # Not needed 1876 1877 # Use os.stat or os.lstat, depending on platform 1878 # and if symlinks shall be resolved. 1879 if fileobj is None: 1880 if hasattr(os, "lstat") and not self.dereference: 1881 statres = os.lstat(name) 1882 else: 1883 statres = os.stat(name) 1884 else: 1885 statres = os.fstat(fileobj.fileno()) 1886 linkname = "" 1887 1888 stmd = statres.st_mode 1889 if stat.S_ISREG(stmd): 1890 inode = (statres.st_ino, statres.st_dev) 1891 if not self.dereference and statres.st_nlink > 1 and \ 1892 inode in self.inodes and arcname != self.inodes[inode]: 1893 # Is it a hardlink to an already 1894 # archived file? 1895 type = LNKTYPE 1896 linkname = self.inodes[inode] 1897 else: 1898 # The inode is added only if its valid. 1899 # For win32 it is always 0. 1900 type = REGTYPE 1901 if inode[0]: 1902 self.inodes[inode] = arcname 1903 elif stat.S_ISDIR(stmd): 1904 type = DIRTYPE 1905 elif stat.S_ISFIFO(stmd): 1906 type = FIFOTYPE 1907 elif stat.S_ISLNK(stmd): 1908 type = SYMTYPE 1909 linkname = os.readlink(name) 1910 elif stat.S_ISCHR(stmd): 1911 type = CHRTYPE 1912 elif stat.S_ISBLK(stmd): 1913 type = BLKTYPE 1914 else: 1915 return None 1916 1917 # Fill the TarInfo object with all 1918 # information we can get. 1919 tarinfo.name = arcname 1920 tarinfo.mode = stmd 1921 tarinfo.uid = statres.st_uid 1922 tarinfo.gid = statres.st_gid 1923 if type == REGTYPE: 1924 tarinfo.size = statres.st_size 1925 else: 1926 tarinfo.size = 0L 1927 tarinfo.mtime = statres.st_mtime 1928 tarinfo.type = type 1929 tarinfo.linkname = linkname 1930 if pwd: 1931 try: 1932 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] 1933 except KeyError: 1934 pass 1935 if grp: 1936 try: 1937 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] 1938 except KeyError: 1939 pass 1940 1941 if type in (CHRTYPE, BLKTYPE): 1942 if hasattr(os, "major") and hasattr(os, "minor"): 1943 tarinfo.devmajor = os.major(statres.st_rdev) 1944 tarinfo.devminor = os.minor(statres.st_rdev) 1945 return tarinfo 1946 1947 def list(self, verbose=True): 1948 """Print a table of contents to sys.stdout. If `verbose' is False, only 1949 the names of the members are printed. If it is True, an `ls -l'-like 1950 output is produced. 1951 """ 1952 self._check() 1953 1954 for tarinfo in self: 1955 if verbose: 1956 print filemode(tarinfo.mode), 1957 print "%s/%s" % (tarinfo.uname or tarinfo.uid, 1958 tarinfo.gname or tarinfo.gid), 1959 if tarinfo.ischr() or tarinfo.isblk(): 1960 print "%10s" % ("%d,%d" \ 1961 % (tarinfo.devmajor, tarinfo.devminor)), 1962 else: 1963 print "%10d" % tarinfo.size, 1964 print "%d-%02d-%02d %02d:%02d:%02d" \ 1965 % time.localtime(tarinfo.mtime)[:6], 1966 1967 print tarinfo.name + ("/" if tarinfo.isdir() else ""), 1968 1969 if verbose: 1970 if tarinfo.issym(): 1971 print "->", tarinfo.linkname, 1972 if tarinfo.islnk(): 1973 print "link to", tarinfo.linkname, 1974 print 1975 1976 def add(self, name, arcname=None, recursive=True, exclude=None, filter=None): 1977 """Add the file `name' to the archive. `name' may be any type of file 1978 (directory, fifo, symbolic link, etc.). If given, `arcname' 1979 specifies an alternative name for the file in the archive. 1980 Directories are added recursively by default. This can be avoided by 1981 setting `recursive' to False. `exclude' is a function that should 1982 return True for each filename to be excluded. `filter' is a function 1983 that expects a TarInfo object argument and returns the changed 1984 TarInfo object, if it returns None the TarInfo object will be 1985 excluded from the archive. 1986 """ 1987 self._check("aw") 1988 1989 if arcname is None: 1990 arcname = name 1991 1992 # Exclude pathnames. 1993 if exclude is not None: 1994 import warnings 1995 warnings.warn("use the filter argument instead", 1996 DeprecationWarning, 2) 1997 if exclude(name): 1998 self._dbg(2, "tarfile: Excluded %r" % name) 1999 return 2000 2001 # Skip if somebody tries to archive the archive... 2002 if self.name is not None and os.path.abspath(name) == self.name: 2003 self._dbg(2, "tarfile: Skipped %r" % name) 2004 return 2005 2006 self._dbg(1, name) 2007 2008 # Create a TarInfo object from the file. 2009 tarinfo = self.gettarinfo(name, arcname) 2010 2011 if tarinfo is None: 2012 self._dbg(1, "tarfile: Unsupported type %r" % name) 2013 return 2014 2015 # Change or exclude the TarInfo object. 2016 if filter is not None: 2017 tarinfo = filter(tarinfo) 2018 if tarinfo is None: 2019 self._dbg(2, "tarfile: Excluded %r" % name) 2020 return 2021 2022 # Append the tar header and data to the archive. 2023 if tarinfo.isreg(): 2024 with bltn_open(name, "rb") as f: 2025 self.addfile(tarinfo, f) 2026 2027 elif tarinfo.isdir(): 2028 self.addfile(tarinfo) 2029 if recursive: 2030 for f in os.listdir(name): 2031 self.add(os.path.join(name, f), os.path.join(arcname, f), 2032 recursive, exclude, filter) 2033 2034 else: 2035 self.addfile(tarinfo) 2036 2037 def addfile(self, tarinfo, fileobj=None): 2038 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is 2039 given, tarinfo.size bytes are read from it and added to the archive. 2040 You can create TarInfo objects directly, or by using gettarinfo(). 2041 On Windows platforms, `fileobj' should always be opened with mode 2042 'rb' to avoid irritation about the file size. 2043 """ 2044 self._check("aw") 2045 2046 tarinfo = copy.copy(tarinfo) 2047 2048 buf = tarinfo.tobuf(self.format, self.encoding, self.errors) 2049 self.fileobj.write(buf) 2050 self.offset += len(buf) 2051 2052 # If there's data to follow, append it. 2053 if fileobj is not None: 2054 copyfileobj(fileobj, self.fileobj, tarinfo.size) 2055 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) 2056 if remainder > 0: 2057 self.fileobj.write(NUL * (BLOCKSIZE - remainder)) 2058 blocks += 1 2059 self.offset += blocks * BLOCKSIZE 2060 2061 self.members.append(tarinfo) 2062 2063 def extractall(self, path=".", members=None): 2064 """Extract all members from the archive to the current working 2065 directory and set owner, modification time and permissions on 2066 directories afterwards. `path' specifies a different directory 2067 to extract to. `members' is optional and must be a subset of the 2068 list returned by getmembers(). 2069 """ 2070 directories = [] 2071 2072 if members is None: 2073 members = self 2074 2075 for tarinfo in members: 2076 if tarinfo.isdir(): 2077 # Extract directories with a safe mode. 2078 directories.append(tarinfo) 2079 tarinfo = copy.copy(tarinfo) 2080 tarinfo.mode = 0700 2081 self.extract(tarinfo, path) 2082 2083 # Reverse sort directories. 2084 directories.sort(key=operator.attrgetter('name')) 2085 directories.reverse() 2086 2087 # Set correct owner, mtime and filemode on directories. 2088 for tarinfo in directories: 2089 dirpath = os.path.join(path, tarinfo.name) 2090 try: 2091 self.chown(tarinfo, dirpath) 2092 self.utime(tarinfo, dirpath) 2093 self.chmod(tarinfo, dirpath) 2094 except ExtractError, e: 2095 if self.errorlevel > 1: 2096 raise 2097 else: 2098 self._dbg(1, "tarfile: %s" % e) 2099 2100 def extract(self, member, path=""): 2101 """Extract a member from the archive to the current working directory, 2102 using its full name. Its file information is extracted as accurately 2103 as possible. `member' may be a filename or a TarInfo object. You can 2104 specify a different directory using `path'. 2105 """ 2106 self._check("r") 2107 2108 if isinstance(member, basestring): 2109 tarinfo = self.getmember(member) 2110 else: 2111 tarinfo = member 2112 2113 # Prepare the link target for makelink(). 2114 if tarinfo.islnk(): 2115 tarinfo._link_target = os.path.join(path, tarinfo.linkname) 2116 2117 try: 2118 self._extract_member(tarinfo, os.path.join(path, tarinfo.name)) 2119 except EnvironmentError, e: 2120 if self.errorlevel > 0: 2121 raise 2122 else: 2123 if e.filename is None: 2124 self._dbg(1, "tarfile: %s" % e.strerror) 2125 else: 2126 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) 2127 except ExtractError, e: 2128 if self.errorlevel > 1: 2129 raise 2130 else: 2131 self._dbg(1, "tarfile: %s" % e) 2132 2133 def extractfile(self, member): 2134 """Extract a member from the archive as a file object. `member' may be 2135 a filename or a TarInfo object. If `member' is a regular file, a 2136 file-like object is returned. If `member' is a link, a file-like 2137 object is constructed from the link's target. If `member' is none of 2138 the above, None is returned. 2139 The file-like object is read-only and provides the following 2140 methods: read(), readline(), readlines(), seek() and tell() 2141 """ 2142 self._check("r") 2143 2144 if isinstance(member, basestring): 2145 tarinfo = self.getmember(member) 2146 else: 2147 tarinfo = member 2148 2149 if tarinfo.isreg(): 2150 return self.fileobject(self, tarinfo) 2151 2152 elif tarinfo.type not in SUPPORTED_TYPES: 2153 # If a member's type is unknown, it is treated as a 2154 # regular file. 2155 return self.fileobject(self, tarinfo) 2156 2157 elif tarinfo.islnk() or tarinfo.issym(): 2158 if isinstance(self.fileobj, _Stream): 2159 # A small but ugly workaround for the case that someone tries 2160 # to extract a (sym)link as a file-object from a non-seekable 2161 # stream of tar blocks. 2162 raise StreamError("cannot extract (sym)link as file object") 2163 else: 2164 # A (sym)link's file object is its target's file object. 2165 return self.extractfile(self._find_link_target(tarinfo)) 2166 else: 2167 # If there's no data associated with the member (directory, chrdev, 2168 # blkdev, etc.), return None instead of a file object. 2169 return None 2170 2171 def _extract_member(self, tarinfo, targetpath): 2172 """Extract the TarInfo object tarinfo to a physical 2173 file called targetpath. 2174 """ 2175 # Fetch the TarInfo object for the given name 2176 # and build the destination pathname, replacing 2177 # forward slashes to platform specific separators. 2178 targetpath = targetpath.rstrip("/") 2179 targetpath = targetpath.replace("/", os.sep) 2180 2181 # Create all upper directories. 2182 upperdirs = os.path.dirname(targetpath) 2183 if upperdirs and not os.path.exists(upperdirs): 2184 # Create directories that are not part of the archive with 2185 # default permissions. 2186 os.makedirs(upperdirs) 2187 2188 if tarinfo.islnk() or tarinfo.issym(): 2189 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) 2190 else: 2191 self._dbg(1, tarinfo.name) 2192 2193 if tarinfo.isreg(): 2194 self.makefile(tarinfo, targetpath) 2195 elif tarinfo.isdir(): 2196 self.makedir(tarinfo, targetpath) 2197 elif tarinfo.isfifo(): 2198 self.makefifo(tarinfo, targetpath) 2199 elif tarinfo.ischr() or tarinfo.isblk(): 2200 self.makedev(tarinfo, targetpath) 2201 elif tarinfo.islnk() or tarinfo.issym(): 2202 self.makelink(tarinfo, targetpath) 2203 elif tarinfo.type not in SUPPORTED_TYPES: 2204 self.makeunknown(tarinfo, targetpath) 2205 else: 2206 self.makefile(tarinfo, targetpath) 2207 2208 self.chown(tarinfo, targetpath) 2209 if not tarinfo.issym(): 2210 self.chmod(tarinfo, targetpath) 2211 self.utime(tarinfo, targetpath) 2212 2213 #-------------------------------------------------------------------------- 2214 # Below are the different file methods. They are called via 2215 # _extract_member() when extract() is called. They can be replaced in a 2216 # subclass to implement other functionality. 2217 2218 def makedir(self, tarinfo, targetpath): 2219 """Make a directory called targetpath. 2220 """ 2221 try: 2222 # Use a safe mode for the directory, the real mode is set 2223 # later in _extract_member(). 2224 os.mkdir(targetpath, 0700) 2225 except EnvironmentError, e: 2226 if e.errno != errno.EEXIST: 2227 raise 2228 2229 def makefile(self, tarinfo, targetpath): 2230 """Make a file called targetpath. 2231 """ 2232 source = self.extractfile(tarinfo) 2233 try: 2234 with bltn_open(targetpath, "wb") as target: 2235 copyfileobj(source, target) 2236 finally: 2237 source.close() 2238 2239 def makeunknown(self, tarinfo, targetpath): 2240 """Make a file from a TarInfo object with an unknown type 2241 at targetpath. 2242 """ 2243 self.makefile(tarinfo, targetpath) 2244 self._dbg(1, "tarfile: Unknown file type %r, " \ 2245 "extracted as regular file." % tarinfo.type) 2246 2247 def makefifo(self, tarinfo, targetpath): 2248 """Make a fifo called targetpath. 2249 """ 2250 if hasattr(os, "mkfifo"): 2251 os.mkfifo(targetpath) 2252 else: 2253 raise ExtractError("fifo not supported by system") 2254 2255 def makedev(self, tarinfo, targetpath): 2256 """Make a character or block device called targetpath. 2257 """ 2258 if not hasattr(os, "mknod") or not hasattr(os, "makedev"): 2259 raise ExtractError("special devices not supported by system") 2260 2261 mode = tarinfo.mode 2262 if tarinfo.isblk(): 2263 mode |= stat.S_IFBLK 2264 else: 2265 mode |= stat.S_IFCHR 2266 2267 os.mknod(targetpath, mode, 2268 os.makedev(tarinfo.devmajor, tarinfo.devminor)) 2269 2270 def makelink(self, tarinfo, targetpath): 2271 """Make a (symbolic) link called targetpath. If it cannot be created 2272 (platform limitation), we try to make a copy of the referenced file 2273 instead of a link. 2274 """ 2275 if hasattr(os, "symlink") and hasattr(os, "link"): 2276 # For systems that support symbolic and hard links. 2277 if tarinfo.issym(): 2278 if os.path.lexists(targetpath): 2279 os.unlink(targetpath) 2280 os.symlink(tarinfo.linkname, targetpath) 2281 else: 2282 # See extract(). 2283 if os.path.exists(tarinfo._link_target): 2284 if os.path.lexists(targetpath): 2285 os.unlink(targetpath) 2286 os.link(tarinfo._link_target, targetpath) 2287 else: 2288 self._extract_member(self._find_link_target(tarinfo), targetpath) 2289 else: 2290 try: 2291 self._extract_member(self._find_link_target(tarinfo), targetpath) 2292 except KeyError: 2293 raise ExtractError("unable to resolve link inside archive") 2294 2295 def chown(self, tarinfo, targetpath): 2296 """Set owner of targetpath according to tarinfo. 2297 """ 2298 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: 2299 # We have to be root to do so. 2300 try: 2301 g = grp.getgrnam(tarinfo.gname)[2] 2302 except KeyError: 2303 g = tarinfo.gid 2304 try: 2305 u = pwd.getpwnam(tarinfo.uname)[2] 2306 except KeyError: 2307 u = tarinfo.uid 2308 try: 2309 if tarinfo.issym() and hasattr(os, "lchown"): 2310 os.lchown(targetpath, u, g) 2311 else: 2312 if sys.platform != "os2emx": 2313 os.chown(targetpath, u, g) 2314 except EnvironmentError, e: 2315 raise ExtractError("could not change owner") 2316 2317 def chmod(self, tarinfo, targetpath): 2318 """Set file permissions of targetpath according to tarinfo. 2319 """ 2320 if hasattr(os, 'chmod'): 2321 try: 2322 os.chmod(targetpath, tarinfo.mode) 2323 except EnvironmentError, e: 2324 raise ExtractError("could not change mode") 2325 2326 def utime(self, tarinfo, targetpath): 2327 """Set modification time of targetpath according to tarinfo. 2328 """ 2329 if not hasattr(os, 'utime'): 2330 return 2331 try: 2332 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) 2333 except EnvironmentError, e: 2334 raise ExtractError("could not change modification time") 2335 2336 #-------------------------------------------------------------------------- 2337 def next(self): 2338 """Return the next member of the archive as a TarInfo object, when 2339 TarFile is opened for reading. Return None if there is no more 2340 available. 2341 """ 2342 self._check("ra") 2343 if self.firstmember is not None: 2344 m = self.firstmember 2345 self.firstmember = None 2346 return m 2347 2348 # Advance the file pointer. 2349 if self.offset != self.fileobj.tell(): 2350 self.fileobj.seek(self.offset - 1) 2351 if not self.fileobj.read(1): 2352 raise ReadError("unexpected end of data") 2353 2354 # Read the next block. 2355 tarinfo = None 2356 while True: 2357 try: 2358 tarinfo = self.tarinfo.fromtarfile(self) 2359 except EOFHeaderError, e: 2360 if self.ignore_zeros: 2361 self._dbg(2, "0x%X: %s" % (self.offset, e)) 2362 self.offset += BLOCKSIZE 2363 continue 2364 except InvalidHeaderError, e: 2365 if self.ignore_zeros: 2366 self._dbg(2, "0x%X: %s" % (self.offset, e)) 2367 self.offset += BLOCKSIZE 2368 continue 2369 elif self.offset == 0: 2370 raise ReadError(str(e)) 2371 except EmptyHeaderError: 2372 if self.offset == 0: 2373 raise ReadError("empty file") 2374 except TruncatedHeaderError, e: 2375 if self.offset == 0: 2376 raise ReadError(str(e)) 2377 except SubsequentHeaderError, e: 2378 raise ReadError(str(e)) 2379 break 2380 2381 if tarinfo is not None: 2382 self.members.append(tarinfo) 2383 else: 2384 self._loaded = True 2385 2386 return tarinfo 2387 2388 #-------------------------------------------------------------------------- 2389 # Little helper methods: 2390 2391 def _getmember(self, name, tarinfo=None, normalize=False): 2392 """Find an archive member by name from bottom to top. 2393 If tarinfo is given, it is used as the starting point. 2394 """ 2395 # Ensure that all members have been loaded. 2396 members = self.getmembers() 2397 2398 # Limit the member search list up to tarinfo. 2399 if tarinfo is not None: 2400 members = members[:members.index(tarinfo)] 2401 2402 if normalize: 2403 name = os.path.normpath(name) 2404 2405 for member in reversed(members): 2406 if normalize: 2407 member_name = os.path.normpath(member.name) 2408 else: 2409 member_name = member.name 2410 2411 if name == member_name: 2412 return member 2413 2414 def _load(self): 2415 """Read through the entire archive file and look for readable 2416 members. 2417 """ 2418 while True: 2419 tarinfo = self.next() 2420 if tarinfo is None: 2421 break 2422 self._loaded = True 2423 2424 def _check(self, mode=None): 2425 """Check if TarFile is still open, and if the operation's mode 2426 corresponds to TarFile's mode. 2427 """ 2428 if self.closed: 2429 raise IOError("%s is closed" % self.__class__.__name__) 2430 if mode is not None and self.mode not in mode: 2431 raise IOError("bad operation for mode %r" % self.mode) 2432 2433 def _find_link_target(self, tarinfo): 2434 """Find the target member of a symlink or hardlink member in the 2435 archive. 2436 """ 2437 if tarinfo.issym(): 2438 # Always search the entire archive. 2439 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname))) 2440 limit = None 2441 else: 2442 # Search the archive before the link, because a hard link is 2443 # just a reference to an already archived file. 2444 linkname = tarinfo.linkname 2445 limit = tarinfo 2446 2447 member = self._getmember(linkname, tarinfo=limit, normalize=True) 2448 if member is None: 2449 raise KeyError("linkname %r not found" % linkname) 2450 return member 2451 2452 def __iter__(self): 2453 """Provide an iterator object. 2454 """ 2455 if self._loaded: 2456 return iter(self.members) 2457 else: 2458 return TarIter(self) 2459 2460 def _dbg(self, level, msg): 2461 """Write debugging output to sys.stderr. 2462 """ 2463 if level <= self.debug: 2464 print >> sys.stderr, msg 2465 2466 def __enter__(self): 2467 self._check() 2468 return self 2469 2470 def __exit__(self, type, value, traceback): 2471 if type is None: 2472 self.close() 2473 else: 2474 # An exception occurred. We must not call close() because 2475 # it would try to write end-of-archive blocks and padding. 2476 if not self._extfileobj: 2477 self.fileobj.close() 2478 self.closed = True 2479# class TarFile 2480 2481class TarIter: 2482 """Iterator Class. 2483 2484 for tarinfo in TarFile(...): 2485 suite... 2486 """ 2487 2488 def __init__(self, tarfile): 2489 """Construct a TarIter object. 2490 """ 2491 self.tarfile = tarfile 2492 self.index = 0 2493 def __iter__(self): 2494 """Return iterator object. 2495 """ 2496 return self 2497 def next(self): 2498 """Return the next item using TarFile's next() method. 2499 When all members have been read, set TarFile as _loaded. 2500 """ 2501 # Fix for SF #1100429: Under rare circumstances it can 2502 # happen that getmembers() is called during iteration, 2503 # which will cause TarIter to stop prematurely. 2504 2505 if self.index == 0 and self.tarfile.firstmember is not None: 2506 tarinfo = self.tarfile.next() 2507 elif self.index < len(self.tarfile.members): 2508 tarinfo = self.tarfile.members[self.index] 2509 elif not self.tarfile._loaded: 2510 tarinfo = self.tarfile.next() 2511 if not tarinfo: 2512 self.tarfile._loaded = True 2513 raise StopIteration 2514 else: 2515 raise StopIteration 2516 self.index += 1 2517 return tarinfo 2518 2519# Helper classes for sparse file support 2520class _section: 2521 """Base class for _data and _hole. 2522 """ 2523 def __init__(self, offset, size): 2524 self.offset = offset 2525 self.size = size 2526 def __contains__(self, offset): 2527 return self.offset <= offset < self.offset + self.size 2528 2529class _data(_section): 2530 """Represent a data section in a sparse file. 2531 """ 2532 def __init__(self, offset, size, realpos): 2533 _section.__init__(self, offset, size) 2534 self.realpos = realpos 2535 2536class _hole(_section): 2537 """Represent a hole section in a sparse file. 2538 """ 2539 pass 2540 2541class _ringbuffer(list): 2542 """Ringbuffer class which increases performance 2543 over a regular list. 2544 """ 2545 def __init__(self): 2546 self.idx = 0 2547 def find(self, offset): 2548 idx = self.idx 2549 while True: 2550 item = self[idx] 2551 if offset in item: 2552 break 2553 idx += 1 2554 if idx == len(self): 2555 idx = 0 2556 if idx == self.idx: 2557 # End of File 2558 return None 2559 self.idx = idx 2560 return item 2561 2562#--------------------------------------------- 2563# zipfile compatible TarFile class 2564#--------------------------------------------- 2565TAR_PLAIN = 0 # zipfile.ZIP_STORED 2566TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED 2567class TarFileCompat: 2568 """TarFile class compatible with standard module zipfile's 2569 ZipFile class. 2570 """ 2571 def __init__(self, file, mode="r", compression=TAR_PLAIN): 2572 from warnings import warnpy3k 2573 warnpy3k("the TarFileCompat class has been removed in Python 3.0", 2574 stacklevel=2) 2575 if compression == TAR_PLAIN: 2576 self.tarfile = TarFile.taropen(file, mode) 2577 elif compression == TAR_GZIPPED: 2578 self.tarfile = TarFile.gzopen(file, mode) 2579 else: 2580 raise ValueError("unknown compression constant") 2581 if mode[0:1] == "r": 2582 members = self.tarfile.getmembers() 2583 for m in members: 2584 m.filename = m.name 2585 m.file_size = m.size 2586 m.date_time = time.gmtime(m.mtime)[:6] 2587 def namelist(self): 2588 return map(lambda m: m.name, self.infolist()) 2589 def infolist(self): 2590 return filter(lambda m: m.type in REGULAR_TYPES, 2591 self.tarfile.getmembers()) 2592 def printdir(self): 2593 self.tarfile.list() 2594 def testzip(self): 2595 return 2596 def getinfo(self, name): 2597 return self.tarfile.getmember(name) 2598 def read(self, name): 2599 return self.tarfile.extractfile(self.tarfile.getmember(name)).read() 2600 def write(self, filename, arcname=None, compress_type=None): 2601 self.tarfile.add(filename, arcname) 2602 def writestr(self, zinfo, bytes): 2603 try: 2604 from cStringIO import StringIO 2605 except ImportError: 2606 from StringIO import StringIO 2607 import calendar 2608 tinfo = TarInfo(zinfo.filename) 2609 tinfo.size = len(bytes) 2610 tinfo.mtime = calendar.timegm(zinfo.date_time) 2611 self.tarfile.addfile(tinfo, StringIO(bytes)) 2612 def close(self): 2613 self.tarfile.close() 2614#class TarFileCompat 2615 2616#-------------------- 2617# exported functions 2618#-------------------- 2619def is_tarfile(name): 2620 """Return True if name points to a tar archive that we 2621 are able to handle, else return False. 2622 """ 2623 try: 2624 t = open(name) 2625 t.close() 2626 return True 2627 except TarError: 2628 return False 2629 2630open = TarFile.open 2631