#!/usr/bin/env python3
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
"""Read from and write to tar format archives.
"""

version     = "0.9.0"
__author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."

#---------
# Imports
#---------
from builtins import open as bltn_open
import sys
import os
import io
import shutil
import stat
import time
import struct
import copy
import re
import warnings

# pwd/grp are POSIX-only; fall back to None so name<->id lookups can be skipped.
try:
    import pwd
except ImportError:
    pwd = None
try:
    import grp
except ImportError:
    grp = None

# os.symlink on Windows prior to 6.0 raises NotImplementedError
symlink_exception = (AttributeError, NotImplementedError)
try:
    # OSError (winerror=1314) will be raised if the caller does not hold the
    # SeCreateSymbolicLinkPrivilege privilege
    symlink_exception += (OSError,)
except NameError:
    pass

# from tarfile import *
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
           "CompressionError", "StreamError", "ExtractError", "HeaderError",
           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
           "DEFAULT_FORMAT", "open"]


#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = PAX_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}

#---------------------------------------------------------
# initialization
#---------------------------------------------------------
if os.name == "nt":
    ENCODING = "utf-8"
else:
    ENCODING = sys.getfilesystemencoding()

#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------

def stn(s, length, encoding, errors):
    """Convert a string to a null-terminated bytes object.
    """
    if s is None:
        raise ValueError("metadata cannot contain None")
    s = s.encode(encoding, errors)
    # Truncate to the field length and pad the remainder with NULs.
    return s[:length] + (length - len(s)) * NUL

def nts(s, encoding, errors):
    """Convert a null-terminated bytes object to a string.
    """
    p = s.find(b"\0")
    if p != -1:
        s = s[:p]
    return s.decode(encoding, errors)

def nti(s):
    """Convert a number field to a python number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] in (0o200, 0o377):
        # GNU base-256 encoding: the remaining bytes are a big-endian value.
        n = 0
        for i in range(len(s) - 1):
            n <<= 8
            n += s[i + 1]
        if s[0] == 0o377:
            # 0o377 marks a negative number (two's complement).
            n = -(256 ** (len(s) - 1) - n)
    else:
        try:
            s = nts(s, "ascii", "strict")
            n = int(s.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    return n

def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    original_n = n
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            n = 256 ** digits + n

        for i in range(digits - 1):
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s

def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.
    """
    # 148 bytes before the chksum field, 8 bytes skipped (the field
    # itself, counted as 8 spaces == 256), 356 bytes after it.
    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum

def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    blocks, remainder = divmod(length, bufsize)
    for b in range(blocks):
        buf = src.read(bufsize)
        if len(buf) < bufsize:
            raise exception("unexpected end of data")
        dst.write(buf)

    if remainder != 0:
        buf = src.read(remainder)
        if len(buf) < remainder:
            raise exception("unexpected end of data")
        dst.write(buf)
    return

def _safe_print(s):
    # Print s followed by a space, replacing characters the console
    # encoding cannot represent (used by TarFile.list()).
    encoding = getattr(sys.stdout, 'encoding', None)
    if encoding is not None:
        s = s.encode(encoding, 'backslashreplace').decode(encoding)
    print(s, end=' ')


class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Base exception for header errors."""
    pass
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
    pass
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
    pass
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
    pass
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
    pass
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
    pass

#---------------------------
# internal stream interface
#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            # Avoid newline translation on Windows.
            mode |= os.O_BINARY
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)

class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object. The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise. Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""
        self.pos = 0
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available") from None
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self._init_read_gz()
                    self.exception = zlib.error
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available") from None
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available") from None
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            # On any setup failure, close a file object we opened ourselves.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # Only close if __init__ got far enough to set self.closed.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # Honor "directory components removed" from RFC1952
        self.name = os.path.basename(self.name)
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # Gzip trailer: CRC32 and uncompressed size modulo 2**32.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.read(xlen)
        if flag & 8:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size):
        """Return the next size number of bytes from the stream."""
        assert size is not None
        buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            # Skip underlying buffer to avoid unaligned double buffering.
            if self.buf:
                buf = self.buf
                self.buf = b""
            else:
                buf = self.fileobj.read(self.bufsize)
                if not buf:
                    break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception as e:
                raise ReadError("invalid compressed data") from e
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream

class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        # First call returns the sniffed block; later calls go straight
        # through to the underlying file object.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        if self.buf.startswith(b"\x1f\x8b\x08"):
            return "gz"
        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
            return "bz2"
        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        else:
            return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy

#------------------------
# Extraction file object
#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                # Hole between data blocks reads back as zeros.
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        buf = b""
        while size > 0:
            # Find the map entry containing the current position.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                buf += b
            else:
                # Sparse hole: synthesize zero bytes.
                buf += NUL * length
            size -= length
            self.position += length
        return buf

    def readinto(self, b):
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        self.closed = True
#class _FileInFile

class ExFileObject(io.BufferedReader):
    # File-like object returned for a member; wraps _FileInFile in a
    # buffered reader.

    def __init__(self, tarfile, tarinfo):
        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                              tarinfo.size, tarinfo.sparse)
        super().__init__(fileobj)
#class ExFileObject


#-----------------------------
# extraction filters (PEP 706)
#-----------------------------

class FilterError(TarError):
    pass

class AbsolutePathError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'member {tarinfo.name!r} has an absolute path')

class OutsideDestinationError(FilterError):
    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, '
                         + 'which is outside the destination')

class SpecialFileError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'{tarinfo.name!r} is a special file')

class AbsoluteLinkError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path') 745 746class LinkOutsideDestinationError(FilterError): 747 def __init__(self, tarinfo, path): 748 self.tarinfo = tarinfo 749 self._path = path 750 super().__init__(f'{tarinfo.name!r} would link to {path!r}, ' 751 + 'which is outside the destination') 752 753def _get_filtered_attrs(member, dest_path, for_data=True): 754 new_attrs = {} 755 name = member.name 756 dest_path = os.path.realpath(dest_path) 757 # Strip leading / (tar's directory separator) from filenames. 758 # Include os.sep (target OS directory separator) as well. 759 if name.startswith(('/', os.sep)): 760 name = new_attrs['name'] = member.path.lstrip('/' + os.sep) 761 if os.path.isabs(name): 762 # Path is absolute even after stripping. 763 # For example, 'C:/foo' on Windows. 764 raise AbsolutePathError(member) 765 # Ensure we stay in the destination 766 target_path = os.path.realpath(os.path.join(dest_path, name)) 767 if os.path.commonpath([target_path, dest_path]) != dest_path: 768 raise OutsideDestinationError(member, target_path) 769 # Limit permissions (no high bits, and go-w) 770 mode = member.mode 771 if mode is not None: 772 # Strip high bits & group/other write bits 773 mode = mode & 0o755 774 if for_data: 775 # For data, handle permissions & file types 776 if member.isreg() or member.islnk(): 777 if not mode & 0o100: 778 # Clear executable bits if not executable by user 779 mode &= ~0o111 780 # Ensure owner can read & write 781 mode |= 0o600 782 elif member.isdir() or member.issym(): 783 # Ignore mode for directories & symlinks 784 mode = None 785 else: 786 # Reject special files 787 raise SpecialFileError(member) 788 if mode != member.mode: 789 new_attrs['mode'] = mode 790 if for_data: 791 # Ignore ownership for 'data' 792 if member.uid is not None: 793 new_attrs['uid'] = None 794 if member.gid is not None: 795 new_attrs['gid'] = None 796 if member.uname is not None: 797 new_attrs['uname'] = None 798 if member.gname is not 
None: 799 new_attrs['gname'] = None 800 # Check link destination for 'data' 801 if member.islnk() or member.issym(): 802 if os.path.isabs(member.linkname): 803 raise AbsoluteLinkError(member) 804 target_path = os.path.realpath(os.path.join(dest_path, member.linkname)) 805 if os.path.commonpath([target_path, dest_path]) != dest_path: 806 raise LinkOutsideDestinationError(member, target_path) 807 return new_attrs 808 809def fully_trusted_filter(member, dest_path): 810 return member 811 812def tar_filter(member, dest_path): 813 new_attrs = _get_filtered_attrs(member, dest_path, False) 814 if new_attrs: 815 return member.replace(**new_attrs, deep=False) 816 return member 817 818def data_filter(member, dest_path): 819 new_attrs = _get_filtered_attrs(member, dest_path, True) 820 if new_attrs: 821 return member.replace(**new_attrs, deep=False) 822 return member 823 824_NAMED_FILTERS = { 825 "fully_trusted": fully_trusted_filter, 826 "tar": tar_filter, 827 "data": data_filter, 828} 829 830#------------------ 831# Exported Classes 832#------------------ 833 834# Sentinel for replace() defaults, meaning "don't change the attribute" 835_KEEP = object() 836 837# Header length is digits followed by a space. 838_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ") 839 840class TarInfo(object): 841 """Informational class which holds the details about an 842 archive member given by a tar header block. 843 TarInfo objects are returned by TarFile.getmember(), 844 TarFile.getmembers() and TarFile.gettarinfo() and are 845 usually created internally. 846 """ 847 848 __slots__ = dict( 849 name = 'Name of the archive member.', 850 mode = 'Permission bits.', 851 uid = 'User ID of the user who originally stored this member.', 852 gid = 'Group ID of the user who originally stored this member.', 853 size = 'Size in bytes.', 854 mtime = 'Time of last modification.', 855 chksum = 'Header checksum.', 856 type = ('File type. 
type is usually one of these constants: ' 857 'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, ' 858 'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'), 859 linkname = ('Name of the target file name, which is only present ' 860 'in TarInfo objects of type LNKTYPE and SYMTYPE.'), 861 uname = 'User name.', 862 gname = 'Group name.', 863 devmajor = 'Device major number.', 864 devminor = 'Device minor number.', 865 offset = 'The tar header starts here.', 866 offset_data = "The file's data starts here.", 867 pax_headers = ('A dictionary containing key-value pairs of an ' 868 'associated pax extended header.'), 869 sparse = 'Sparse member information.', 870 tarfile = None, 871 _sparse_structs = None, 872 _link_target = None, 873 ) 874 875 def __init__(self, name=""): 876 """Construct a TarInfo object. name is the optional name 877 of the member. 878 """ 879 self.name = name # member name 880 self.mode = 0o644 # file permissions 881 self.uid = 0 # user id 882 self.gid = 0 # group id 883 self.size = 0 # file size 884 self.mtime = 0 # modification time 885 self.chksum = 0 # header checksum 886 self.type = REGTYPE # member type 887 self.linkname = "" # link name 888 self.uname = "" # user name 889 self.gname = "" # group name 890 self.devmajor = 0 # device major number 891 self.devminor = 0 # device minor number 892 893 self.offset = 0 # the tar header starts here 894 self.offset_data = 0 # the file's data starts here 895 896 self.sparse = None # sparse member information 897 self.pax_headers = {} # pax header information 898 899 @property 900 def path(self): 901 'In pax headers, "name" is called "path".' 902 return self.name 903 904 @path.setter 905 def path(self, name): 906 self.name = name 907 908 @property 909 def linkpath(self): 910 'In pax headers, "linkname" is called "linkpath".' 
911 return self.linkname 912 913 @linkpath.setter 914 def linkpath(self, linkname): 915 self.linkname = linkname 916 917 def __repr__(self): 918 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) 919 920 def replace(self, *, 921 name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP, 922 uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP, 923 deep=True, _KEEP=_KEEP): 924 """Return a deep copy of self with the given attributes replaced. 925 """ 926 if deep: 927 result = copy.deepcopy(self) 928 else: 929 result = copy.copy(self) 930 if name is not _KEEP: 931 result.name = name 932 if mtime is not _KEEP: 933 result.mtime = mtime 934 if mode is not _KEEP: 935 result.mode = mode 936 if linkname is not _KEEP: 937 result.linkname = linkname 938 if uid is not _KEEP: 939 result.uid = uid 940 if gid is not _KEEP: 941 result.gid = gid 942 if uname is not _KEEP: 943 result.uname = uname 944 if gname is not _KEEP: 945 result.gname = gname 946 return result 947 948 def get_info(self): 949 """Return the TarInfo's attributes as a dictionary. 950 """ 951 if self.mode is None: 952 mode = None 953 else: 954 mode = self.mode & 0o7777 955 info = { 956 "name": self.name, 957 "mode": mode, 958 "uid": self.uid, 959 "gid": self.gid, 960 "size": self.size, 961 "mtime": self.mtime, 962 "chksum": self.chksum, 963 "type": self.type, 964 "linkname": self.linkname, 965 "uname": self.uname, 966 "gname": self.gname, 967 "devmajor": self.devmajor, 968 "devminor": self.devminor 969 } 970 971 if info["type"] == DIRTYPE and not info["name"].endswith("/"): 972 info["name"] += "/" 973 974 return info 975 976 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"): 977 """Return a tar header as a string of 512 byte blocks. 
978 """ 979 info = self.get_info() 980 for name, value in info.items(): 981 if value is None: 982 raise ValueError("%s may not be None" % name) 983 984 if format == USTAR_FORMAT: 985 return self.create_ustar_header(info, encoding, errors) 986 elif format == GNU_FORMAT: 987 return self.create_gnu_header(info, encoding, errors) 988 elif format == PAX_FORMAT: 989 return self.create_pax_header(info, encoding) 990 else: 991 raise ValueError("invalid format") 992 993 def create_ustar_header(self, info, encoding, errors): 994 """Return the object as a ustar header block. 995 """ 996 info["magic"] = POSIX_MAGIC 997 998 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK: 999 raise ValueError("linkname is too long") 1000 1001 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME: 1002 info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors) 1003 1004 return self._create_header(info, USTAR_FORMAT, encoding, errors) 1005 1006 def create_gnu_header(self, info, encoding, errors): 1007 """Return the object as a GNU header block sequence. 1008 """ 1009 info["magic"] = GNU_MAGIC 1010 1011 buf = b"" 1012 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK: 1013 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors) 1014 1015 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME: 1016 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors) 1017 1018 return buf + self._create_header(info, GNU_FORMAT, encoding, errors) 1019 1020 def create_pax_header(self, info, encoding): 1021 """Return the object as a ustar header block. If it cannot be 1022 represented this way, prepend a pax extended header sequence 1023 with supplement information. 1024 """ 1025 info["magic"] = POSIX_MAGIC 1026 pax_headers = self.pax_headers.copy() 1027 1028 # Test string fields for values that exceed the field length or cannot 1029 # be represented in ASCII encoding. 
1030 for name, hname, length in ( 1031 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK), 1032 ("uname", "uname", 32), ("gname", "gname", 32)): 1033 1034 if hname in pax_headers: 1035 # The pax header has priority. 1036 continue 1037 1038 # Try to encode the string as ASCII. 1039 try: 1040 info[name].encode("ascii", "strict") 1041 except UnicodeEncodeError: 1042 pax_headers[hname] = info[name] 1043 continue 1044 1045 if len(info[name]) > length: 1046 pax_headers[hname] = info[name] 1047 1048 # Test number fields for values that exceed the field limit or values 1049 # that like to be stored as float. 1050 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): 1051 needs_pax = False 1052 1053 val = info[name] 1054 val_is_float = isinstance(val, float) 1055 val_int = round(val) if val_is_float else val 1056 if not 0 <= val_int < 8 ** (digits - 1): 1057 # Avoid overflow. 1058 info[name] = 0 1059 needs_pax = True 1060 elif val_is_float: 1061 # Put rounded value in ustar header, and full 1062 # precision value in pax header. 1063 info[name] = val_int 1064 needs_pax = True 1065 1066 # The existing pax header has priority. 1067 if needs_pax and name not in pax_headers: 1068 pax_headers[name] = str(val) 1069 1070 # Create a pax extended header if necessary. 1071 if pax_headers: 1072 buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding) 1073 else: 1074 buf = b"" 1075 1076 return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace") 1077 1078 @classmethod 1079 def create_pax_global_header(cls, pax_headers): 1080 """Return the object as a pax global header block sequence. 1081 """ 1082 return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8") 1083 1084 def _posix_split_name(self, name, encoding, errors): 1085 """Split a name longer than 100 chars into a prefix 1086 and a name part. 
        """
        components = name.split("/")
        # Try every split point; the for/else raises only if no split
        # satisfies both the 155-byte prefix and 100-byte name limits.
        for i in range(1, len(components)):
            prefix = "/".join(components[:i])
            name = "/".join(components[i:])
            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
                    len(name.encode(encoding, errors)) <= LENGTH_NAME:
                break
        else:
            raise ValueError("name is too long")

        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
        if has_device_fields:
            devmajor = itn(info.get("devmajor", 0), 8, format)
            devminor = itn(info.get("devminor", 0), 8, format)
        else:
            # Non-device members leave the device fields empty.
            devmajor = stn("", 8, encoding, errors)
            devminor = stn("", 8, encoding, errors)

        # None values in metadata should cause ValueError.
        # itn()/stn() do this for all fields except type.
        filetype = info.get("type", REGTYPE)
        if filetype is None:
            raise ValueError("TarInfo.type must not be None")

        # Fields in ustar block order; widths are fixed by the tar format.
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            b"        ", # checksum field
            filetype,
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            devmajor,
            devminor,
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Splice the checksum into bytes 148..154 of the 512-byte block:
        # 512-148 = 364 and 512-155 = 357 select that 7-byte window.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
           up to the next 512 byte border.
        """
        blocks, remainder = divmod(len(payload), BLOCKSIZE)
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        # The long name is stored NUL-terminated in the payload blocks.
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for keyword, value in pax_headers.items():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length counts its own decimal digits, so iterate
            # to the fixed point where adding the digits changes nothing.
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        # calc_chksums() returns both signed and unsigned variants; either
        # one matching is accepted for compatibility with old archives.
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        # ustar layout: name[100] mode[8] uid[8] gid[8] size[12] mtime[12]
        # chksum[8] type[1] linkname[100] magic[8] uname[32] gname[32]
        # devmajor[8] devminor[8] prefix[155].
        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        # Remove redundant slashes from directories. This is to be consistent
        # with frombuf().
        if self.isdir():
            self.name = self.name.rstrip("/")

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError as e:
            raise SubsequentHeaderError(str(e)) from None

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        # Remove redundant slashes from directories. This is to be consistent
        # with frombuf().
        if next.isdir():
            next.name = next.name.removesuffix("/")

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            # An extension block holds up to 21 (offset, numbytes) pairs
            # of 12 octal bytes each; byte 504 flags another block.
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        # self.size was the on-disk (compacted) size; report the real size.
        self.size = origsize
        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2008.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline.
        pos = 0
        encoding = None
        raw_headers = []
        while len(buf) > pos and buf[pos] != 0x00:
            if not (match := _header_length_prefix_re.match(buf, pos)):
                raise InvalidHeaderError("invalid header")
            try:
                length = int(match.group(1))
            except ValueError:
                raise InvalidHeaderError("invalid header")
            # Headers must be at least 5 bytes, shortest being '5 x=\n'.
            # Value is allowed to be empty.
            if length < 5:
                raise InvalidHeaderError("invalid header")
            if pos + length > len(buf):
                raise InvalidHeaderError("invalid header")

            header_value_end_offset = match.start(1) + length - 1  # Last byte of the header
            keyword_and_value = buf[match.end(1) + 1:header_value_end_offset]
            raw_keyword, equals, raw_value = keyword_and_value.partition(b"=")

            # Check the framing of the header. The last character must be '\n' (0x0A)
            if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A:
                raise InvalidHeaderError("invalid header")
            raw_headers.append((length, raw_keyword, raw_value))

            # Check if the pax header contains a hdrcharset field. This tells us
            # the encoding of the path, linkpath, uname and gname fields. Normally,
            # these fields are UTF-8 encoded but since POSIX.1-2008 tar
            # implementations are allowed to store them as raw binary strings if
            # the translation to UTF-8 fails. For the time being, we don't care about
            # anything other than "BINARY". The only other value that is currently
            # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
            # Note that we only follow the initial 'hdrcharset' setting to preserve
            # the initial behavior of the 'tarfile' module.
            if raw_keyword == b"hdrcharset" and encoding is None:
                if raw_value == b"BINARY":
                    encoding = tarfile.encoding
                else: # This branch ensures only the first 'hdrcharset' header is used.
                    encoding = "utf-8"

            pos += length

        # If no explicit hdrcharset is set, we use UTF-8 as a default.
        if encoding is None:
            encoding = "utf-8"

        # After parsing the raw headers we can decode them to text.
        for length, raw_keyword, raw_value in raw_headers:
            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8",
                                             tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(raw_value, encoding, tarfile.encoding,
                                               tarfile.errors)
            else:
                value = self._decode_pax_field(raw_value, "utf-8", "utf-8",
                                               tarfile.errors)

            pax_headers[keyword] = value

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError as e:
            raise SubsequentHeaderError(str(e)) from None

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, raw_headers)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _proc_gnusparse_00(self, next, raw_headers):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        # Version 0.0 repeats GNU.sparse.offset/GNU.sparse.numbytes record
        # pairs, so the raw (undeduplicated) header list must be used here.
        offsets = []
        numbytes = []
        for _, keyword, value in raw_headers:
            if keyword == b"GNU.sparse.offset":
                try:
                    offsets.append(int(value.decode()))
                except ValueError:
                    raise InvalidHeaderError("invalid header")

            elif keyword == b"GNU.sparse.numbytes":
                try:
                    numbytes.append(int(value.decode()))
                except ValueError:
                    raise InvalidHeaderError("invalid header")

        next.sparse = list(zip(offsets, numbytes))

    def _proc_gnusparse_01(self, next, pax_headers):
        """Process a GNU tar extended sparse header, version 0.1.
        """
        # GNU.sparse.map is a comma-separated list alternating offsets
        # and byte counts.
        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.
        """
        # Version 1.0 stores the map in the member's data area as
        # newline-terminated decimal numbers: a count, then
        # offset/numbytes pairs, padded to a block boundary.
        fields = None
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            if b"\n" not in buf:
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                # "path" is a property-like alias; set it so the member
                # gets the real (unmangled) sparse file name.
                setattr(self, "path", value)
            elif keyword == "GNU.sparse.size":
                setattr(self, "size", int(value))
            elif keyword == "GNU.sparse.realsize":
                setattr(self, "size", int(value))
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    try:
                        value = PAX_NUMBER_FIELDS[keyword](value)
                    except ValueError:
                        value = 0
                if keyword == "path":
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
        """Decode a single field from a pax record.
        """
        # Try the strict primary encoding first; fall back to the
        # caller-supplied encoding/error handler on failure.
        try:
            return value.decode(encoding, "strict")
        except UnicodeDecodeError:
            return value.decode(fallback_encoding, fallback_errors)

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        'Return True if the Tarinfo object is a regular file.'
        return self.type in REGULAR_TYPES

    def isfile(self):
        'Return True if the Tarinfo object is a regular file.'
        return self.isreg()

    def isdir(self):
        'Return True if it is a directory.'
        return self.type == DIRTYPE

    def issym(self):
        'Return True if it is a symbolic link.'
        return self.type == SYMTYPE

    def islnk(self):
        'Return True if it is a hard link.'
        return self.type == LNKTYPE

    def ischr(self):
        'Return True if it is a character device.'
        return self.type == CHRTYPE

    def isblk(self):
        'Return True if it is a block device.'
        return self.type == BLKTYPE

    def isfifo(self):
        'Return True if it is a FIFO.'
        return self.type == FIFOTYPE

    def issparse(self):
        # True once _proc_sparse()/_proc_gnusparse_*() stored a map.
        return self.sparse is not None

    def isdev(self):
        'Return True if it is one of character device, block device or FIFO.'
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo

class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The file-object for extractfile().

    extraction_filter = None    # The default filter for extraction.

    def __init__(self, name=None, mode="r", fileobj=None, format=None,
            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
            errors="surrogateescape", pax_headers=None, debug=None,
            errorlevel=None, copybufsize=None):
        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
           read from an existing archive, 'a' to append data to an existing
           file or 'w' to create a new file overwriting an existing one. `mode'
           defaults to 'r'.
           If `fileobj' is given, it is used for reading or writing data. If it
           can be determined, `mode' is overridden by `fileobj's mode.
           `fileobj' is not closed, when TarFile is closed.
        """
        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
        if mode not in modes:
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        self.mode = mode
        self._mode = modes[mode]

        if not fileobj:
            if self.mode == "a" and not os.path.exists(name):
                # Create nonexistent files in append mode.
                self.mode = "w"
                self._mode = "wb"
            fileobj = bltn_open(name, self._mode)
            self._extfileobj = False
        else:
            if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
                name = fileobj.name
            if hasattr(fileobj, "mode"):
                self._mode = fileobj.mode
            self._extfileobj = True
        self.name = os.path.abspath(name) if name else None
        self.fileobj = fileobj

        # Init attributes.
        if format is not None:
            self.format = format
        if tarinfo is not None:
            self.tarinfo = tarinfo
        if dereference is not None:
            self.dereference = dereference
        if ignore_zeros is not None:
            self.ignore_zeros = ignore_zeros
        if encoding is not None:
            self.encoding = encoding
        self.errors = errors

        # User-supplied pax headers only apply when writing PAX_FORMAT.
        if pax_headers is not None and self.format == PAX_FORMAT:
            self.pax_headers = pax_headers
        else:
            self.pax_headers = {}

        if debug is not None:
            self.debug = debug
        if errorlevel is not None:
            self.errorlevel = errorlevel

        # Init datastructures.
        self.copybufsize = copybufsize
        self.closed = False
        self.members = []       # list of members as TarInfo objects
        self._loaded = False    # flag if all members have been read
        self.offset = self.fileobj.tell()
                                # current position in the archive file
        self.inodes = {}        # dictionary caching the inodes of
                                # archive members already added

        try:
            if self.mode == "r":
                self.firstmember = None
                self.firstmember = self.next()

            if self.mode == "a":
                # Move to the end of the archive,
                # before the first empty block.
                while True:
                    self.fileobj.seek(self.offset)
                    try:
                        tarinfo = self.tarinfo.fromtarfile(self)
                        self.members.append(tarinfo)
                    except EOFHeaderError:
                        self.fileobj.seek(self.offset)
                        break
                    except HeaderError as e:
                        raise ReadError(str(e)) from None

            if self.mode in ("a", "w", "x"):
                self._loaded = True

                if self.pax_headers:
                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                    self.fileobj.write(buf)
                    self.offset += len(buf)
        except:
            # On any failure, undo what we did: close only file objects we
            # opened ourselves, then re-raise.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    #--------------------------------------------------------------------------
    # Below are the classmethods which act as alternate constructors to the
    # TarFile class. The open() method is the only one that is needed for
    # public use; it is the "super"-constructor and is able to select an
    # adequate "sub"-constructor for a particular compression using the mapping
    # from OPEN_METH.
    #
    # This concept allows one to subclass TarFile without losing the comfort of
    # the super-constructor. A sub-constructor is registered and made available
    # by adding it to the mapping in OPEN_METH.

    @classmethod
    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
        """Open a tar archive for reading, writing or appending. Return
           an appropriate TarFile class.

           mode:
           'r' or 'r:*' open for reading with transparent compression
           'r:'         open for reading exclusively uncompressed
           'r:gz'       open for reading with gzip compression
           'r:bz2'      open for reading with bzip2 compression
           'r:xz'       open for reading with lzma compression
           'a' or 'a:'  open for appending, creating the file if necessary
           'w' or 'w:'  open for writing without compression
           'w:gz'       open for writing with gzip compression
           'w:bz2'      open for writing with bzip2 compression
           'w:xz'       open for writing with lzma compression

           'x' or 'x:'  create a tarfile exclusively without compression, raise
                        an exception if the file is already created
           'x:gz'       create a gzip compressed tarfile, raise an exception
                        if the file is already created
           'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                        if the file is already created
           'x:xz'       create an lzma compressed tarfile, raise an exception
                        if the file is already created

           'r|*'        open a stream of tar blocks with transparent compression
           'r|'         open an uncompressed stream of tar blocks for reading
           'r|gz'       open a gzip compressed stream of tar blocks
           'r|bz2'      open a bzip2 compressed stream of tar blocks
           'r|xz'       open an lzma compressed stream of tar blocks
           'w|'         open an uncompressed stream for writing
           'w|gz'       open a gzip compressed stream for writing
           'w|bz2'      open a bzip2 compressed stream for writing
           'w|xz'       open an lzma compressed stream for writing
        """

        if not name and not fileobj:
            raise ValueError("nothing to open")

        if mode in ("r", "r:*"):
            # Find out which *open() is appropriate for opening the file.
            # Probe the compressed openers first: sorting by not_compressed()
            # puts 'taropen' last, since plain taropen could misinterpret a
            # compressed stream as an uncompressed archive.
            def not_compressed(comptype):
                return cls.OPEN_METH[comptype] == 'taropen'
            error_msgs = []
            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
                func = getattr(cls, cls.OPEN_METH[comptype])
                if fileobj is not None:
                    saved_pos = fileobj.tell()
                try:
                    return func(name, "r", fileobj, **kwargs)
                except (ReadError, CompressionError) as e:
                    error_msgs.append(f'- method {comptype}: {e!r}')
                    if fileobj is not None:
                        # Rewind so the next probe sees the stream start.
                        fileobj.seek(saved_pos)
                    continue
            error_msgs_summary = '\n'.join(error_msgs)
            raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}")

        elif ":" in mode:
            filemode, comptype = mode.split(":", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            # Select the *open() function according to
            # given compression.
            if comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
            else:
                raise CompressionError("unknown compression type %r" % comptype)
            return func(name, filemode, fileobj, **kwargs)

        elif "|" in mode:
            filemode, comptype = mode.split("|", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            if filemode not in ("r", "w"):
                raise ValueError("mode must be 'r' or 'w'")

            stream = _Stream(name, filemode, comptype, fileobj, bufsize)
            try:
                t = cls(name, filemode, stream, **kwargs)
            except:
                stream.close()
                raise
            # The stream was created here, so TarFile owns and must close it.
            t._extfileobj = False
            return t

        elif mode in ("a", "w", "x"):
            return cls.taropen(name, mode, fileobj, **kwargs)

        raise ValueError("undiscernible mode")

    @classmethod
    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
        """Open uncompressed tar archive name for reading or writing.
        """
        if mode not in ("r", "a", "w", "x"):
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        return cls(name, mode, fileobj, **kwargs)

    @classmethod
    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open gzip compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from gzip import GzipFile
        except ImportError:
            raise CompressionError("gzip module is not available") from None

        try:
            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
        except OSError as e:
            if fileobj is not None and mode == 'r':
                raise ReadError("not a gzip file") from e
            raise

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except OSError as e:
            fileobj.close()
            if mode == 'r':
                raise ReadError("not a gzip file") from e
            raise
        except:
            fileobj.close()
            raise
        # The GzipFile wrapper belongs to us, so close it with the TarFile.
        t._extfileobj = False
        return t

    @classmethod
    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from bz2 import BZ2File
        except ImportError:
            raise CompressionError("bz2 module is not available") from None

        fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (OSError, EOFError) as e:
            fileobj.close()
            if mode == 'r':
                raise ReadError("not a bzip2 file") from e
            raise
        except:
            fileobj.close()
            raise
        t._extfileobj = False
        return t

    @classmethod
    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
        """Open lzma compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from lzma import LZMAFile, LZMAError
        except ImportError:
            raise CompressionError("lzma module is not available") from None

        fileobj = LZMAFile(fileobj or name, mode, preset=preset)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (LZMAError, EOFError) as e:
            fileobj.close()
            if mode == 'r':
                raise ReadError("not an lzma file") from e
            raise
        except:
            fileobj.close()
            raise
        t._extfileobj = False
        return t

    # All *open() methods are registered here.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open",   # bzip2 compressed tar
        "xz":  "xzopen"     # lzma compressed tar
    }

    #--------------------------------------------------------------------------
    # The public methods which TarFile provides:

    def close(self):
        """Close the TarFile. In write-mode, two finishing zero blocks are
           appended to the archive.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode in ("a", "w", "x"):
                # Two zero blocks mark the end-of-archive.
                self.fileobj.write(NUL * (BLOCKSIZE * 2))
                self.offset += (BLOCKSIZE * 2)
                # fill up the end with zero-blocks
                # (like option -b20 for tar does)
                blocks, remainder = divmod(self.offset, RECORDSIZE)
                if remainder > 0:
                    self.fileobj.write(NUL * (RECORDSIZE - remainder))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def getmember(self, name):
        """Return a TarInfo object for member `name'. If `name' can not be
           found in the archive, KeyError is raised. If a member occurs more
           than once in the archive, its last occurrence is assumed to be the
           most up-to-date version.
        """
        tarinfo = self._getmember(name.rstrip('/'))
        if tarinfo is None:
            raise KeyError("filename %r not found" % name)
        return tarinfo

    def getmembers(self):
        """Return the members of the archive as a list of TarInfo objects. The
           list has the same order as the members in the archive.
        """
        self._check()
        if not self._loaded:    # if we want to obtain a list of
            self._load()        # all members, we first have to
                                # scan the whole archive.
        return self.members

    def getnames(self):
        """Return the members of the archive as a list of their names. It has
           the same order as the list returned by getmembers().
        """
        return [tarinfo.name for tarinfo in self.getmembers()]

    def gettarinfo(self, name=None, arcname=None, fileobj=None):
        """Create a TarInfo object from the result of os.stat or equivalent
           on an existing file. The file is either named by `name', or
           specified as a file object `fileobj' with a file descriptor. If
           given, `arcname' specifies an alternative name for the file in the
           archive, otherwise, the name is taken from the 'name' attribute of
           'fileobj', or the 'name' argument. The name should be a text
           string.
        """
        self._check("awx")

        # When fileobj is given, replace name by
        # fileobj's real name.
        if fileobj is not None:
            name = fileobj.name

        # Building the name of the member in the archive.
        # Backward slashes are converted to forward slashes,
        # Absolute paths are turned to relative paths.
        if arcname is None:
            arcname = name
        drv, arcname = os.path.splitdrive(arcname)
        arcname = arcname.replace(os.sep, "/")
        arcname = arcname.lstrip("/")

        # Now, fill the TarInfo object with
        # information specific for the file.
        tarinfo = self.tarinfo()
        tarinfo.tarfile = self  # Not needed

        # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
        if fileobj is None:
            if not self.dereference:
                statres = os.lstat(name)
            else:
                statres = os.stat(name)
        else:
            statres = os.fstat(fileobj.fileno())
        linkname = ""

        stmd = statres.st_mode
        if stat.S_ISREG(stmd):
            inode = (statres.st_ino, statres.st_dev)
            if not self.dereference and statres.st_nlink > 1 and \
                    inode in self.inodes and arcname != self.inodes[inode]:
                # Is it a hardlink to an already
                # archived file?
                type = LNKTYPE
                linkname = self.inodes[inode]
            else:
                # The inode is added only if its valid.
                # For win32 it is always 0.
                type = REGTYPE
                if inode[0]:
                    self.inodes[inode] = arcname
        elif stat.S_ISDIR(stmd):
            type = DIRTYPE
        elif stat.S_ISFIFO(stmd):
            type = FIFOTYPE
        elif stat.S_ISLNK(stmd):
            type = SYMTYPE
            linkname = os.readlink(name)
        elif stat.S_ISCHR(stmd):
            type = CHRTYPE
        elif stat.S_ISBLK(stmd):
            type = BLKTYPE
        else:
            # Unsupported file type (e.g. socket): signal "skip" to caller.
            return None

        # Fill the TarInfo object with all
        # information we can get.
        tarinfo.name = arcname
        tarinfo.mode = stmd
        tarinfo.uid = statres.st_uid
        tarinfo.gid = statres.st_gid
        if type == REGTYPE:
            tarinfo.size = statres.st_size
        else:
            tarinfo.size = 0
        tarinfo.mtime = statres.st_mtime
        tarinfo.type = type
        tarinfo.linkname = linkname
        # pwd/grp are None on platforms without them (e.g. Windows).
        if pwd:
            try:
                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
            except KeyError:
                pass
        if grp:
            try:
                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
            except KeyError:
                pass

        if type in (CHRTYPE, BLKTYPE):
            if hasattr(os, "major") and hasattr(os, "minor"):
                tarinfo.devmajor = os.major(statres.st_rdev)
                tarinfo.devminor = os.minor(statres.st_rdev)
        return tarinfo

    def list(self, verbose=True, *, members=None):
        """Print a table of contents to sys.stdout. If `verbose' is False, only
           the names of the members are printed. If it is True, an `ls -l'-like
           output is produced. `members' is optional and must be a subset of the
           list returned by getmembers().
        """
        self._check()

        if members is None:
            members = self
        for tarinfo in members:
            if verbose:
                if tarinfo.mode is None:
                    _safe_print("??????????")
                else:
                    _safe_print(stat.filemode(tarinfo.mode))
                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
                                       tarinfo.gname or tarinfo.gid))
                if tarinfo.ischr() or tarinfo.isblk():
                    _safe_print("%10s" %
                                ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
                else:
                    _safe_print("%10d" % tarinfo.size)
                if tarinfo.mtime is None:
                    _safe_print("????-??-?? 
??:??:??") 2150 else: 2151 _safe_print("%d-%02d-%02d %02d:%02d:%02d" \ 2152 % time.localtime(tarinfo.mtime)[:6]) 2153 2154 _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else "")) 2155 2156 if verbose: 2157 if tarinfo.issym(): 2158 _safe_print("-> " + tarinfo.linkname) 2159 if tarinfo.islnk(): 2160 _safe_print("link to " + tarinfo.linkname) 2161 print() 2162 2163 def add(self, name, arcname=None, recursive=True, *, filter=None): 2164 """Add the file `name' to the archive. `name' may be any type of file 2165 (directory, fifo, symbolic link, etc.). If given, `arcname' 2166 specifies an alternative name for the file in the archive. 2167 Directories are added recursively by default. This can be avoided by 2168 setting `recursive' to False. `filter' is a function 2169 that expects a TarInfo object argument and returns the changed 2170 TarInfo object, if it returns None the TarInfo object will be 2171 excluded from the archive. 2172 """ 2173 self._check("awx") 2174 2175 if arcname is None: 2176 arcname = name 2177 2178 # Skip if somebody tries to archive the archive... 2179 if self.name is not None and os.path.abspath(name) == self.name: 2180 self._dbg(2, "tarfile: Skipped %r" % name) 2181 return 2182 2183 self._dbg(1, name) 2184 2185 # Create a TarInfo object from the file. 2186 tarinfo = self.gettarinfo(name, arcname) 2187 2188 if tarinfo is None: 2189 self._dbg(1, "tarfile: Unsupported type %r" % name) 2190 return 2191 2192 # Change or exclude the TarInfo object. 2193 if filter is not None: 2194 tarinfo = filter(tarinfo) 2195 if tarinfo is None: 2196 self._dbg(2, "tarfile: Excluded %r" % name) 2197 return 2198 2199 # Append the tar header and data to the archive. 
2200 if tarinfo.isreg(): 2201 with bltn_open(name, "rb") as f: 2202 self.addfile(tarinfo, f) 2203 2204 elif tarinfo.isdir(): 2205 self.addfile(tarinfo) 2206 if recursive: 2207 for f in sorted(os.listdir(name)): 2208 self.add(os.path.join(name, f), os.path.join(arcname, f), 2209 recursive, filter=filter) 2210 2211 else: 2212 self.addfile(tarinfo) 2213 2214 def addfile(self, tarinfo, fileobj=None): 2215 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is 2216 given, it should be a binary file, and tarinfo.size bytes are read 2217 from it and added to the archive. You can create TarInfo objects 2218 directly, or by using gettarinfo(). 2219 """ 2220 self._check("awx") 2221 2222 tarinfo = copy.copy(tarinfo) 2223 2224 buf = tarinfo.tobuf(self.format, self.encoding, self.errors) 2225 self.fileobj.write(buf) 2226 self.offset += len(buf) 2227 bufsize=self.copybufsize 2228 # If there's data to follow, append it. 2229 if fileobj is not None: 2230 copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize) 2231 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) 2232 if remainder > 0: 2233 self.fileobj.write(NUL * (BLOCKSIZE - remainder)) 2234 blocks += 1 2235 self.offset += blocks * BLOCKSIZE 2236 2237 self.members.append(tarinfo) 2238 2239 def _get_filter_function(self, filter): 2240 if filter is None: 2241 filter = self.extraction_filter 2242 if filter is None: 2243 return fully_trusted_filter 2244 if isinstance(filter, str): 2245 raise TypeError( 2246 'String names are not supported for ' 2247 + 'TarFile.extraction_filter. 
Use a function such as ' 2248 + 'tarfile.data_filter directly.') 2249 return filter 2250 if callable(filter): 2251 return filter 2252 try: 2253 return _NAMED_FILTERS[filter] 2254 except KeyError: 2255 raise ValueError(f"filter {filter!r} not found") from None 2256 2257 def extractall(self, path=".", members=None, *, numeric_owner=False, 2258 filter=None): 2259 """Extract all members from the archive to the current working 2260 directory and set owner, modification time and permissions on 2261 directories afterwards. `path' specifies a different directory 2262 to extract to. `members' is optional and must be a subset of the 2263 list returned by getmembers(). If `numeric_owner` is True, only 2264 the numbers for user/group names are used and not the names. 2265 2266 The `filter` function will be called on each member just 2267 before extraction. 2268 It can return a changed TarInfo or None to skip the member. 2269 String names of common filters are accepted. 2270 """ 2271 directories = [] 2272 2273 filter_function = self._get_filter_function(filter) 2274 if members is None: 2275 members = self 2276 2277 for member in members: 2278 tarinfo = self._get_extract_tarinfo(member, filter_function, path) 2279 if tarinfo is None: 2280 continue 2281 if tarinfo.isdir(): 2282 # For directories, delay setting attributes until later, 2283 # since permissions can interfere with extraction and 2284 # extracting contents can reset mtime. 2285 directories.append(tarinfo) 2286 self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(), 2287 numeric_owner=numeric_owner) 2288 2289 # Reverse sort directories. 2290 directories.sort(key=lambda a: a.name, reverse=True) 2291 2292 # Set correct owner, mtime and filemode on directories. 
2293 for tarinfo in directories: 2294 dirpath = os.path.join(path, tarinfo.name) 2295 try: 2296 self.chown(tarinfo, dirpath, numeric_owner=numeric_owner) 2297 self.utime(tarinfo, dirpath) 2298 self.chmod(tarinfo, dirpath) 2299 except ExtractError as e: 2300 self._handle_nonfatal_error(e) 2301 2302 def extract(self, member, path="", set_attrs=True, *, numeric_owner=False, 2303 filter=None): 2304 """Extract a member from the archive to the current working directory, 2305 using its full name. Its file information is extracted as accurately 2306 as possible. `member' may be a filename or a TarInfo object. You can 2307 specify a different directory using `path'. File attributes (owner, 2308 mtime, mode) are set unless `set_attrs' is False. If `numeric_owner` 2309 is True, only the numbers for user/group names are used and not 2310 the names. 2311 2312 The `filter` function will be called before extraction. 2313 It can return a changed TarInfo or None to skip the member. 2314 String names of common filters are accepted. 2315 """ 2316 filter_function = self._get_filter_function(filter) 2317 tarinfo = self._get_extract_tarinfo(member, filter_function, path) 2318 if tarinfo is not None: 2319 self._extract_one(tarinfo, path, set_attrs, numeric_owner) 2320 2321 def _get_extract_tarinfo(self, member, filter_function, path): 2322 """Get filtered TarInfo (or None) from member, which might be a str""" 2323 if isinstance(member, str): 2324 tarinfo = self.getmember(member) 2325 else: 2326 tarinfo = member 2327 2328 unfiltered = tarinfo 2329 try: 2330 tarinfo = filter_function(tarinfo, path) 2331 except (OSError, FilterError) as e: 2332 self._handle_fatal_error(e) 2333 except ExtractError as e: 2334 self._handle_nonfatal_error(e) 2335 if tarinfo is None: 2336 self._dbg(2, "tarfile: Excluded %r" % unfiltered.name) 2337 return None 2338 # Prepare the link target for makelink(). 
2339 if tarinfo.islnk(): 2340 tarinfo = copy.copy(tarinfo) 2341 tarinfo._link_target = os.path.join(path, tarinfo.linkname) 2342 return tarinfo 2343 2344 def _extract_one(self, tarinfo, path, set_attrs, numeric_owner): 2345 """Extract from filtered tarinfo to disk""" 2346 self._check("r") 2347 2348 try: 2349 self._extract_member(tarinfo, os.path.join(path, tarinfo.name), 2350 set_attrs=set_attrs, 2351 numeric_owner=numeric_owner) 2352 except OSError as e: 2353 self._handle_fatal_error(e) 2354 except ExtractError as e: 2355 self._handle_nonfatal_error(e) 2356 2357 def _handle_nonfatal_error(self, e): 2358 """Handle non-fatal error (ExtractError) according to errorlevel""" 2359 if self.errorlevel > 1: 2360 raise 2361 else: 2362 self._dbg(1, "tarfile: %s" % e) 2363 2364 def _handle_fatal_error(self, e): 2365 """Handle "fatal" error according to self.errorlevel""" 2366 if self.errorlevel > 0: 2367 raise 2368 elif isinstance(e, OSError): 2369 if e.filename is None: 2370 self._dbg(1, "tarfile: %s" % e.strerror) 2371 else: 2372 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) 2373 else: 2374 self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e)) 2375 2376 def extractfile(self, member): 2377 """Extract a member from the archive as a file object. `member' may be 2378 a filename or a TarInfo object. If `member' is a regular file or 2379 a link, an io.BufferedReader object is returned. For all other 2380 existing members, None is returned. If `member' does not appear 2381 in the archive, KeyError is raised. 2382 """ 2383 self._check("r") 2384 2385 if isinstance(member, str): 2386 tarinfo = self.getmember(member) 2387 else: 2388 tarinfo = member 2389 2390 if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES: 2391 # Members with unknown types are treated as regular files. 
2392 return self.fileobject(self, tarinfo) 2393 2394 elif tarinfo.islnk() or tarinfo.issym(): 2395 if isinstance(self.fileobj, _Stream): 2396 # A small but ugly workaround for the case that someone tries 2397 # to extract a (sym)link as a file-object from a non-seekable 2398 # stream of tar blocks. 2399 raise StreamError("cannot extract (sym)link as file object") 2400 else: 2401 # A (sym)link's file object is its target's file object. 2402 return self.extractfile(self._find_link_target(tarinfo)) 2403 else: 2404 # If there's no data associated with the member (directory, chrdev, 2405 # blkdev, etc.), return None instead of a file object. 2406 return None 2407 2408 def _extract_member(self, tarinfo, targetpath, set_attrs=True, 2409 numeric_owner=False): 2410 """Extract the TarInfo object tarinfo to a physical 2411 file called targetpath. 2412 """ 2413 # Fetch the TarInfo object for the given name 2414 # and build the destination pathname, replacing 2415 # forward slashes to platform specific separators. 2416 targetpath = targetpath.rstrip("/") 2417 targetpath = targetpath.replace("/", os.sep) 2418 2419 # Create all upper directories. 2420 upperdirs = os.path.dirname(targetpath) 2421 if upperdirs and not os.path.exists(upperdirs): 2422 # Create directories that are not part of the archive with 2423 # default permissions. 
2424 os.makedirs(upperdirs) 2425 2426 if tarinfo.islnk() or tarinfo.issym(): 2427 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) 2428 else: 2429 self._dbg(1, tarinfo.name) 2430 2431 if tarinfo.isreg(): 2432 self.makefile(tarinfo, targetpath) 2433 elif tarinfo.isdir(): 2434 self.makedir(tarinfo, targetpath) 2435 elif tarinfo.isfifo(): 2436 self.makefifo(tarinfo, targetpath) 2437 elif tarinfo.ischr() or tarinfo.isblk(): 2438 self.makedev(tarinfo, targetpath) 2439 elif tarinfo.islnk() or tarinfo.issym(): 2440 self.makelink(tarinfo, targetpath) 2441 elif tarinfo.type not in SUPPORTED_TYPES: 2442 self.makeunknown(tarinfo, targetpath) 2443 else: 2444 self.makefile(tarinfo, targetpath) 2445 2446 if set_attrs: 2447 self.chown(tarinfo, targetpath, numeric_owner) 2448 if not tarinfo.issym(): 2449 self.chmod(tarinfo, targetpath) 2450 self.utime(tarinfo, targetpath) 2451 2452 #-------------------------------------------------------------------------- 2453 # Below are the different file methods. They are called via 2454 # _extract_member() when extract() is called. They can be replaced in a 2455 # subclass to implement other functionality. 2456 2457 def makedir(self, tarinfo, targetpath): 2458 """Make a directory called targetpath. 2459 """ 2460 try: 2461 if tarinfo.mode is None: 2462 # Use the system's default mode 2463 os.mkdir(targetpath) 2464 else: 2465 # Use a safe mode for the directory, the real mode is set 2466 # later in _extract_member(). 2467 os.mkdir(targetpath, 0o700) 2468 except FileExistsError: 2469 pass 2470 2471 def makefile(self, tarinfo, targetpath): 2472 """Make a file called targetpath. 
2473 """ 2474 source = self.fileobj 2475 source.seek(tarinfo.offset_data) 2476 bufsize = self.copybufsize 2477 with bltn_open(targetpath, "wb") as target: 2478 if tarinfo.sparse is not None: 2479 for offset, size in tarinfo.sparse: 2480 target.seek(offset) 2481 copyfileobj(source, target, size, ReadError, bufsize) 2482 target.seek(tarinfo.size) 2483 target.truncate() 2484 else: 2485 copyfileobj(source, target, tarinfo.size, ReadError, bufsize) 2486 2487 def makeunknown(self, tarinfo, targetpath): 2488 """Make a file from a TarInfo object with an unknown type 2489 at targetpath. 2490 """ 2491 self.makefile(tarinfo, targetpath) 2492 self._dbg(1, "tarfile: Unknown file type %r, " \ 2493 "extracted as regular file." % tarinfo.type) 2494 2495 def makefifo(self, tarinfo, targetpath): 2496 """Make a fifo called targetpath. 2497 """ 2498 if hasattr(os, "mkfifo"): 2499 os.mkfifo(targetpath) 2500 else: 2501 raise ExtractError("fifo not supported by system") 2502 2503 def makedev(self, tarinfo, targetpath): 2504 """Make a character or block device called targetpath. 2505 """ 2506 if not hasattr(os, "mknod") or not hasattr(os, "makedev"): 2507 raise ExtractError("special devices not supported by system") 2508 2509 mode = tarinfo.mode 2510 if mode is None: 2511 # Use mknod's default 2512 mode = 0o600 2513 if tarinfo.isblk(): 2514 mode |= stat.S_IFBLK 2515 else: 2516 mode |= stat.S_IFCHR 2517 2518 os.mknod(targetpath, mode, 2519 os.makedev(tarinfo.devmajor, tarinfo.devminor)) 2520 2521 def makelink(self, tarinfo, targetpath): 2522 """Make a (symbolic) link called targetpath. If it cannot be created 2523 (platform limitation), we try to make a copy of the referenced file 2524 instead of a link. 2525 """ 2526 try: 2527 # For systems that support symbolic and hard links. 2528 if tarinfo.issym(): 2529 if os.path.lexists(targetpath): 2530 # Avoid FileExistsError on following os.symlink. 
2531 os.unlink(targetpath) 2532 os.symlink(tarinfo.linkname, targetpath) 2533 else: 2534 if os.path.exists(tarinfo._link_target): 2535 os.link(tarinfo._link_target, targetpath) 2536 else: 2537 self._extract_member(self._find_link_target(tarinfo), 2538 targetpath) 2539 except symlink_exception: 2540 try: 2541 self._extract_member(self._find_link_target(tarinfo), 2542 targetpath) 2543 except KeyError: 2544 raise ExtractError("unable to resolve link inside archive") from None 2545 2546 def chown(self, tarinfo, targetpath, numeric_owner): 2547 """Set owner of targetpath according to tarinfo. If numeric_owner 2548 is True, use .gid/.uid instead of .gname/.uname. If numeric_owner 2549 is False, fall back to .gid/.uid when the search based on name 2550 fails. 2551 """ 2552 if hasattr(os, "geteuid") and os.geteuid() == 0: 2553 # We have to be root to do so. 2554 g = tarinfo.gid 2555 u = tarinfo.uid 2556 if not numeric_owner: 2557 try: 2558 if grp and tarinfo.gname: 2559 g = grp.getgrnam(tarinfo.gname)[2] 2560 except KeyError: 2561 pass 2562 try: 2563 if pwd and tarinfo.uname: 2564 u = pwd.getpwnam(tarinfo.uname)[2] 2565 except KeyError: 2566 pass 2567 if g is None: 2568 g = -1 2569 if u is None: 2570 u = -1 2571 try: 2572 if tarinfo.issym() and hasattr(os, "lchown"): 2573 os.lchown(targetpath, u, g) 2574 else: 2575 os.chown(targetpath, u, g) 2576 except OSError as e: 2577 raise ExtractError("could not change owner") from e 2578 2579 def chmod(self, tarinfo, targetpath): 2580 """Set file permissions of targetpath according to tarinfo. 2581 """ 2582 if tarinfo.mode is None: 2583 return 2584 try: 2585 os.chmod(targetpath, tarinfo.mode) 2586 except OSError as e: 2587 raise ExtractError("could not change mode") from e 2588 2589 def utime(self, tarinfo, targetpath): 2590 """Set modification time of targetpath according to tarinfo. 
2591 """ 2592 mtime = tarinfo.mtime 2593 if mtime is None: 2594 return 2595 if not hasattr(os, 'utime'): 2596 return 2597 try: 2598 os.utime(targetpath, (mtime, mtime)) 2599 except OSError as e: 2600 raise ExtractError("could not change modification time") from e 2601 2602 #-------------------------------------------------------------------------- 2603 def next(self): 2604 """Return the next member of the archive as a TarInfo object, when 2605 TarFile is opened for reading. Return None if there is no more 2606 available. 2607 """ 2608 self._check("ra") 2609 if self.firstmember is not None: 2610 m = self.firstmember 2611 self.firstmember = None 2612 return m 2613 2614 # Advance the file pointer. 2615 if self.offset != self.fileobj.tell(): 2616 if self.offset == 0: 2617 return None 2618 self.fileobj.seek(self.offset - 1) 2619 if not self.fileobj.read(1): 2620 raise ReadError("unexpected end of data") 2621 2622 # Read the next block. 2623 tarinfo = None 2624 while True: 2625 try: 2626 tarinfo = self.tarinfo.fromtarfile(self) 2627 except EOFHeaderError as e: 2628 if self.ignore_zeros: 2629 self._dbg(2, "0x%X: %s" % (self.offset, e)) 2630 self.offset += BLOCKSIZE 2631 continue 2632 except InvalidHeaderError as e: 2633 if self.ignore_zeros: 2634 self._dbg(2, "0x%X: %s" % (self.offset, e)) 2635 self.offset += BLOCKSIZE 2636 continue 2637 elif self.offset == 0: 2638 raise ReadError(str(e)) from None 2639 except EmptyHeaderError: 2640 if self.offset == 0: 2641 raise ReadError("empty file") from None 2642 except TruncatedHeaderError as e: 2643 if self.offset == 0: 2644 raise ReadError(str(e)) from None 2645 except SubsequentHeaderError as e: 2646 raise ReadError(str(e)) from None 2647 except Exception as e: 2648 try: 2649 import zlib 2650 if isinstance(e, zlib.error): 2651 raise ReadError(f'zlib error: {e}') from None 2652 else: 2653 raise e 2654 except ImportError: 2655 raise e 2656 break 2657 2658 if tarinfo is not None: 2659 self.members.append(tarinfo) 2660 else: 2661 
self._loaded = True 2662 2663 return tarinfo 2664 2665 #-------------------------------------------------------------------------- 2666 # Little helper methods: 2667 2668 def _getmember(self, name, tarinfo=None, normalize=False): 2669 """Find an archive member by name from bottom to top. 2670 If tarinfo is given, it is used as the starting point. 2671 """ 2672 # Ensure that all members have been loaded. 2673 members = self.getmembers() 2674 2675 # Limit the member search list up to tarinfo. 2676 skipping = False 2677 if tarinfo is not None: 2678 try: 2679 index = members.index(tarinfo) 2680 except ValueError: 2681 # The given starting point might be a (modified) copy. 2682 # We'll later skip members until we find an equivalent. 2683 skipping = True 2684 else: 2685 # Happy fast path 2686 members = members[:index] 2687 2688 if normalize: 2689 name = os.path.normpath(name) 2690 2691 for member in reversed(members): 2692 if skipping: 2693 if tarinfo.offset == member.offset: 2694 skipping = False 2695 continue 2696 if normalize: 2697 member_name = os.path.normpath(member.name) 2698 else: 2699 member_name = member.name 2700 2701 if name == member_name: 2702 return member 2703 2704 if skipping: 2705 # Starting point was not found 2706 raise ValueError(tarinfo) 2707 2708 def _load(self): 2709 """Read through the entire archive file and look for readable 2710 members. 2711 """ 2712 while True: 2713 tarinfo = self.next() 2714 if tarinfo is None: 2715 break 2716 self._loaded = True 2717 2718 def _check(self, mode=None): 2719 """Check if TarFile is still open, and if the operation's mode 2720 corresponds to TarFile's mode. 2721 """ 2722 if self.closed: 2723 raise OSError("%s is closed" % self.__class__.__name__) 2724 if mode is not None and self.mode not in mode: 2725 raise OSError("bad operation for mode %r" % self.mode) 2726 2727 def _find_link_target(self, tarinfo): 2728 """Find the target member of a symlink or hardlink member in the 2729 archive. 
2730 """ 2731 if tarinfo.issym(): 2732 # Always search the entire archive. 2733 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname))) 2734 limit = None 2735 else: 2736 # Search the archive before the link, because a hard link is 2737 # just a reference to an already archived file. 2738 linkname = tarinfo.linkname 2739 limit = tarinfo 2740 2741 member = self._getmember(linkname, tarinfo=limit, normalize=True) 2742 if member is None: 2743 raise KeyError("linkname %r not found" % linkname) 2744 return member 2745 2746 def __iter__(self): 2747 """Provide an iterator object. 2748 """ 2749 if self._loaded: 2750 yield from self.members 2751 return 2752 2753 # Yield items using TarFile's next() method. 2754 # When all members have been read, set TarFile as _loaded. 2755 index = 0 2756 # Fix for SF #1100429: Under rare circumstances it can 2757 # happen that getmembers() is called during iteration, 2758 # which will have already exhausted the next() method. 2759 if self.firstmember is not None: 2760 tarinfo = self.next() 2761 index += 1 2762 yield tarinfo 2763 2764 while True: 2765 if index < len(self.members): 2766 tarinfo = self.members[index] 2767 elif not self._loaded: 2768 tarinfo = self.next() 2769 if not tarinfo: 2770 self._loaded = True 2771 return 2772 else: 2773 return 2774 index += 1 2775 yield tarinfo 2776 2777 def _dbg(self, level, msg): 2778 """Write debugging output to sys.stderr. 2779 """ 2780 if level <= self.debug: 2781 print(msg, file=sys.stderr) 2782 2783 def __enter__(self): 2784 self._check() 2785 return self 2786 2787 def __exit__(self, type, value, traceback): 2788 if type is None: 2789 self.close() 2790 else: 2791 # An exception occurred. We must not call close() because 2792 # it would try to write end-of-archive blocks and padding. 
2793 if not self._extfileobj: 2794 self.fileobj.close() 2795 self.closed = True 2796 2797#-------------------- 2798# exported functions 2799#-------------------- 2800 2801def is_tarfile(name): 2802 """Return True if name points to a tar archive that we 2803 are able to handle, else return False. 2804 2805 'name' should be a string, file, or file-like object. 2806 """ 2807 try: 2808 if hasattr(name, "read"): 2809 pos = name.tell() 2810 t = open(fileobj=name) 2811 name.seek(pos) 2812 else: 2813 t = open(name) 2814 t.close() 2815 return True 2816 except TarError: 2817 return False 2818 2819open = TarFile.open 2820 2821 2822def main(): 2823 import argparse 2824 2825 description = 'A simple command-line interface for tarfile module.' 2826 parser = argparse.ArgumentParser(description=description) 2827 parser.add_argument('-v', '--verbose', action='store_true', default=False, 2828 help='Verbose output') 2829 parser.add_argument('--filter', metavar='<filtername>', 2830 choices=_NAMED_FILTERS, 2831 help='Filter for extraction') 2832 2833 group = parser.add_mutually_exclusive_group(required=True) 2834 group.add_argument('-l', '--list', metavar='<tarfile>', 2835 help='Show listing of a tarfile') 2836 group.add_argument('-e', '--extract', nargs='+', 2837 metavar=('<tarfile>', '<output_dir>'), 2838 help='Extract tarfile into target dir') 2839 group.add_argument('-c', '--create', nargs='+', 2840 metavar=('<name>', '<file>'), 2841 help='Create tarfile from sources') 2842 group.add_argument('-t', '--test', metavar='<tarfile>', 2843 help='Test if a tarfile is valid') 2844 2845 args = parser.parse_args() 2846 2847 if args.filter and args.extract is None: 2848 parser.exit(1, '--filter is only valid for extraction\n') 2849 2850 if args.test is not None: 2851 src = args.test 2852 if is_tarfile(src): 2853 with open(src, 'r') as tar: 2854 tar.getmembers() 2855 print(tar.getmembers(), file=sys.stderr) 2856 if args.verbose: 2857 print('{!r} is a tar archive.'.format(src)) 2858 else: 2859 
parser.exit(1, '{!r} is not a tar archive.\n'.format(src)) 2860 2861 elif args.list is not None: 2862 src = args.list 2863 if is_tarfile(src): 2864 with TarFile.open(src, 'r:*') as tf: 2865 tf.list(verbose=args.verbose) 2866 else: 2867 parser.exit(1, '{!r} is not a tar archive.\n'.format(src)) 2868 2869 elif args.extract is not None: 2870 if len(args.extract) == 1: 2871 src = args.extract[0] 2872 curdir = os.curdir 2873 elif len(args.extract) == 2: 2874 src, curdir = args.extract 2875 else: 2876 parser.exit(1, parser.format_help()) 2877 2878 if is_tarfile(src): 2879 with TarFile.open(src, 'r:*') as tf: 2880 tf.extractall(path=curdir, filter=args.filter) 2881 if args.verbose: 2882 if curdir == '.': 2883 msg = '{!r} file is extracted.'.format(src) 2884 else: 2885 msg = ('{!r} file is extracted ' 2886 'into {!r} directory.').format(src, curdir) 2887 print(msg) 2888 else: 2889 parser.exit(1, '{!r} is not a tar archive.\n'.format(src)) 2890 2891 elif args.create is not None: 2892 tar_name = args.create.pop(0) 2893 _, ext = os.path.splitext(tar_name) 2894 compressions = { 2895 # gz 2896 '.gz': 'gz', 2897 '.tgz': 'gz', 2898 # xz 2899 '.xz': 'xz', 2900 '.txz': 'xz', 2901 # bz2 2902 '.bz2': 'bz2', 2903 '.tbz': 'bz2', 2904 '.tbz2': 'bz2', 2905 '.tb2': 'bz2', 2906 } 2907 tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w' 2908 tar_files = args.create 2909 2910 with TarFile.open(tar_name, tar_mode) as tf: 2911 for file_name in tar_files: 2912 tf.add(file_name) 2913 2914 if args.verbose: 2915 print('{!r} file created.'.format(tar_name)) 2916 2917if __name__ == '__main__': 2918 main() 2919