1#!/usr/bin/env python3 2#------------------------------------------------------------------- 3# tarfile.py 4#------------------------------------------------------------------- 5# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de> 6# All rights reserved. 7# 8# Permission is hereby granted, free of charge, to any person 9# obtaining a copy of this software and associated documentation 10# files (the "Software"), to deal in the Software without 11# restriction, including without limitation the rights to use, 12# copy, modify, merge, publish, distribute, sublicense, and/or sell 13# copies of the Software, and to permit persons to whom the 14# Software is furnished to do so, subject to the following 15# conditions: 16# 17# The above copyright notice and this permission notice shall be 18# included in all copies or substantial portions of the Software. 19# 20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 27# OTHER DEALINGS IN THE SOFTWARE. 28# 29"""Read from and write to tar format archives. 30""" 31 32version = "0.9.0" 33__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)" 34__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend." 
35 36#--------- 37# Imports 38#--------- 39from builtins import open as bltn_open 40import sys 41import os 42import io 43import shutil 44import stat 45import time 46import struct 47import copy 48import re 49 50try: 51 import pwd 52except ImportError: 53 pwd = None 54try: 55 import grp 56except ImportError: 57 grp = None 58 59# os.symlink on Windows prior to 6.0 raises NotImplementedError 60# OSError (winerror=1314) will be raised if the caller does not hold the 61# SeCreateSymbolicLinkPrivilege privilege 62symlink_exception = (AttributeError, NotImplementedError, OSError) 63 64# from tarfile import * 65__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError", 66 "CompressionError", "StreamError", "ExtractError", "HeaderError", 67 "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT", 68 "DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter", 69 "tar_filter", "FilterError", "AbsoluteLinkError", 70 "OutsideDestinationError", "SpecialFileError", "AbsolutePathError", 71 "LinkOutsideDestinationError"] 72 73 74#--------------------------------------------------------- 75# tar constants 76#--------------------------------------------------------- 77NUL = b"\0" # the null character 78BLOCKSIZE = 512 # length of processing blocks 79RECORDSIZE = BLOCKSIZE * 20 # length of records 80GNU_MAGIC = b"ustar \0" # magic gnu tar string 81POSIX_MAGIC = b"ustar\x0000" # magic posix tar string 82 83LENGTH_NAME = 100 # maximum length of a filename 84LENGTH_LINK = 100 # maximum length of a linkname 85LENGTH_PREFIX = 155 # maximum length of the prefix field 86 87REGTYPE = b"0" # regular file 88AREGTYPE = b"\0" # regular file 89LNKTYPE = b"1" # link (inside tarfile) 90SYMTYPE = b"2" # symbolic link 91CHRTYPE = b"3" # character special device 92BLKTYPE = b"4" # block special device 93DIRTYPE = b"5" # directory 94FIFOTYPE = b"6" # fifo special device 95CONTTYPE = b"7" # contiguous file 96 97GNUTYPE_LONGNAME = b"L" # GNU tar longname 98GNUTYPE_LONGLINK = b"K" # GNU tar 
longlink 99GNUTYPE_SPARSE = b"S" # GNU tar sparse file 100 101XHDTYPE = b"x" # POSIX.1-2001 extended header 102XGLTYPE = b"g" # POSIX.1-2001 global header 103SOLARIS_XHDTYPE = b"X" # Solaris extended header 104 105USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format 106GNU_FORMAT = 1 # GNU tar format 107PAX_FORMAT = 2 # POSIX.1-2001 (pax) format 108DEFAULT_FORMAT = PAX_FORMAT 109 110#--------------------------------------------------------- 111# tarfile constants 112#--------------------------------------------------------- 113# File types that tarfile supports: 114SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, 115 SYMTYPE, DIRTYPE, FIFOTYPE, 116 CONTTYPE, CHRTYPE, BLKTYPE, 117 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, 118 GNUTYPE_SPARSE) 119 120# File types that will be treated as a regular file. 121REGULAR_TYPES = (REGTYPE, AREGTYPE, 122 CONTTYPE, GNUTYPE_SPARSE) 123 124# File types that are part of the GNU tar format. 125GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, 126 GNUTYPE_SPARSE) 127 128# Fields from a pax header that override a TarInfo attribute. 129PAX_FIELDS = ("path", "linkpath", "size", "mtime", 130 "uid", "gid", "uname", "gname") 131 132# Fields from a pax header that are affected by hdrcharset. 133PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"} 134 135# Fields in a pax header that are numbers, all other fields 136# are treated as strings. 
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}

#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# Use UTF-8 on Windows; elsewhere follow the filesystem encoding.
if os.name == "nt":
    ENCODING = "utf-8"
else:
    ENCODING = sys.getfilesystemencoding()

#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------

def stn(s, length, encoding, errors):
    """Convert a string to a null-terminated bytes object.

       The encoded value is truncated to *length* bytes and padded
       with NULs; None is rejected because a tar header field cannot
       represent it.
    """
    if s is None:
        raise ValueError("metadata cannot contain None")
    s = s.encode(encoding, errors)
    return s[:length] + (length - len(s)) * NUL

def nts(s, encoding, errors):
    """Convert a null-terminated bytes object to a string.
    """
    p = s.find(b"\0")
    if p != -1:
        s = s[:p]
    return s.decode(encoding, errors)

def nti(s):
    """Convert a number field to a python number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] in (0o200, 0o377):
        # GNU base-256 encoding: a leading 0o200 marks a positive
        # value, 0o377 a negative one; the remaining bytes are a
        # big-endian base-256 number.
        n = 0
        for i in range(len(s) - 1):
            n <<= 8
            n += s[i + 1]
        if s[0] == 0o377:
            n = -(256 ** (len(s) - 1) - n)
    else:
        # Plain POSIX octal digits, NUL/space terminated.
        try:
            s = nts(s, "ascii", "strict")
            n = int(s.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    return n

def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to
    # (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    # NOTE(review): original_n is currently unused in the visible code.
    original_n = n
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            # Two's-complement over digits bytes for negative values.
            n = 256 ** digits + n

        for i in range(digits - 1):
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s

def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.
    """
    # 256 == 8 * 0x20: the 8-byte chksum field counted as spaces; the
    # "8x" in the format skips those bytes of the real buffer.
    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum

def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    # Copy in full buffers, then the tail; a short read means the
    # archive ended prematurely.
    blocks, remainder = divmod(length, bufsize)
    for b in range(blocks):
        buf = src.read(bufsize)
        if len(buf) < bufsize:
            raise exception("unexpected end of data")
        dst.write(buf)

    if remainder != 0:
        buf = src.read(remainder)
        if len(buf) < remainder:
            raise exception("unexpected end of data")
        dst.write(buf)
    return

def _safe_print(s):
    # Best-effort printing: characters that the stdout encoding cannot
    # represent are backslash-escaped instead of raising.
    encoding = getattr(sys.stdout, 'encoding', None)
    if encoding is not None:
        s = s.encode(encoding, 'backslashreplace').decode(encoding)
    print(s, end=' ')


class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Base exception for header errors."""
    pass
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
    pass
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
    pass
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
    pass
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
    pass
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
    pass

#---------------------------
# internal stream interface
#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        # Translate the one-letter mode into os.open() flags.
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)

class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method that works with bytes,
       and the method is accessed blockwise.
       Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin.buffer,
       sys.stdout.buffer, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize,
                 compresslevel):
        """Construct a _Stream object.
        """
        # _extfileobj is True when the caller owns fileobj and we must
        # not close it; False when we opened the file ourselves.
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""
        self.pos = 0
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available") from None
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self.exception = zlib.error
                    self._init_read_gz()
                else:
                    self._init_write_gz(compresslevel)

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available") from None
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor(compresslevel)

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available") from None
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            # Do not leak a file we opened ourselves if init fails.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self, compresslevel):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(compresslevel,
                                         self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        # Hand-written gzip member header (magic, deflate, FNAME flag,
        # mtime, XFL, OS=255).
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # Honor "directory components removed" from RFC1952
        self.name = os.path.basename(self.name)
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        # pos tracks the uncompressed stream position.
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # Gzip trailer: CRC32 and ISIZE (size mod 2**32).
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        # Skip the optional FEXTRA, FNAME, FCOMMENT and FHCRC fields.
        if flag & 4:
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.read(xlen)
        if flag & 8:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            # Forward seek is emulated by reading and discarding.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size):
        """Return the next size number of bytes from the stream."""
        assert size is not None
        buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            # Skip underlying buffer to avoid unaligned double buffering.
            if self.buf:
                buf = self.buf
                self.buf = b""
            else:
                buf = self.fileobj.read(self.bufsize)
                if not buf:
                    break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception as e:
                raise ReadError("invalid compressed data") from e
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream

class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        # First call returns the sniffed block; afterwards reads are
        # delegated directly to the underlying fileobj.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        # Detect the compression from well-known magic bytes.
        if self.buf.startswith(b"\x1f\x8b\x08"):
            return "gz"
        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
            return "bz2"
        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        else:
            return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy

#------------------------
# Extraction file object
#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, name, blockinfo=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = name
        self.closed = False

        # blockinfo describes sparse data runs as (offset, size) pairs;
        # a regular member is one contiguous run.
        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.
        # Each entry is (is_data, start, stop, realpos-in-fileobj).
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    @property
    def mode(self):
        return 'rb'

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        buf = b""
        while size > 0:
            # Find the map entry covering the current position; zero
            # blocks of a sparse member are synthesized as NULs.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                buf += b
            else:
                buf += NUL * length
            size -= length
            self.position += length
        return buf

    def readinto(self, b):
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        # Note: does not close the underlying fileobj, which belongs to
        # the TarFile.
        self.closed = True
#class _FileInFile

class ExFileObject(io.BufferedReader):

    def __init__(self, tarfile, tarinfo):
        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                              tarinfo.size, tarinfo.name, tarinfo.sparse)
        super().__init__(fileobj)
#class ExFileObject


#-----------------------------
# extraction filters (PEP 706)
#-----------------------------

class FilterError(TarError):
    pass

class AbsolutePathError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'member {tarinfo.name!r} has an absolute path')

class OutsideDestinationError(FilterError):
    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, '
                         + 'which is outside the destination')

class SpecialFileError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'{tarinfo.name!r} is a special file')

class AbsoluteLinkError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'{tarinfo.name!r} is a link to an absolute path')

class LinkOutsideDestinationError(FilterError):
    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        super().__init__(f'{tarinfo.name!r} would link to {path!r}, '
                         + 'which is outside the destination')

def _get_filtered_attrs(member, dest_path, for_data=True):
    # Compute the TarInfo attributes that the 'tar'/'data' extraction
    # filters need to override, raising a FilterError subclass for
    # members that must be rejected outright.
    new_attrs = {}
    name = member.name
    dest_path = os.path.realpath(dest_path)
    # Strip leading / (tar's directory separator) from filenames.
    # Include os.sep (target OS directory separator) as well.
    if name.startswith(('/', os.sep)):
        name = new_attrs['name'] = member.path.lstrip('/' + os.sep)
    if os.path.isabs(name):
        # Path is absolute even after stripping.
        # For example, 'C:/foo' on Windows.
        raise AbsolutePathError(member)
    # Ensure we stay in the destination
    target_path = os.path.realpath(os.path.join(dest_path, name))
    if os.path.commonpath([target_path, dest_path]) != dest_path:
        raise OutsideDestinationError(member, target_path)
    # Limit permissions (no high bits, and go-w)
    mode = member.mode
    if mode is not None:
        # Strip high bits & group/other write bits
        mode = mode & 0o755
        if for_data:
            # For data, handle permissions & file types
            if member.isreg() or member.islnk():
                if not mode & 0o100:
                    # Clear executable bits if not executable by user
                    mode &= ~0o111
                # Ensure owner can read & write
                mode |= 0o600
            elif member.isdir() or member.issym():
                # Ignore mode for directories & symlinks
                mode = None
            else:
                # Reject special files
                raise SpecialFileError(member)
        if mode != member.mode:
            new_attrs['mode'] = mode
    if for_data:
        # Ignore ownership for 'data'
        if member.uid is not None:
            new_attrs['uid'] = None
        if member.gid is not None:
            new_attrs['gid'] = None
        if member.uname is not None:
            new_attrs['uname'] = None
        if member.gname is not None:
            new_attrs['gname'] = None
        # Check link destination for 'data'
        if member.islnk() or member.issym():
            if os.path.isabs(member.linkname):
                raise AbsoluteLinkError(member)
            if member.issym():
                # Symlink targets are resolved relative to the member's
                # own directory; hard link targets relative to dest_path.
                target_path = os.path.join(dest_path,
                                           os.path.dirname(name),
                                           member.linkname)
            else:
                target_path = os.path.join(dest_path,
                                           member.linkname)
            target_path = os.path.realpath(target_path)
            if os.path.commonpath([target_path, dest_path]) != dest_path:
                raise LinkOutsideDestinationError(member, target_path)
    return new_attrs

def fully_trusted_filter(member, dest_path):
    # Keep the member exactly as stored in the archive.
    return member

def tar_filter(member, dest_path):
    new_attrs = _get_filtered_attrs(member, dest_path, False)
    if new_attrs:
        return member.replace(**new_attrs, deep=False)
    return member

def data_filter(member, dest_path):
    new_attrs = _get_filtered_attrs(member, dest_path, True)
    if new_attrs:
        return member.replace(**new_attrs, deep=False)
    return member

# Mapping of filter names accepted by extract()/extractall() to the
# corresponding filter functions.
_NAMED_FILTERS = {
    "fully_trusted": fully_trusted_filter,
    "tar": tar_filter,
    "data": data_filter,
}

#------------------
# Exported Classes
#------------------

# Sentinel for replace() defaults, meaning "don't change the attribute"
_KEEP = object()

# Header length is digits followed by a space.
_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ")

class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    # __slots__ as a dict doubles as per-attribute documentation.
    __slots__ = dict(
        name = 'Name of the archive member.',
        mode = 'Permission bits.',
        uid = 'User ID of the user who originally stored this member.',
        gid = 'Group ID of the user who originally stored this member.',
        size = 'Size in bytes.',
        mtime = 'Time of last modification.',
        chksum = 'Header checksum.',
        type = ('File type. type is usually one of these constants: '
                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
        linkname = ('Name of the target file name, which is only present '
                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
        uname = 'User name.',
        gname = 'Group name.',
        devmajor = 'Device major number.',
        devminor = 'Device minor number.',
        offset = 'The tar header starts here.',
        offset_data = "The file's data starts here.",
        pax_headers = ('A dictionary containing key-value pairs of an '
                       'associated pax extended header.'),
        sparse = 'Sparse member information.',
        _tarfile = None,
        _sparse_structs = None,
        _link_target = None,
        )

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    @property
    def tarfile(self):
        import warnings
        warnings.warn(
            'The undocumented "tarfile" attribute of TarInfo objects '
            + 'is deprecated and will be removed in Python 3.16',
            DeprecationWarning, stacklevel=2)
        return self._tarfile

    @tarfile.setter
    def tarfile(self, tarfile):
        import warnings
        warnings.warn(
            'The undocumented "tarfile" attribute of TarInfo objects '
            + 'is deprecated and will be removed in Python 3.16',
            DeprecationWarning, stacklevel=2)
        self._tarfile = tarfile

    @property
    def path(self):
        'In pax headers, "name" is called "path".'
        return self.name

    @path.setter
    def path(self, name):
        self.name = name

    @property
    def linkpath(self):
        'In pax headers, "linkname" is called "linkpath".'
        return self.linkname

    @linkpath.setter
    def linkpath(self, linkname):
        self.linkname = linkname

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def replace(self, *,
                name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP,
                uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP,
                deep=True, _KEEP=_KEEP):
        """Return a deep copy of self with the given attributes replaced.
        """
        if deep:
            result = copy.deepcopy(self)
        else:
            result = copy.copy(self)
        # _KEEP is the "no change" sentinel, so None remains a valid
        # replacement value (used by the extraction filters).
        if name is not _KEEP:
            result.name = name
        if mtime is not _KEEP:
            result.mtime = mtime
        if mode is not _KEEP:
            result.mode = mode
        if linkname is not _KEEP:
            result.linkname = linkname
        if uid is not _KEEP:
            result.uid = uid
        if gid is not _KEEP:
            result.gid = gid
        if uname is not _KEEP:
            result.uname = uname
        if gname is not _KEEP:
            result.gname = gname
        return result

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.
        """
        if self.mode is None:
            mode = None
        else:
            mode = self.mode & 0o7777
        info = {
            "name": self.name,
            "mode": mode,
            "uid": self.uid,
            "gid": self.gid,
            "size": self.size,
            "mtime": self.mtime,
            "chksum": self.chksum,
            "type": self.type,
            "linkname": self.linkname,
            "uname": self.uname,
            "gname": self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        # Directory names are stored with a trailing slash.
        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a string of 512 byte blocks.
        """
        info = self.get_info()
        for name, value in info.items():
            if value is None:
                raise ValueError("%s may not be None" % name)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        # Long names may be split across the ustar prefix + name fields.
        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.
        """
        info["magic"] = GNU_MAGIC

        # Overlong names/linknames are emitted as extra GNU longname/
        # longlink pseudo-members preceding the real header.
        buf = b""
        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
1069 try: 1070 info[name].encode("ascii", "strict") 1071 except UnicodeEncodeError: 1072 pax_headers[hname] = info[name] 1073 continue 1074 1075 if len(info[name]) > length: 1076 pax_headers[hname] = info[name] 1077 1078 # Test number fields for values that exceed the field limit or values 1079 # that like to be stored as float. 1080 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): 1081 needs_pax = False 1082 1083 val = info[name] 1084 val_is_float = isinstance(val, float) 1085 val_int = round(val) if val_is_float else val 1086 if not 0 <= val_int < 8 ** (digits - 1): 1087 # Avoid overflow. 1088 info[name] = 0 1089 needs_pax = True 1090 elif val_is_float: 1091 # Put rounded value in ustar header, and full 1092 # precision value in pax header. 1093 info[name] = val_int 1094 needs_pax = True 1095 1096 # The existing pax header has priority. 1097 if needs_pax and name not in pax_headers: 1098 pax_headers[name] = str(val) 1099 1100 # Create a pax extended header if necessary. 1101 if pax_headers: 1102 buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding) 1103 else: 1104 buf = b"" 1105 1106 return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace") 1107 1108 @classmethod 1109 def create_pax_global_header(cls, pax_headers): 1110 """Return the object as a pax global header block sequence. 1111 """ 1112 return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8") 1113 1114 def _posix_split_name(self, name, encoding, errors): 1115 """Split a name longer than 100 chars into a prefix 1116 and a name part. 
1117 """ 1118 components = name.split("/") 1119 for i in range(1, len(components)): 1120 prefix = "/".join(components[:i]) 1121 name = "/".join(components[i:]) 1122 if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \ 1123 len(name.encode(encoding, errors)) <= LENGTH_NAME: 1124 break 1125 else: 1126 raise ValueError("name is too long") 1127 1128 return prefix, name 1129 1130 @staticmethod 1131 def _create_header(info, format, encoding, errors): 1132 """Return a header block. info is a dictionary with file 1133 information, format must be one of the *_FORMAT constants. 1134 """ 1135 has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE) 1136 if has_device_fields: 1137 devmajor = itn(info.get("devmajor", 0), 8, format) 1138 devminor = itn(info.get("devminor", 0), 8, format) 1139 else: 1140 devmajor = stn("", 8, encoding, errors) 1141 devminor = stn("", 8, encoding, errors) 1142 1143 # None values in metadata should cause ValueError. 1144 # itn()/stn() do this for all fields except type. 
1145 filetype = info.get("type", REGTYPE) 1146 if filetype is None: 1147 raise ValueError("TarInfo.type must not be None") 1148 1149 parts = [ 1150 stn(info.get("name", ""), 100, encoding, errors), 1151 itn(info.get("mode", 0) & 0o7777, 8, format), 1152 itn(info.get("uid", 0), 8, format), 1153 itn(info.get("gid", 0), 8, format), 1154 itn(info.get("size", 0), 12, format), 1155 itn(info.get("mtime", 0), 12, format), 1156 b" ", # checksum field 1157 filetype, 1158 stn(info.get("linkname", ""), 100, encoding, errors), 1159 info.get("magic", POSIX_MAGIC), 1160 stn(info.get("uname", ""), 32, encoding, errors), 1161 stn(info.get("gname", ""), 32, encoding, errors), 1162 devmajor, 1163 devminor, 1164 stn(info.get("prefix", ""), 155, encoding, errors) 1165 ] 1166 1167 buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts)) 1168 chksum = calc_chksums(buf[-BLOCKSIZE:])[0] 1169 buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:] 1170 return buf 1171 1172 @staticmethod 1173 def _create_payload(payload): 1174 """Return the string payload filled with zero bytes 1175 up to the next 512 byte border. 1176 """ 1177 blocks, remainder = divmod(len(payload), BLOCKSIZE) 1178 if remainder > 0: 1179 payload += (BLOCKSIZE - remainder) * NUL 1180 return payload 1181 1182 @classmethod 1183 def _create_gnu_long_header(cls, name, type, encoding, errors): 1184 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence 1185 for name. 1186 """ 1187 name = name.encode(encoding, errors) + NUL 1188 1189 info = {} 1190 info["name"] = "././@LongLink" 1191 info["type"] = type 1192 info["size"] = len(name) 1193 info["magic"] = GNU_MAGIC 1194 1195 # create extended header + name blocks. 1196 return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \ 1197 cls._create_payload(name) 1198 1199 @classmethod 1200 def _create_pax_generic_header(cls, pax_headers, type, encoding): 1201 """Return a POSIX.1-2008 extended or global header sequence 1202 that contains a list of keyword, value pairs. 
The values 1203 must be strings. 1204 """ 1205 # Check if one of the fields contains surrogate characters and thereby 1206 # forces hdrcharset=BINARY, see _proc_pax() for more information. 1207 binary = False 1208 for keyword, value in pax_headers.items(): 1209 try: 1210 value.encode("utf-8", "strict") 1211 except UnicodeEncodeError: 1212 binary = True 1213 break 1214 1215 records = b"" 1216 if binary: 1217 # Put the hdrcharset field at the beginning of the header. 1218 records += b"21 hdrcharset=BINARY\n" 1219 1220 for keyword, value in pax_headers.items(): 1221 keyword = keyword.encode("utf-8") 1222 if binary: 1223 # Try to restore the original byte representation of `value'. 1224 # Needless to say, that the encoding must match the string. 1225 value = value.encode(encoding, "surrogateescape") 1226 else: 1227 value = value.encode("utf-8") 1228 1229 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' 1230 n = p = 0 1231 while True: 1232 n = l + len(str(p)) 1233 if n == p: 1234 break 1235 p = n 1236 records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n" 1237 1238 # We use a hardcoded "././@PaxHeader" name like star does 1239 # instead of the one that POSIX recommends. 1240 info = {} 1241 info["name"] = "././@PaxHeader" 1242 info["type"] = type 1243 info["size"] = len(records) 1244 info["magic"] = POSIX_MAGIC 1245 1246 # Create pax header + record blocks. 1247 return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \ 1248 cls._create_payload(records) 1249 1250 @classmethod 1251 def frombuf(cls, buf, encoding, errors): 1252 """Construct a TarInfo object from a 512 byte bytes object. 
1253 """ 1254 if len(buf) == 0: 1255 raise EmptyHeaderError("empty header") 1256 if len(buf) != BLOCKSIZE: 1257 raise TruncatedHeaderError("truncated header") 1258 if buf.count(NUL) == BLOCKSIZE: 1259 raise EOFHeaderError("end of file header") 1260 1261 chksum = nti(buf[148:156]) 1262 if chksum not in calc_chksums(buf): 1263 raise InvalidHeaderError("bad checksum") 1264 1265 obj = cls() 1266 obj.name = nts(buf[0:100], encoding, errors) 1267 obj.mode = nti(buf[100:108]) 1268 obj.uid = nti(buf[108:116]) 1269 obj.gid = nti(buf[116:124]) 1270 obj.size = nti(buf[124:136]) 1271 obj.mtime = nti(buf[136:148]) 1272 obj.chksum = chksum 1273 obj.type = buf[156:157] 1274 obj.linkname = nts(buf[157:257], encoding, errors) 1275 obj.uname = nts(buf[265:297], encoding, errors) 1276 obj.gname = nts(buf[297:329], encoding, errors) 1277 obj.devmajor = nti(buf[329:337]) 1278 obj.devminor = nti(buf[337:345]) 1279 prefix = nts(buf[345:500], encoding, errors) 1280 1281 # Old V7 tar format represents a directory as a regular 1282 # file with a trailing slash. 1283 if obj.type == AREGTYPE and obj.name.endswith("/"): 1284 obj.type = DIRTYPE 1285 1286 # The old GNU sparse format occupies some of the unused 1287 # space in the buffer for up to 4 sparse structures. 1288 # Save them for later processing in _proc_sparse(). 1289 if obj.type == GNUTYPE_SPARSE: 1290 pos = 386 1291 structs = [] 1292 for i in range(4): 1293 try: 1294 offset = nti(buf[pos:pos + 12]) 1295 numbytes = nti(buf[pos + 12:pos + 24]) 1296 except ValueError: 1297 break 1298 structs.append((offset, numbytes)) 1299 pos += 24 1300 isextended = bool(buf[482]) 1301 origsize = nti(buf[483:495]) 1302 obj._sparse_structs = (structs, isextended, origsize) 1303 1304 # Remove redundant slashes from directories. 1305 if obj.isdir(): 1306 obj.name = obj.name.rstrip("/") 1307 1308 # Reconstruct a ustar longname. 
1309 if prefix and obj.type not in GNU_TYPES: 1310 obj.name = prefix + "/" + obj.name 1311 return obj 1312 1313 @classmethod 1314 def fromtarfile(cls, tarfile): 1315 """Return the next TarInfo object from TarFile object 1316 tarfile. 1317 """ 1318 buf = tarfile.fileobj.read(BLOCKSIZE) 1319 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors) 1320 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE 1321 return obj._proc_member(tarfile) 1322 1323 #-------------------------------------------------------------------------- 1324 # The following are methods that are called depending on the type of a 1325 # member. The entry point is _proc_member() which can be overridden in a 1326 # subclass to add custom _proc_*() methods. A _proc_*() method MUST 1327 # implement the following 1328 # operations: 1329 # 1. Set self.offset_data to the position where the data blocks begin, 1330 # if there is data that follows. 1331 # 2. Set tarfile.offset to the position where the next member's header will 1332 # begin. 1333 # 3. Return self or another valid TarInfo object. 1334 def _proc_member(self, tarfile): 1335 """Choose the right processing method depending on 1336 the type and call it. 1337 """ 1338 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): 1339 return self._proc_gnulong(tarfile) 1340 elif self.type == GNUTYPE_SPARSE: 1341 return self._proc_sparse(tarfile) 1342 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE): 1343 return self._proc_pax(tarfile) 1344 else: 1345 return self._proc_builtin(tarfile) 1346 1347 def _proc_builtin(self, tarfile): 1348 """Process a builtin type or an unknown type which 1349 will be treated as a regular file. 1350 """ 1351 self.offset_data = tarfile.fileobj.tell() 1352 offset = self.offset_data 1353 if self.isreg() or self.type not in SUPPORTED_TYPES: 1354 # Skip the following data blocks. 1355 offset += self._block(self.size) 1356 tarfile.offset = offset 1357 1358 # Patch the TarInfo object with saved global 1359 # header information. 
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        # Remove redundant slashes from directories. This is to be consistent
        # with frombuf().
        if self.isdir():
            self.name = self.name.rstrip("/")

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError as e:
            raise SubsequentHeaderError(str(e)) from None

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        # Remove redundant slashes from directories. This is to be consistent
        # with frombuf().
        # NOTE(review): removesuffix("/") strips only one trailing slash while
        # frombuf() uses rstrip("/"); they differ for names ending in "//" —
        # confirm whether this asymmetry is intentional.
        if next.isdir():
            next.name = next.name.removesuffix("/")

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            # Each extension block holds up to 21 (offset, numbytes) pairs.
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize
        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2008.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline.
        pos = 0
        encoding = None
        raw_headers = []
        while len(buf) > pos and buf[pos] != 0x00:
            if not (match := _header_length_prefix_re.match(buf, pos)):
                raise InvalidHeaderError("invalid header")
            try:
                length = int(match.group(1))
            except ValueError:
                raise InvalidHeaderError("invalid header")
            # Headers must be at least 5 bytes, shortest being '5 x=\n'.
            # Value is allowed to be empty.
            if length < 5:
                raise InvalidHeaderError("invalid header")
            if pos + length > len(buf):
                raise InvalidHeaderError("invalid header")

            header_value_end_offset = match.start(1) + length - 1  # Last byte of the header
            keyword_and_value = buf[match.end(1) + 1:header_value_end_offset]
            raw_keyword, equals, raw_value = keyword_and_value.partition(b"=")

            # Check the framing of the header. The last character must be '\n' (0x0A)
            if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A:
                raise InvalidHeaderError("invalid header")
            raw_headers.append((length, raw_keyword, raw_value))

            # Check if the pax header contains a hdrcharset field. This tells us
            # the encoding of the path, linkpath, uname and gname fields. Normally,
            # these fields are UTF-8 encoded but since POSIX.1-2008 tar
            # implementations are allowed to store them as raw binary strings if
            # the translation to UTF-8 fails. For the time being, we don't care about
            # anything other than "BINARY". The only other value that is currently
            # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
            # Note that we only follow the initial 'hdrcharset' setting to preserve
            # the initial behavior of the 'tarfile' module.
            if raw_keyword == b"hdrcharset" and encoding is None:
                if raw_value == b"BINARY":
                    encoding = tarfile.encoding
                else:  # This branch ensures only the first 'hdrcharset' header is used.
                    encoding = "utf-8"

            pos += length

        # If no explicit hdrcharset is set, we use UTF-8 as a default.
        if encoding is None:
            encoding = "utf-8"

        # After parsing the raw headers we can decode them to text.
        for length, raw_keyword, raw_value in raw_headers:
            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(raw_value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(raw_value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError as e:
            raise SubsequentHeaderError(str(e)) from None

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, raw_headers)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _proc_gnusparse_00(self, next, raw_headers):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        numbytes = []
        for _, keyword, value in raw_headers:
            if keyword == b"GNU.sparse.offset":
                try:
                    offsets.append(int(value.decode()))
                except ValueError:
                    raise InvalidHeaderError("invalid header")

            elif keyword == b"GNU.sparse.numbytes":
                try:
                    numbytes.append(int(value.decode()))
                except ValueError:
                    raise InvalidHeaderError("invalid header")

        next.sparse = list(zip(offsets, numbytes))

    def _proc_gnusparse_01(self, next, pax_headers):
        """Process a GNU tar extended sparse header, version 0.1.
        """
        # The map is a comma-separated flat list: offset,numbytes,offset,...
        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.
        """
        # Version 1.0 stores the map as newline-separated decimal numbers in
        # the data blocks: first the pair count, then offset/numbytes pairs.
        fields = None
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            if b"\n" not in buf:
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                setattr(self, "path", value)
            elif keyword == "GNU.sparse.size":
                setattr(self, "size", int(value))
            elif keyword == "GNU.sparse.realsize":
                setattr(self, "size", int(value))
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    try:
                        value = PAX_NUMBER_FIELDS[keyword](value)
                    except ValueError:
                        value = 0
                if keyword == "path":
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
        """Decode a single field from a pax record.
        """
        try:
            return value.decode(encoding, "strict")
        except UnicodeDecodeError:
            return value.decode(fallback_encoding, fallback_errors)

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        'Return True if the Tarinfo object is a regular file.'
        return self.type in REGULAR_TYPES

    def isfile(self):
        'Return True if the Tarinfo object is a regular file.'
        return self.isreg()

    def isdir(self):
        'Return True if it is a directory.'
        return self.type == DIRTYPE

    def issym(self):
        'Return True if it is a symbolic link.'
        return self.type == SYMTYPE

    def islnk(self):
        'Return True if it is a hard link.'
        return self.type == LNKTYPE

    def ischr(self):
        'Return True if it is a character device.'
        return self.type == CHRTYPE

    def isblk(self):
        'Return True if it is a block device.'
        return self.type == BLKTYPE

    def isfifo(self):
        'Return True if it is a FIFO.'
        return self.type == FIFOTYPE

    def issparse(self):
        'Return True if the member is a sparse file.'
        return self.sparse is not None

    def isdev(self):
        'Return True if it is one of character device, block device or FIFO.'
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo

class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The file-object for extractfile().

    extraction_filter = None    # The default filter for extraction.

    def __init__(self, name=None, mode="r", fileobj=None, format=None,
            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
            errors="surrogateescape", pax_headers=None, debug=None,
            errorlevel=None, copybufsize=None, stream=False):
        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
           read from an existing archive, 'a' to append data to an existing
           file or 'w' to create a new file overwriting an existing one. `mode'
           defaults to 'r'.
           If `fileobj' is given, it is used for reading or writing data. If it
           can be determined, `mode' is overridden by `fileobj's mode.
           `fileobj' is not closed, when TarFile is closed.
        """
        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
        if mode not in modes:
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        self.mode = mode
        self._mode = modes[mode]

        if not fileobj:
            if self.mode == "a" and not os.path.exists(name):
                # Create nonexistent files in append mode.
                self.mode = "w"
                self._mode = "wb"
            fileobj = bltn_open(name, self._mode)
            self._extfileobj = False
        else:
            if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
                name = fileobj.name
            if hasattr(fileobj, "mode"):
                self._mode = fileobj.mode
            self._extfileobj = True
        self.name = os.path.abspath(name) if name else None
        self.fileobj = fileobj

        self.stream = stream

        # Init attributes.
        if format is not None:
            self.format = format
        if tarinfo is not None:
            self.tarinfo = tarinfo
        if dereference is not None:
            self.dereference = dereference
        if ignore_zeros is not None:
            self.ignore_zeros = ignore_zeros
        if encoding is not None:
            self.encoding = encoding
        self.errors = errors

        # Supplied pax_headers are only honored for PAX_FORMAT archives.
        if pax_headers is not None and self.format == PAX_FORMAT:
            self.pax_headers = pax_headers
        else:
            self.pax_headers = {}

        if debug is not None:
            self.debug = debug
        if errorlevel is not None:
            self.errorlevel = errorlevel

        # Init datastructures.
        self.copybufsize = copybufsize
        self.closed = False
        self.members = []       # list of members as TarInfo objects
        self._loaded = False    # flag if all members have been read
        self.offset = self.fileobj.tell()
                                # current position in the archive file
        self.inodes = {}        # dictionary caching the inodes of
                                # archive members already added

        try:
            if self.mode == "r":
                self.firstmember = None
                self.firstmember = self.next()

            if self.mode == "a":
                # Move to the end of the archive,
                # before the first empty block.
                while True:
                    self.fileobj.seek(self.offset)
                    try:
                        tarinfo = self.tarinfo.fromtarfile(self)
                        self.members.append(tarinfo)
                    except EOFHeaderError:
                        self.fileobj.seek(self.offset)
                        break
                    except HeaderError as e:
                        raise ReadError(str(e)) from None

            if self.mode in ("a", "w", "x"):
                self._loaded = True

                if self.pax_headers:
                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                    self.fileobj.write(buf)
                    self.offset += len(buf)
        except:
            # On any failure, close a file object we opened ourselves
            # before re-raising so the constructor does not leak it.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    #--------------------------------------------------------------------------
    # Below are the classmethods which act as alternate constructors to the
    # TarFile class. The open() method is the only one that is needed for
    # public use; it is the "super"-constructor and is able to select an
    # adequate "sub"-constructor for a particular compression using the mapping
    # from OPEN_METH.
    #
    # This concept allows one to subclass TarFile without losing the comfort of
    # the super-constructor. A sub-constructor is registered and made available
    # by adding it to the mapping in OPEN_METH.

    @classmethod
    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
        """Open a tar archive for reading, writing or appending. Return
           an appropriate TarFile class.

           mode:
           'r' or 'r:*' open for reading with transparent compression
           'r:'         open for reading exclusively uncompressed
           'r:gz'       open for reading with gzip compression
           'r:bz2'      open for reading with bzip2 compression
           'r:xz'       open for reading with lzma compression
           'a' or 'a:'  open for appending, creating the file if necessary
           'w' or 'w:'  open for writing without compression
           'w:gz'       open for writing with gzip compression
           'w:bz2'      open for writing with bzip2 compression
           'w:xz'       open for writing with lzma compression

           'x' or 'x:'  create a tarfile exclusively without compression, raise
                        an exception if the file is already created
           'x:gz'       create a gzip compressed tarfile, raise an exception
                        if the file is already created
           'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                        if the file is already created
           'x:xz'       create an lzma compressed tarfile, raise an exception
                        if the file is already created

           'r|*'        open a stream of tar blocks with transparent compression
           'r|'         open an uncompressed stream of tar blocks for reading
           'r|gz'       open a gzip compressed stream of tar blocks
           'r|bz2'      open a bzip2 compressed stream of tar blocks
           'r|xz'       open an lzma compressed stream of tar blocks
           'w|'         open an uncompressed stream for writing
           'w|gz'       open a gzip compressed stream for writing
           'w|bz2'      open a bzip2 compressed stream for writing
           'w|xz'       open an lzma compressed stream for writing
        """

        if not name and not fileobj:
            raise ValueError("nothing to open")

        if mode in ("r", "r:*"):
            # Find out which *open() is appropriate for opening the file.
            # Uncompressed last: it would otherwise accept compressed data.
            def not_compressed(comptype):
                return cls.OPEN_METH[comptype] == 'taropen'
            error_msgs = []
            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
                func = getattr(cls, cls.OPEN_METH[comptype])
                if fileobj is not None:
                    saved_pos = fileobj.tell()
                try:
                    return func(name, "r", fileobj, **kwargs)
                except (ReadError, CompressionError) as e:
                    error_msgs.append(f'- method {comptype}: {e!r}')
                    # Rewind so the next candidate sees the stream start.
                    if fileobj is not None:
                        fileobj.seek(saved_pos)
                    continue
            error_msgs_summary = '\n'.join(error_msgs)
            raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}")

        elif ":" in mode:
            filemode, comptype = mode.split(":", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            # Select the *open() function according to
            # given compression.
            if comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
            else:
                raise CompressionError("unknown compression type %r" % comptype)
            return func(name, filemode, fileobj, **kwargs)

        elif "|" in mode:
            filemode, comptype = mode.split("|", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            if filemode not in ("r", "w"):
                raise ValueError("mode must be 'r' or 'w'")

            compresslevel = kwargs.pop("compresslevel", 9)
            stream = _Stream(name, filemode, comptype, fileobj, bufsize,
                             compresslevel)
            try:
                t = cls(name, filemode, stream, **kwargs)
            except:
                stream.close()
                raise
            t._extfileobj = False
            return t

        elif mode in ("a", "w", "x"):
            return cls.taropen(name, mode, fileobj, **kwargs)

        raise ValueError("undiscernible mode")

    @classmethod
    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
        """Open uncompressed tar archive name for reading or writing.
        """
        if mode not in ("r", "a", "w", "x"):
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        return cls(name, mode, fileobj, **kwargs)

    @classmethod
    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open gzip compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from gzip import GzipFile
        except ImportError:
            raise CompressionError("gzip module is not available") from None

        try:
            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
        except OSError as e:
            if fileobj is not None and mode == 'r':
                raise ReadError("not a gzip file") from e
            raise

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except OSError as e:
            fileobj.close()
            if mode == 'r':
                raise ReadError("not a gzip file") from e
            raise
        except:
            fileobj.close()
            raise
        t._extfileobj = False
        return t

    @classmethod
    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from bz2 import BZ2File
        except ImportError:
            raise CompressionError("bz2 module is not available") from None

        fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (OSError, EOFError) as e:
            fileobj.close()
            if mode == 'r':
                raise ReadError("not a bzip2 file") from e
            raise
        except:
            fileobj.close()
            raise
        t._extfileobj = False
        return t

    @classmethod
    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
        """Open lzma compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from lzma import LZMAFile, LZMAError
        except ImportError:
            raise CompressionError("lzma module is not available") from None

        fileobj = LZMAFile(fileobj or name, mode, preset=preset)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (LZMAError, EOFError) as e:
            fileobj.close()
            if mode == 'r':
                raise ReadError("not an lzma file") from e
            raise
        except:
            fileobj.close()
            raise
        t._extfileobj = False
        return t

    # All *open() methods are registered here.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open",   # bzip2 compressed tar
        "xz":  "xzopen"     # lzma compressed tar
    }

    #--------------------------------------------------------------------------
    # The public methods which TarFile provides:

    def close(self):
        """Close the TarFile. In write-mode, two finishing zero blocks are
           appended to the archive.
2015 """ 2016 if self.closed: 2017 return 2018 2019 self.closed = True 2020 try: 2021 if self.mode in ("a", "w", "x"): 2022 self.fileobj.write(NUL * (BLOCKSIZE * 2)) 2023 self.offset += (BLOCKSIZE * 2) 2024 # fill up the end with zero-blocks 2025 # (like option -b20 for tar does) 2026 blocks, remainder = divmod(self.offset, RECORDSIZE) 2027 if remainder > 0: 2028 self.fileobj.write(NUL * (RECORDSIZE - remainder)) 2029 finally: 2030 if not self._extfileobj: 2031 self.fileobj.close() 2032 2033 def getmember(self, name): 2034 """Return a TarInfo object for member `name'. If `name' can not be 2035 found in the archive, KeyError is raised. If a member occurs more 2036 than once in the archive, its last occurrence is assumed to be the 2037 most up-to-date version. 2038 """ 2039 tarinfo = self._getmember(name.rstrip('/')) 2040 if tarinfo is None: 2041 raise KeyError("filename %r not found" % name) 2042 return tarinfo 2043 2044 def getmembers(self): 2045 """Return the members of the archive as a list of TarInfo objects. The 2046 list has the same order as the members in the archive. 2047 """ 2048 self._check() 2049 if not self._loaded: # if we want to obtain a list of 2050 self._load() # all members, we first have to 2051 # scan the whole archive. 2052 return self.members 2053 2054 def getnames(self): 2055 """Return the members of the archive as a list of their names. It has 2056 the same order as the list returned by getmembers(). 2057 """ 2058 return [tarinfo.name for tarinfo in self.getmembers()] 2059 2060 def gettarinfo(self, name=None, arcname=None, fileobj=None): 2061 """Create a TarInfo object from the result of os.stat or equivalent 2062 on an existing file. The file is either named by `name', or 2063 specified as a file object `fileobj' with a file descriptor. If 2064 given, `arcname' specifies an alternative name for the file in the 2065 archive, otherwise, the name is taken from the 'name' attribute of 2066 'fileobj', or the 'name' argument. 
The name should be a text 2067 string. 2068 """ 2069 self._check("awx") 2070 2071 # When fileobj is given, replace name by 2072 # fileobj's real name. 2073 if fileobj is not None: 2074 name = fileobj.name 2075 2076 # Building the name of the member in the archive. 2077 # Backward slashes are converted to forward slashes, 2078 # Absolute paths are turned to relative paths. 2079 if arcname is None: 2080 arcname = name 2081 drv, arcname = os.path.splitdrive(arcname) 2082 arcname = arcname.replace(os.sep, "/") 2083 arcname = arcname.lstrip("/") 2084 2085 # Now, fill the TarInfo object with 2086 # information specific for the file. 2087 tarinfo = self.tarinfo() 2088 tarinfo._tarfile = self # To be removed in 3.16. 2089 2090 # Use os.stat or os.lstat, depending on if symlinks shall be resolved. 2091 if fileobj is None: 2092 if not self.dereference: 2093 statres = os.lstat(name) 2094 else: 2095 statres = os.stat(name) 2096 else: 2097 statres = os.fstat(fileobj.fileno()) 2098 linkname = "" 2099 2100 stmd = statres.st_mode 2101 if stat.S_ISREG(stmd): 2102 inode = (statres.st_ino, statres.st_dev) 2103 if not self.dereference and statres.st_nlink > 1 and \ 2104 inode in self.inodes and arcname != self.inodes[inode]: 2105 # Is it a hardlink to an already 2106 # archived file? 2107 type = LNKTYPE 2108 linkname = self.inodes[inode] 2109 else: 2110 # The inode is added only if its valid. 2111 # For win32 it is always 0. 2112 type = REGTYPE 2113 if inode[0]: 2114 self.inodes[inode] = arcname 2115 elif stat.S_ISDIR(stmd): 2116 type = DIRTYPE 2117 elif stat.S_ISFIFO(stmd): 2118 type = FIFOTYPE 2119 elif stat.S_ISLNK(stmd): 2120 type = SYMTYPE 2121 linkname = os.readlink(name) 2122 elif stat.S_ISCHR(stmd): 2123 type = CHRTYPE 2124 elif stat.S_ISBLK(stmd): 2125 type = BLKTYPE 2126 else: 2127 return None 2128 2129 # Fill the TarInfo object with all 2130 # information we can get. 
        tarinfo.name = arcname
        tarinfo.mode = stmd
        tarinfo.uid = statres.st_uid
        tarinfo.gid = statres.st_gid
        if type == REGTYPE:
            tarinfo.size = statres.st_size
        else:
            # Only regular files carry payload bytes in the archive.
            tarinfo.size = 0
        tarinfo.mtime = statres.st_mtime
        tarinfo.type = type
        tarinfo.linkname = linkname
        # Resolve numeric ids to names when the platform modules are present.
        if pwd:
            try:
                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
            except KeyError:
                pass
        if grp:
            try:
                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
            except KeyError:
                pass

        if type in (CHRTYPE, BLKTYPE):
            if hasattr(os, "major") and hasattr(os, "minor"):
                tarinfo.devmajor = os.major(statres.st_rdev)
                tarinfo.devminor = os.minor(statres.st_rdev)
        return tarinfo

    def list(self, verbose=True, *, members=None):
        """Print a table of contents to sys.stdout. If `verbose' is False, only
           the names of the members are printed. If it is True, an `ls -l'-like
           output is produced. `members' is optional and must be a subset of the
           list returned by getmembers().
        """
        # Convert tarinfo type to stat type.
        type2mode = {REGTYPE: stat.S_IFREG, SYMTYPE: stat.S_IFLNK,
                     FIFOTYPE: stat.S_IFIFO, CHRTYPE: stat.S_IFCHR,
                     DIRTYPE: stat.S_IFDIR, BLKTYPE: stat.S_IFBLK}
        self._check()

        if members is None:
            members = self
        for tarinfo in members:
            if verbose:
                # A None mode/mtime can occur on filtered headers; print
                # placeholders instead of failing.
                if tarinfo.mode is None:
                    _safe_print("??????????")
                else:
                    modetype = type2mode.get(tarinfo.type, 0)
                    _safe_print(stat.filemode(modetype | tarinfo.mode))
                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
                                       tarinfo.gname or tarinfo.gid))
                if tarinfo.ischr() or tarinfo.isblk():
                    _safe_print("%10s" %
                                ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
                else:
                    _safe_print("%10d" % tarinfo.size)
                if tarinfo.mtime is None:
                    _safe_print("????-??-?? ??:??:??")
                else:
                    _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
                                % time.localtime(tarinfo.mtime)[:6])

            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))

            if verbose:
                if tarinfo.issym():
                    _safe_print("-> " + tarinfo.linkname)
                if tarinfo.islnk():
                    _safe_print("link to " + tarinfo.linkname)
            print()

    def add(self, name, arcname=None, recursive=True, *, filter=None):
        """Add the file `name' to the archive. `name' may be any type of file
           (directory, fifo, symbolic link, etc.). If given, `arcname'
           specifies an alternative name for the file in the archive.
           Directories are added recursively by default. This can be avoided by
           setting `recursive' to False. `filter' is a function
           that expects a TarInfo object argument and returns the changed
           TarInfo object, if it returns None the TarInfo object will be
           excluded from the archive.
        """
        self._check("awx")

        if arcname is None:
            arcname = name

        # Skip if somebody tries to archive the archive...
        if self.name is not None and os.path.abspath(name) == self.name:
            self._dbg(2, "tarfile: Skipped %r" % name)
            return

        self._dbg(1, name)

        # Create a TarInfo object from the file.
        tarinfo = self.gettarinfo(name, arcname)

        if tarinfo is None:
            self._dbg(1, "tarfile: Unsupported type %r" % name)
            return

        # Change or exclude the TarInfo object.
        if filter is not None:
            tarinfo = filter(tarinfo)
            if tarinfo is None:
                self._dbg(2, "tarfile: Excluded %r" % name)
                return

        # Append the tar header and data to the archive.
        if tarinfo.isreg():
            with bltn_open(name, "rb") as f:
                self.addfile(tarinfo, f)

        elif tarinfo.isdir():
            self.addfile(tarinfo)
            if recursive:
                # Sort for a deterministic member order across platforms.
                for f in sorted(os.listdir(name)):
                    self.add(os.path.join(name, f), os.path.join(arcname, f),
                            recursive, filter=filter)

        else:
            self.addfile(tarinfo)

    def addfile(self, tarinfo, fileobj=None):
        """Add the TarInfo object `tarinfo' to the archive. If `tarinfo' represents
           a non zero-size regular file, the `fileobj' argument should be a binary file,
           and tarinfo.size bytes are read from it and added to the archive.
           You can create TarInfo objects directly, or by using gettarinfo().
        """
        self._check("awx")

        if fileobj is None and tarinfo.isreg() and tarinfo.size != 0:
            raise ValueError("fileobj not provided for non zero-size regular file")

        # Work on a copy so the caller's TarInfo is never mutated.
        tarinfo = copy.copy(tarinfo)

        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
        self.fileobj.write(buf)
        self.offset += len(buf)
        bufsize=self.copybufsize
        # If there's data to follow, append it.
        if fileobj is not None:
            copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
            if remainder > 0:
                # Pad the data up to a full 512-byte block.
                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
                blocks += 1
            self.offset += blocks * BLOCKSIZE

        self.members.append(tarinfo)

    def _get_filter_function(self, filter):
        # Resolve the `filter` argument (None, callable, or a name registered
        # in _NAMED_FILTERS) to a callable extraction filter.
        if filter is None:
            filter = self.extraction_filter
            if filter is None:
                import warnings
                warnings.warn(
                    'Python 3.14 will, by default, filter extracted tar '
                    + 'archives and reject files or modify their metadata. '
                    + 'Use the filter argument to control this behavior.',
                    DeprecationWarning, stacklevel=3)
                return fully_trusted_filter
            if isinstance(filter, str):
                # extraction_filter (the class-level default) must be a
                # callable; only the per-call argument accepts string names.
                raise TypeError(
                    'String names are not supported for '
                    + 'TarFile.extraction_filter. Use a function such as '
                    + 'tarfile.data_filter directly.')
            return filter
        if callable(filter):
            return filter
        try:
            return _NAMED_FILTERS[filter]
        except KeyError:
            raise ValueError(f"filter {filter!r} not found") from None

    def extractall(self, path=".", members=None, *, numeric_owner=False,
                   filter=None):
        """Extract all members from the archive to the current working
           directory and set owner, modification time and permissions on
           directories afterwards. `path' specifies a different directory
           to extract to. `members' is optional and must be a subset of the
           list returned by getmembers(). If `numeric_owner` is True, only
           the numbers for user/group names are used and not the names.

           The `filter` function will be called on each member just
           before extraction.
           It can return a changed TarInfo or None to skip the member.
           String names of common filters are accepted.
        """
        directories = []

        filter_function = self._get_filter_function(filter)
        if members is None:
            members = self

        for member in members:
            tarinfo = self._get_extract_tarinfo(member, filter_function, path)
            if tarinfo is None:
                continue
            if tarinfo.isdir():
                # For directories, delay setting attributes until later,
                # since permissions can interfere with extraction and
                # extracting contents can reset mtime.
                directories.append(tarinfo)
            self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),
                              numeric_owner=numeric_owner)

        # Reverse sort directories.
        # Deepest paths first, so attributes are applied bottom-up.
        directories.sort(key=lambda a: a.name, reverse=True)

        # Set correct owner, mtime and filemode on directories.
        for tarinfo in directories:
            dirpath = os.path.join(path, tarinfo.name)
            try:
                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
                self.utime(tarinfo, dirpath)
                self.chmod(tarinfo, dirpath)
            except ExtractError as e:
                self._handle_nonfatal_error(e)

    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False,
                filter=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a TarInfo object. You can
           specify a different directory using `path'. File attributes (owner,
           mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
           is True, only the numbers for user/group names are used and not
           the names.

           The `filter` function will be called before extraction.
           It can return a changed TarInfo or None to skip the member.
           String names of common filters are accepted.
        """
        filter_function = self._get_filter_function(filter)
        tarinfo = self._get_extract_tarinfo(member, filter_function, path)
        if tarinfo is not None:
            self._extract_one(tarinfo, path, set_attrs, numeric_owner)

    def _get_extract_tarinfo(self, member, filter_function, path):
        """Get filtered TarInfo (or None) from member, which might be a str"""
        if isinstance(member, str):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        unfiltered = tarinfo
        try:
            tarinfo = filter_function(tarinfo, path)
        except (OSError, FilterError) as e:
            self._handle_fatal_error(e)
        except ExtractError as e:
            self._handle_nonfatal_error(e)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % unfiltered.name)
            return None
        # Prepare the link target for makelink().
        if tarinfo.islnk():
            tarinfo = copy.copy(tarinfo)
            tarinfo._link_target = os.path.join(path, tarinfo.linkname)
        return tarinfo

    def _extract_one(self, tarinfo, path, set_attrs, numeric_owner):
        """Extract from filtered tarinfo to disk"""
        self._check("r")

        try:
            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                                 set_attrs=set_attrs,
                                 numeric_owner=numeric_owner)
        except OSError as e:
            self._handle_fatal_error(e)
        except ExtractError as e:
            self._handle_nonfatal_error(e)

    def _handle_nonfatal_error(self, e):
        """Handle non-fatal error (ExtractError) according to errorlevel"""
        # errorlevel > 1: re-raise; otherwise just log at debug level 1.
        if self.errorlevel > 1:
            raise
        else:
            self._dbg(1, "tarfile: %s" % e)

    def _handle_fatal_error(self, e):
        """Handle "fatal" error according to self.errorlevel"""
        # errorlevel > 0: re-raise; errorlevel 0: swallow and log.
        if self.errorlevel > 0:
            raise
        elif isinstance(e, OSError):
            if e.filename is None:
                self._dbg(1, "tarfile: %s" % e.strerror)
            else:
                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
        else:
            self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e))

    def extractfile(self, member):
        """Extract a member from the archive as a file object. `member' may be
           a filename or a TarInfo object. If `member' is a regular file or
           a link, an io.BufferedReader object is returned. For all other
           existing members, None is returned. If `member' does not appear
           in the archive, KeyError is raised.
        """
        self._check("r")

        if isinstance(member, str):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            # Members with unknown types are treated as regular files.
            return self.fileobject(self, tarinfo)

        elif tarinfo.islnk() or tarinfo.issym():
            if isinstance(self.fileobj, _Stream):
                # A small but ugly workaround for the case that someone tries
                # to extract a (sym)link as a file-object from a non-seekable
                # stream of tar blocks.
                raise StreamError("cannot extract (sym)link as file object")
            else:
                # A (sym)link's file object is its target's file object.
                return self.extractfile(self._find_link_target(tarinfo))
        else:
            # If there's no data associated with the member (directory, chrdev,
            # blkdev, etc.), return None instead of a file object.
            return None

    def _extract_member(self, tarinfo, targetpath, set_attrs=True,
                        numeric_owner=False):
        """Extract the TarInfo object tarinfo to a physical
           file called targetpath.
        """
        # Fetch the TarInfo object for the given name
        # and build the destination pathname, replacing
        # forward slashes to platform specific separators.
        targetpath = targetpath.rstrip("/")
        targetpath = targetpath.replace("/", os.sep)

        # Create all upper directories.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            # Create directories that are not part of the archive with
            # default permissions.
            os.makedirs(upperdirs, exist_ok=True)

        if tarinfo.islnk() or tarinfo.issym():
            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
        else:
            self._dbg(1, tarinfo.name)

        # Dispatch to the type-specific make*() method; unknown types
        # fall through to makeunknown(), everything else to makefile().
        if tarinfo.isreg():
            self.makefile(tarinfo, targetpath)
        elif tarinfo.isdir():
            self.makedir(tarinfo, targetpath)
        elif tarinfo.isfifo():
            self.makefifo(tarinfo, targetpath)
        elif tarinfo.ischr() or tarinfo.isblk():
            self.makedev(tarinfo, targetpath)
        elif tarinfo.islnk() or tarinfo.issym():
            self.makelink(tarinfo, targetpath)
        elif tarinfo.type not in SUPPORTED_TYPES:
            self.makeunknown(tarinfo, targetpath)
        else:
            self.makefile(tarinfo, targetpath)

        if set_attrs:
            self.chown(tarinfo, targetpath, numeric_owner)
            if not tarinfo.issym():
                self.chmod(tarinfo, targetpath)
                self.utime(tarinfo, targetpath)

    #--------------------------------------------------------------------------
    # Below are the different file methods. They are called via
    # _extract_member() when extract() is called. They can be replaced in a
    # subclass to implement other functionality.

    def makedir(self, tarinfo, targetpath):
        """Make a directory called targetpath.
        """
        try:
            if tarinfo.mode is None:
                # Use the system's default mode
                os.mkdir(targetpath)
            else:
                # Use a safe mode for the directory, the real mode is set
                # later in _extract_member().
                os.mkdir(targetpath, 0o700)
        except FileExistsError:
            if not os.path.isdir(targetpath):
                raise

    def makefile(self, tarinfo, targetpath):
        """Make a file called targetpath.
        """
        source = self.fileobj
        source.seek(tarinfo.offset_data)
        bufsize = self.copybufsize
        with bltn_open(targetpath, "wb") as target:
            if tarinfo.sparse is not None:
                # Write only the stored data runs, then extend the file
                # to its full logical size.
                for offset, size in tarinfo.sparse:
                    target.seek(offset)
                    copyfileobj(source, target, size, ReadError, bufsize)
                target.seek(tarinfo.size)
                target.truncate()
            else:
                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)

    def makeunknown(self, tarinfo, targetpath):
        """Make a file from a TarInfo object with an unknown type
           at targetpath.
        """
        self.makefile(tarinfo, targetpath)
        self._dbg(1, "tarfile: Unknown file type %r, " \
                     "extracted as regular file." % tarinfo.type)

    def makefifo(self, tarinfo, targetpath):
        """Make a fifo called targetpath.
        """
        if hasattr(os, "mkfifo"):
            os.mkfifo(targetpath)
        else:
            raise ExtractError("fifo not supported by system")

    def makedev(self, tarinfo, targetpath):
        """Make a character or block device called targetpath.
        """
        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
            raise ExtractError("special devices not supported by system")

        mode = tarinfo.mode
        if mode is None:
            # Use mknod's default
            mode = 0o600
        if tarinfo.isblk():
            mode |= stat.S_IFBLK
        else:
            mode |= stat.S_IFCHR

        os.mknod(targetpath, mode,
                 os.makedev(tarinfo.devmajor, tarinfo.devminor))

    def makelink(self, tarinfo, targetpath):
        """Make a (symbolic) link called targetpath. If it cannot be created
           (platform limitation), we try to make a copy of the referenced file
           instead of a link.
        """
        try:
            # For systems that support symbolic and hard links.
            if tarinfo.issym():
                if os.path.lexists(targetpath):
                    # Avoid FileExistsError on following os.symlink.
                    os.unlink(targetpath)
                os.symlink(tarinfo.linkname, targetpath)
            else:
                if os.path.exists(tarinfo._link_target):
                    os.link(tarinfo._link_target, targetpath)
                else:
                    # Hard link target not on disk; extract it from the
                    # archive directly instead.
                    self._extract_member(self._find_link_target(tarinfo),
                                         targetpath)
        except symlink_exception:
            # Platform cannot create the link; fall back to extracting
            # a copy of the link target.
            try:
                self._extract_member(self._find_link_target(tarinfo),
                                     targetpath)
            except KeyError:
                raise ExtractError("unable to resolve link inside archive") from None

    def chown(self, tarinfo, targetpath, numeric_owner):
        """Set owner of targetpath according to tarinfo. If numeric_owner
           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
           is False, fall back to .gid/.uid when the search based on name
           fails.
        """
        if hasattr(os, "geteuid") and os.geteuid() == 0:
            # We have to be root to do so.
            g = tarinfo.gid
            u = tarinfo.uid
            if not numeric_owner:
                try:
                    if grp and tarinfo.gname:
                        g = grp.getgrnam(tarinfo.gname)[2]
                except KeyError:
                    pass
                try:
                    if pwd and tarinfo.uname:
                        u = pwd.getpwnam(tarinfo.uname)[2]
                except KeyError:
                    pass
            if g is None:
                g = -1
            if u is None:
                u = -1
            try:
                if tarinfo.issym() and hasattr(os, "lchown"):
                    os.lchown(targetpath, u, g)
                else:
                    os.chown(targetpath, u, g)
            except (OSError, OverflowError) as e:
                # OverflowError can be raised if an ID doesn't fit in `id_t`
                raise ExtractError("could not change owner") from e

    def chmod(self, tarinfo, targetpath):
        """Set file permissions of targetpath according to tarinfo.
        """
        if tarinfo.mode is None:
            return
        try:
            os.chmod(targetpath, tarinfo.mode)
        except OSError as e:
            raise ExtractError("could not change mode") from e

    def utime(self, tarinfo, targetpath):
        """Set modification time of targetpath according to tarinfo.
        """
        mtime = tarinfo.mtime
        if mtime is None:
            return
        if not hasattr(os, 'utime'):
            return
        try:
            os.utime(targetpath, (mtime, mtime))
        except OSError as e:
            raise ExtractError("could not change modification time") from e

    #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.
        """
        self._check("ra")
        if self.firstmember is not None:
            m = self.firstmember
            self.firstmember = None
            return m

        # Advance the file pointer.
        if self.offset != self.fileobj.tell():
            if self.offset == 0:
                return None
            # Seek to the byte before the expected offset and read it to
            # detect a truncated archive before parsing the header.
            self.fileobj.seek(self.offset - 1)
            if not self.fileobj.read(1):
                raise ReadError("unexpected end of data")

        # Read the next block.
        tarinfo = None
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
            except EOFHeaderError as e:
                # With ignore_zeros, skip the bad block and keep scanning.
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
            except InvalidHeaderError as e:
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                elif self.offset == 0:
                    raise ReadError(str(e)) from None
            except EmptyHeaderError:
                if self.offset == 0:
                    raise ReadError("empty file") from None
            except TruncatedHeaderError as e:
                if self.offset == 0:
                    raise ReadError(str(e)) from None
            except SubsequentHeaderError as e:
                raise ReadError(str(e)) from None
            except Exception as e:
                # Translate decompression failures from a gzip-wrapped
                # stream into ReadError; anything else propagates as-is.
                try:
                    import zlib
                    if isinstance(e, zlib.error):
                        raise ReadError(f'zlib error: {e}') from None
                    else:
                        raise e
                except ImportError:
                    raise e
            break

        if tarinfo is not None:
            # if streaming the file we do not want to cache the tarinfo
            if not self.stream:
                self.members.append(tarinfo)
        else:
            self._loaded = True

        return tarinfo

    #--------------------------------------------------------------------------
    # Little helper methods:

    def _getmember(self, name, tarinfo=None, normalize=False):
        """Find an archive member by name from bottom to top.
           If tarinfo is given, it is used as the starting point.
        """
        # Ensure that all members have been loaded.
        members = self.getmembers()

        # Limit the member search list up to tarinfo.
        skipping = False
        if tarinfo is not None:
            try:
                index = members.index(tarinfo)
            except ValueError:
                # The given starting point might be a (modified) copy.
                # We'll later skip members until we find an equivalent.
                skipping = True
            else:
                # Happy fast path
                members = members[:index]

        if normalize:
            name = os.path.normpath(name)

        for member in reversed(members):
            if skipping:
                # Match the starting point by offset, not identity.
                if tarinfo.offset == member.offset:
                    skipping = False
                continue
            if normalize:
                member_name = os.path.normpath(member.name)
            else:
                member_name = member.name

            if name == member_name:
                return member

        if skipping:
            # Starting point was not found
            raise ValueError(tarinfo)

    def _load(self):
        """Read through the entire archive file and look for readable
           members. This should not run if the file is set to stream.
        """
        if not self.stream:
            while self.next() is not None:
                pass
            self._loaded = True

    def _check(self, mode=None):
        """Check if TarFile is still open, and if the operation's mode
           corresponds to TarFile's mode.
        """
        if self.closed:
            raise OSError("%s is closed" % self.__class__.__name__)
        if mode is not None and self.mode not in mode:
            raise OSError("bad operation for mode %r" % self.mode)

    def _find_link_target(self, tarinfo):
        """Find the target member of a symlink or hardlink member in the
           archive.
        """
        if tarinfo.issym():
            # Always search the entire archive.
            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
            limit = None
        else:
            # Search the archive before the link, because a hard link is
            # just a reference to an already archived file.
            linkname = tarinfo.linkname
            limit = tarinfo

        member = self._getmember(linkname, tarinfo=limit, normalize=True)
        if member is None:
            raise KeyError("linkname %r not found" % linkname)
        return member

    def __iter__(self):
        """Provide an iterator object.
        """
        if self._loaded:
            yield from self.members
            return

        # Yield items using TarFile's next() method.
        # When all members have been read, set TarFile as _loaded.
        index = 0
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will have already exhausted the next() method.
        if self.firstmember is not None:
            tarinfo = self.next()
            index += 1
            yield tarinfo

        while True:
            if index < len(self.members):
                tarinfo = self.members[index]
            elif not self._loaded:
                tarinfo = self.next()
                if not tarinfo:
                    self._loaded = True
                    return
            else:
                return
            index += 1
            yield tarinfo

    def _dbg(self, level, msg):
        """Write debugging output to sys.stderr.
        """
        if level <= self.debug:
            print(msg, file=sys.stderr)

    def __enter__(self):
        self._check()
        return self

    def __exit__(self, type, value, traceback):
        if type is None:
            self.close()
        else:
            # An exception occurred. We must not call close() because
            # it would try to write end-of-archive blocks and padding.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True

#--------------------
# exported functions
#--------------------

def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       'name' should be a string, file, or file-like object.
    """
    try:
        if hasattr(name, "read"):
            # Remember and restore the stream position around the probe.
            pos = name.tell()
            t = open(fileobj=name)
            name.seek(pos)
        else:
            t = open(name)
        t.close()
        return True
    except TarError:
        return False

open = TarFile.open


def main():
    import argparse

    description = 'A simple command-line interface for tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    parser.add_argument('--filter', metavar='<filtername>',
                        choices=_NAMED_FILTERS,
                        help='Filter for extraction')

    # Exactly one of list/extract/create/test must be given.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<tarfile>',
                       help='Show listing of a tarfile')
    group.add_argument('-e', '--extract', nargs='+',
                       metavar=('<tarfile>', '<output_dir>'),
                       help='Extract tarfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create tarfile from sources')
    group.add_argument('-t', '--test', metavar='<tarfile>',
                       help='Test if a tarfile is valid')

    args = parser.parse_args()

    if args.filter and args.extract is None:
        parser.exit(1, '--filter is only valid for extraction\n')

    if args.test is not None:
        src = args.test
        if is_tarfile(src):
            with open(src, 'r') as tar:
                tar.getmembers()
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.list is not None:
        src = args.list
        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.extract is not None:
        if len(args.extract) == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif len(args.extract) == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir, filter=args.filter)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.create is not None:
        tar_name = args.create.pop(0)
        _, ext = os.path.splitext(tar_name)
        # Choose the compression mode from the target file's extension.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))

if __name__ == '__main__':
    main()