#!/usr/bin/env python3
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
# All rights reserved.
#
# Permission  is  hereby granted,  free  of charge,  to  any person
# obtaining a  copy of  this software  and associated documentation
# files  (the  "Software"),  to   deal  in  the  Software   without
# restriction,  including  without limitation  the  rights to  use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies  of  the  Software,  and to  permit  persons  to  whom the
# Software  is  furnished  to  do  so,  subject  to  the  following
# conditions:
#
# The above copyright  notice and this  permission notice shall  be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
"""Read from and write to tar format archives.
"""

version     = "0.9.0"
__author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."

#---------
# Imports
#---------
from builtins import open as bltn_open
import sys
import os
import io
import shutil
import stat
import time
import struct
import copy
import re

try:
    import pwd
except ImportError:
    pwd = None
try:
    import grp
except ImportError:
    grp = None

# os.symlink on Windows prior to 6.0 raises NotImplementedError
# OSError (winerror=1314) will be raised if the caller does not hold the
# SeCreateSymbolicLinkPrivilege privilege
symlink_exception = (AttributeError, NotImplementedError, OSError)

# from tarfile import *
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
           "CompressionError", "StreamError", "ExtractError", "HeaderError",
           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
           "DEFAULT_FORMAT", "open", "fully_trusted_filter", "data_filter",
           "tar_filter", "FilterError", "AbsoluteLinkError",
           "OutsideDestinationError", "SpecialFileError", "AbsolutePathError",
           "LinkOutsideDestinationError"]


#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = PAX_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers; all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}

#---------------------------------------------------------
# initialization
#---------------------------------------------------------
if os.name == "nt":
    ENCODING = "utf-8"
else:
    ENCODING = sys.getfilesystemencoding()

#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------

def stn(s, length, encoding, errors):
    """Convert a string to a null-terminated bytes object.
    """
    if s is None:
        raise ValueError("metadata cannot contain None")
    s = s.encode(encoding, errors)
    return s[:length] + (length - len(s)) * NUL

def nts(s, encoding, errors):
    """Convert a null-terminated bytes object to a string.
    """
    p = s.find(b"\0")
    if p != -1:
        s = s[:p]
    return s.decode(encoding, errors)

def nti(s):
    """Convert a number field to a python number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] in (0o200, 0o377):
        n = 0
        for i in range(len(s) - 1):
            n <<= 8
            n += s[i + 1]
        if s[0] == 0o377:
            n = -(256 ** (len(s) - 1) - n)
    else:
        try:
            s = nts(s, "ascii", "strict")
            n = int(s.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    return n

def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte; this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicates this
    # particular encoding; the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    original_n = n
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            n = 256 ** digits + n

        for i in range(digits - 1):
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s

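# Example (illustrative sketch, not authoritative): round-tripping a number
# field through itn() and nti().  Small values use the POSIX octal encoding;
# values that do not fit the octal field fall back to the GNU base-256
# encoding when format=GNU_FORMAT.
#
#     >>> itn(1000)
#     b'0001750\x00'
#     >>> nti(itn(1000))
#     1000
#     >>> nti(itn(8 ** 7, digits=8, format=GNU_FORMAT))   # too big for octal
#     2097152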
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.
    """
    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum

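# Example (illustrative): for an all-zero 512 byte block both sums are 256,
# because the eight-byte chksum field is treated as eight spaces
# (8 * 0x20 == 256) regardless of the actual buffer contents.
#
#     >>> calc_chksums(bytes(BLOCKSIZE))
#     (256, 256)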
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    blocks, remainder = divmod(length, bufsize)
    for b in range(blocks):
        buf = src.read(bufsize)
        if len(buf) < bufsize:
            raise exception("unexpected end of data")
        dst.write(buf)

    if remainder != 0:
        buf = src.read(remainder)
        if len(buf) < remainder:
            raise exception("unexpected end of data")
        dst.write(buf)
    return

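# Example (illustrative): copying an exact number of bytes between two
# in-memory file objects; a short read raises the given exception.
#
#     >>> import io
#     >>> src, dst = io.BytesIO(b"x" * 1024), io.BytesIO()
#     >>> copyfileobj(src, dst, 512)
#     >>> len(dst.getvalue())
#     512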
def _safe_print(s):
    encoding = getattr(sys.stdout, 'encoding', None)
    if encoding is not None:
        s = s.encode(encoding, 'backslashreplace').decode(encoding)
    print(s, end=' ')


class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Base exception for header errors."""
    pass
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
    pass
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
    pass
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
    pass
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
    pass
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
    pass

#---------------------------
# internal stream interface
#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)

class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method that works with bytes,
       and the method is accessed blockwise.
       Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin.buffer,
       sys.stdout.buffer, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize,
                 compresslevel):
        """Construct a _Stream object.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = b""
        self.pos      = 0
        self.closed   = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available") from None
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self.exception = zlib.error
                    self._init_read_gz()
                else:
                    self._init_write_gz(compresslevel)

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available") from None
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor(compresslevel)

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available") from None
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self, compresslevel):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(compresslevel,
                                         self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # Honor "directory components removed" from RFC1952
        self.name = os.path.basename(self.name)
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.read(xlen)
        if flag & 8:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size):
        """Return the next size number of bytes from the stream."""
        assert size is not None
        buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            # Skip underlying buffer to avoid unaligned double buffering.
            if self.buf:
                buf = self.buf
                self.buf = b""
            else:
                buf = self.fileobj.read(self.bufsize)
                if not buf:
                    break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception as e:
                raise ReadError("invalid compressed data") from e
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream

class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        if self.buf.startswith(b"\x1f\x8b\x08"):
            return "gz"
        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
            return "bz2"
        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        else:
            return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy

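# Example (illustrative): the proxy above buffers the first BLOCKSIZE bytes
# and inspects well-known magic bytes (gzip: 1f 8b 08, bzip2: "BZh" + "1AY&SY",
# xz/lzma: fd 37 7a 58 5a / 5d 00 00 80) to pick a compression type.
#
#     >>> import gzip, io
#     >>> _StreamProxy(io.BytesIO(gzip.compress(b"data"))).getcomptype()
#     'gz'
#     >>> _StreamProxy(io.BytesIO(b"\x00" * BLOCKSIZE)).getcomptype()
#     'tar'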
#------------------------
# Extraction file object
#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, name, blockinfo=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = name
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    @property
    def mode(self):
        return 'rb'

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        buf = b""
        while size > 0:
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                buf += b
            else:
                buf += NUL * length
            size -= length
            self.position += length
        return buf

    def readinto(self, b):
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        self.closed = True
#class _FileInFile

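# Worked example (illustrative) of the block map built in _FileInFile above:
# for a sparse member of size 10 whose data starts at archive offset 1000 and
# whose blockinfo is [(2, 3), (7, 2)], the map becomes
#
#     [(False, 0, 2, None),      # hole
#      (True,  2, 5, 1000),      # data stored at offsets 1000..1002
#      (False, 5, 7, None),      # hole
#      (True,  7, 9, 1003),      # data stored at offsets 1003..1004
#      (False, 9, 10, None)]     # trailing hole
#
# read() serves the True entries from the underlying file object and fills
# the False entries with NUL bytes.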
class ExFileObject(io.BufferedReader):

    def __init__(self, tarfile, tarinfo):
        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                tarinfo.size, tarinfo.name, tarinfo.sparse)
        super().__init__(fileobj)
#class ExFileObject


#-----------------------------
# extraction filters (PEP 706)
#-----------------------------

class FilterError(TarError):
    pass

class AbsolutePathError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'member {tarinfo.name!r} has an absolute path')

class OutsideDestinationError(FilterError):
    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, '
                         + 'which is outside the destination')

class SpecialFileError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'{tarinfo.name!r} is a special file')

class AbsoluteLinkError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'{tarinfo.name!r} is a link to an absolute path')

class LinkOutsideDestinationError(FilterError):
    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        super().__init__(f'{tarinfo.name!r} would link to {path!r}, '
                         + 'which is outside the destination')

def _get_filtered_attrs(member, dest_path, for_data=True):
    new_attrs = {}
    name = member.name
    dest_path = os.path.realpath(dest_path)
    # Strip leading / (tar's directory separator) from filenames.
    # Include os.sep (target OS directory separator) as well.
    if name.startswith(('/', os.sep)):
        name = new_attrs['name'] = member.path.lstrip('/' + os.sep)
    if os.path.isabs(name):
        # Path is absolute even after stripping.
        # For example, 'C:/foo' on Windows.
        raise AbsolutePathError(member)
    # Ensure we stay in the destination
    target_path = os.path.realpath(os.path.join(dest_path, name))
    if os.path.commonpath([target_path, dest_path]) != dest_path:
        raise OutsideDestinationError(member, target_path)
    # Limit permissions (no high bits, and go-w)
    mode = member.mode
    if mode is not None:
        # Strip high bits & group/other write bits
        mode = mode & 0o755
        if for_data:
            # For data, handle permissions & file types
            if member.isreg() or member.islnk():
                if not mode & 0o100:
                    # Clear executable bits if not executable by user
                    mode &= ~0o111
                # Ensure owner can read & write
                mode |= 0o600
            elif member.isdir() or member.issym():
                # Ignore mode for directories & symlinks
                mode = None
            else:
                # Reject special files
                raise SpecialFileError(member)
        if mode != member.mode:
            new_attrs['mode'] = mode
    if for_data:
        # Ignore ownership for 'data'
        if member.uid is not None:
            new_attrs['uid'] = None
        if member.gid is not None:
            new_attrs['gid'] = None
        if member.uname is not None:
            new_attrs['uname'] = None
        if member.gname is not None:
            new_attrs['gname'] = None
        # Check link destination for 'data'
        if member.islnk() or member.issym():
            if os.path.isabs(member.linkname):
                raise AbsoluteLinkError(member)
            if member.issym():
                target_path = os.path.join(dest_path,
                                           os.path.dirname(name),
                                           member.linkname)
            else:
                target_path = os.path.join(dest_path,
                                           member.linkname)
            target_path = os.path.realpath(target_path)
            if os.path.commonpath([target_path, dest_path]) != dest_path:
                raise LinkOutsideDestinationError(member, target_path)
    return new_attrs

def fully_trusted_filter(member, dest_path):
    return member

def tar_filter(member, dest_path):
    new_attrs = _get_filtered_attrs(member, dest_path, False)
    if new_attrs:
        return member.replace(**new_attrs, deep=False)
    return member

def data_filter(member, dest_path):
    new_attrs = _get_filtered_attrs(member, dest_path, True)
    if new_attrs:
        return member.replace(**new_attrs, deep=False)
    return member

_NAMED_FILTERS = {
    "fully_trusted": fully_trusted_filter,
    "tar": tar_filter,
    "data": data_filter,
}

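# Example (illustrative) of how the named filters are typically used: passing
# filter="data" (or a callable such as data_filter) to extraction rejects
# absolute paths, members and links that would escape the destination, and
# special files, and drops ownership information (PEP 706).  The sketch below
# assumes an existing "archive.tar" and destination directory "dest":
#
#     >>> import tarfile
#     >>> with tarfile.open("archive.tar") as tf:
#     ...     tf.extractall(path="dest", filter="data")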
#------------------
# Exported Classes
#------------------

# Sentinel for replace() defaults, meaning "don't change the attribute"
_KEEP = object()

# Header length is digits followed by a space.
_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ")

class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    __slots__ = dict(
        name = 'Name of the archive member.',
        mode = 'Permission bits.',
        uid = 'User ID of the user who originally stored this member.',
        gid = 'Group ID of the user who originally stored this member.',
        size = 'Size in bytes.',
        mtime = 'Time of last modification.',
        chksum = 'Header checksum.',
        type = ('File type. type is usually one of these constants: '
                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
        linkname = ('Name of the target file name, which is only present '
                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
        uname = 'User name.',
        gname = 'Group name.',
        devmajor = 'Device major number.',
        devminor = 'Device minor number.',
        offset = 'The tar header starts here.',
        offset_data = "The file's data starts here.",
        pax_headers = ('A dictionary containing key-value pairs of an '
                       'associated pax extended header.'),
        sparse = 'Sparse member information.',
        _tarfile = None,
        _sparse_structs = None,
        _link_target = None,
        )

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    @property
    def tarfile(self):
        import warnings
        warnings.warn(
            'The undocumented "tarfile" attribute of TarInfo objects '
            + 'is deprecated and will be removed in Python 3.16',
            DeprecationWarning, stacklevel=2)
        return self._tarfile

    @tarfile.setter
    def tarfile(self, tarfile):
        import warnings
        warnings.warn(
            'The undocumented "tarfile" attribute of TarInfo objects '
            + 'is deprecated and will be removed in Python 3.16',
            DeprecationWarning, stacklevel=2)
        self._tarfile = tarfile

    @property
    def path(self):
        'In pax headers, "name" is called "path".'
        return self.name

    @path.setter
    def path(self, name):
        self.name = name

    @property
    def linkpath(self):
        'In pax headers, "linkname" is called "linkpath".'
        return self.linkname

    @linkpath.setter
    def linkpath(self, linkname):
        self.linkname = linkname

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def replace(self, *,
                name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP,
                uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP,
                deep=True, _KEEP=_KEEP):
        """Return a deep copy of self with the given attributes replaced.
        """
        if deep:
            result = copy.deepcopy(self)
        else:
            result = copy.copy(self)
        if name is not _KEEP:
            result.name = name
        if mtime is not _KEEP:
            result.mtime = mtime
        if mode is not _KEEP:
            result.mode = mode
        if linkname is not _KEEP:
            result.linkname = linkname
        if uid is not _KEEP:
            result.uid = uid
        if gid is not _KEEP:
            result.gid = gid
        if uname is not _KEEP:
            result.uname = uname
        if gname is not _KEEP:
            result.gname = gname
        return result

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.
        """
        if self.mode is None:
            mode = None
        else:
            mode = self.mode & 0o7777
        info = {
            "name":     self.name,
            "mode":     mode,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a string of 512 byte blocks.
        """
        info = self.get_info()
        for name, value in info.items():
            if value is None:
                raise ValueError("%s may not be None" % name)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplemental information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that need to be stored as a float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            needs_pax = False

            val = info[name]
            val_is_float = isinstance(val, float)
            val_int = round(val) if val_is_float else val
            if not 0 <= val_int < 8 ** (digits - 1):
                # Avoid overflow.
                info[name] = 0
                needs_pax = True
            elif val_is_float:
                # Put rounded value in ustar header, and full
                # precision value in pax header.
                info[name] = val_int
                needs_pax = True

            # The existing pax header has priority.
            if needs_pax and name not in pax_headers:
                pax_headers[name] = str(val)

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

    def _posix_split_name(self, name, encoding, errors):
        """Split a name longer than 100 chars into a prefix
           and a name part.
        """
        components = name.split("/")
        for i in range(1, len(components)):
            prefix = "/".join(components[:i])
            name = "/".join(components[i:])
            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
                    len(name.encode(encoding, errors)) <= LENGTH_NAME:
                break
        else:
            raise ValueError("name is too long")

        return prefix, name

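    # Example (illustrative): a member name that does not fit the 100 byte
    # ustar name field is split at a "/" so that the prefix fits into 155
    # bytes and the remainder into 100 bytes.
    #
    #     >>> t = TarInfo()
    #     >>> prefix, name = t._posix_split_name("a" * 60 + "/" + "b" * 60,
    #     ...                                    "utf-8", "strict")
    #     >>> len(prefix), len(name)
    #     (60, 60)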
    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
        if has_device_fields:
            devmajor = itn(info.get("devmajor", 0), 8, format)
            devminor = itn(info.get("devminor", 0), 8, format)
        else:
            devmajor = stn("", 8, encoding, errors)
            devminor = stn("", 8, encoding, errors)

        # None values in metadata should cause ValueError.
        # itn()/stn() do this for all fields except type.
        filetype = info.get("type", REGTYPE)
        if filetype is None:
            raise ValueError("TarInfo.type must not be None")

        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            b"        ", # checksum field
            filetype,
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            devmajor,
            devminor,
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
           up to the next 512 byte border.
        """
        blocks, remainder = divmod(len(payload), BLOCKSIZE)
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for keyword, value in pax_headers.items():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

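    # Worked example (illustrative) for the self-referential length field
    # computed in the loop above: each record is "<len> <keyword>=<value>\n",
    # where <len> counts the entire record, including its own digits.  For
    # keyword "path" and value "foo", l = 4 + 3 + 3 = 10; iterating
    # n = l + len(str(p)) gives 11, then 12, then 12 again, so the record
    # becomes b"12 path=foo\n", which is indeed 12 bytes long.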
1250    @classmethod
1251    def frombuf(cls, buf, encoding, errors):
1252        """Construct a TarInfo object from a 512 byte bytes object.
1253        """
1254        if len(buf) == 0:
1255            raise EmptyHeaderError("empty header")
1256        if len(buf) != BLOCKSIZE:
1257            raise TruncatedHeaderError("truncated header")
1258        if buf.count(NUL) == BLOCKSIZE:
1259            raise EOFHeaderError("end of file header")
1260
1261        chksum = nti(buf[148:156])
1262        if chksum not in calc_chksums(buf):
1263            raise InvalidHeaderError("bad checksum")
1264
1265        obj = cls()
1266        obj.name = nts(buf[0:100], encoding, errors)
1267        obj.mode = nti(buf[100:108])
1268        obj.uid = nti(buf[108:116])
1269        obj.gid = nti(buf[116:124])
1270        obj.size = nti(buf[124:136])
1271        obj.mtime = nti(buf[136:148])
1272        obj.chksum = chksum
1273        obj.type = buf[156:157]
1274        obj.linkname = nts(buf[157:257], encoding, errors)
1275        obj.uname = nts(buf[265:297], encoding, errors)
1276        obj.gname = nts(buf[297:329], encoding, errors)
1277        obj.devmajor = nti(buf[329:337])
1278        obj.devminor = nti(buf[337:345])
1279        prefix = nts(buf[345:500], encoding, errors)
1280
1281        # Old V7 tar format represents a directory as a regular
1282        # file with a trailing slash.
1283        if obj.type == AREGTYPE and obj.name.endswith("/"):
1284            obj.type = DIRTYPE
1285
1286        # The old GNU sparse format occupies some of the unused
1287        # space in the buffer for up to 4 sparse structures.
1288        # Save them for later processing in _proc_sparse().
1289        if obj.type == GNUTYPE_SPARSE:
1290            pos = 386
1291            structs = []
1292            for i in range(4):
1293                try:
1294                    offset = nti(buf[pos:pos + 12])
1295                    numbytes = nti(buf[pos + 12:pos + 24])
1296                except ValueError:
1297                    break
1298                structs.append((offset, numbytes))
1299                pos += 24
1300            isextended = bool(buf[482])
1301            origsize = nti(buf[483:495])
1302            obj._sparse_structs = (structs, isextended, origsize)
1303
1304        # Remove redundant slashes from directories.
1305        if obj.isdir():
1306            obj.name = obj.name.rstrip("/")
1307
1308        # Reconstruct a ustar longname.
1309        if prefix and obj.type not in GNU_TYPES:
1310            obj.name = prefix + "/" + obj.name
1311        return obj
1312
1313    @classmethod
1314    def fromtarfile(cls, tarfile):
1315        """Return the next TarInfo object from TarFile object
1316           tarfile.
1317        """
1318        buf = tarfile.fileobj.read(BLOCKSIZE)
1319        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1320        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1321        return obj._proc_member(tarfile)
1322
1323    #--------------------------------------------------------------------------
1324    # The following are methods that are called depending on the type of a
1325    # member. The entry point is _proc_member() which can be overridden in a
1326    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1327    # implement the following
1328    # operations:
1329    # 1. Set self.offset_data to the position where the data blocks begin,
1330    #    if there is data that follows.
1331    # 2. Set tarfile.offset to the position where the next member's header will
1332    #    begin.
1333    # 3. Return self or another valid TarInfo object.
1334    def _proc_member(self, tarfile):
1335        """Choose the right processing method depending on
1336           the type and call it.
1337        """
1338        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1339            return self._proc_gnulong(tarfile)
1340        elif self.type == GNUTYPE_SPARSE:
1341            return self._proc_sparse(tarfile)
1342        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1343            return self._proc_pax(tarfile)
1344        else:
1345            return self._proc_builtin(tarfile)
1346
1347    def _proc_builtin(self, tarfile):
1348        """Process a builtin type or an unknown type which
1349           will be treated as a regular file.
1350        """
1351        self.offset_data = tarfile.fileobj.tell()
1352        offset = self.offset_data
1353        if self.isreg() or self.type not in SUPPORTED_TYPES:
1354            # Skip the following data blocks.
1355            offset += self._block(self.size)
1356        tarfile.offset = offset
1357
1358        # Patch the TarInfo object with saved global
1359        # header information.
1360        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1361
1362        # Remove redundant slashes from directories. This is to be consistent
1363        # with frombuf().
1364        if self.isdir():
1365            self.name = self.name.rstrip("/")
1366
1367        return self
1368
1369    def _proc_gnulong(self, tarfile):
1370        """Process the blocks that hold a GNU longname
1371           or longlink member.
1372        """
1373        buf = tarfile.fileobj.read(self._block(self.size))
1374
1375        # Fetch the next header and process it.
1376        try:
1377            next = self.fromtarfile(tarfile)
1378        except HeaderError as e:
1379            raise SubsequentHeaderError(str(e)) from None
1380
1381        # Patch the TarInfo object from the next header with
1382        # the longname information.
1383        next.offset = self.offset
1384        if self.type == GNUTYPE_LONGNAME:
1385            next.name = nts(buf, tarfile.encoding, tarfile.errors)
1386        elif self.type == GNUTYPE_LONGLINK:
1387            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
1388
1389        # Remove redundant slashes from directories. This is to be consistent
1390        # with frombuf().
1391        if next.isdir():
1392            next.name = next.name.removesuffix("/")
1393
1394        return next
1395
1396    def _proc_sparse(self, tarfile):
1397        """Process a GNU sparse header plus extra headers.
1398        """
1399        # We already collected some sparse structures in frombuf().
1400        structs, isextended, origsize = self._sparse_structs
1401        del self._sparse_structs
1402
1403        # Collect sparse structures from extended header blocks.
1404        while isextended:
1405            buf = tarfile.fileobj.read(BLOCKSIZE)
1406            pos = 0
1407            for i in range(21):
1408                try:
1409                    offset = nti(buf[pos:pos + 12])
1410                    numbytes = nti(buf[pos + 12:pos + 24])
1411                except ValueError:
1412                    break
1413                if offset and numbytes:
1414                    structs.append((offset, numbytes))
1415                pos += 24
1416            isextended = bool(buf[504])
1417        self.sparse = structs
1418
1419        self.offset_data = tarfile.fileobj.tell()
1420        tarfile.offset = self.offset_data + self._block(self.size)
1421        self.size = origsize
1422        return self
1423
1424    def _proc_pax(self, tarfile):
1425        """Process an extended or global header as described in
1426           POSIX.1-2008.
1427        """
1428        # Read the header information.
1429        buf = tarfile.fileobj.read(self._block(self.size))
1430
1431        # A pax header stores supplemental information for either
1432        # the following file (extended) or all following files
1433        # (global).
1434        if self.type == XGLTYPE:
1435            pax_headers = tarfile.pax_headers
1436        else:
1437            pax_headers = tarfile.pax_headers.copy()
1438
1439        # Parse pax header information. A record looks like that:
1440        # "%d %s=%s\n" % (length, keyword, value). length is the size
1441        # of the complete record including the length field itself and
1442        # the newline.
1443        pos = 0
1444        encoding = None
1445        raw_headers = []
1446        while len(buf) > pos and buf[pos] != 0x00:
1447            if not (match := _header_length_prefix_re.match(buf, pos)):
1448                raise InvalidHeaderError("invalid header")
1449            try:
1450                length = int(match.group(1))
1451            except ValueError:
1452                raise InvalidHeaderError("invalid header")
1453            # Headers must be at least 5 bytes, shortest being '5 x=\n'.
1454            # Value is allowed to be empty.
1455            if length < 5:
1456                raise InvalidHeaderError("invalid header")
1457            if pos + length > len(buf):
1458                raise InvalidHeaderError("invalid header")
1459
1460            header_value_end_offset = match.start(1) + length - 1  # Last byte of the header
1461            keyword_and_value = buf[match.end(1) + 1:header_value_end_offset]
1462            raw_keyword, equals, raw_value = keyword_and_value.partition(b"=")
1463
1464            # Check the framing of the header. The last character must be '\n' (0x0A)
1465            if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A:
1466                raise InvalidHeaderError("invalid header")
1467            raw_headers.append((length, raw_keyword, raw_value))
1468
1469            # Check if the pax header contains a hdrcharset field. This tells us
1470            # the encoding of the path, linkpath, uname and gname fields. Normally,
1471            # these fields are UTF-8 encoded but, since POSIX.1-2008, tar
1472            # implementations are allowed to store them as raw binary strings if
1473            # the translation to UTF-8 fails. For the time being, we don't care about
1474            # anything other than "BINARY". The only other value that is currently
1475            # allowed by the standard is "ISO-IR 10646 2000 UTF-8", in other words UTF-8.
1476            # Note that we only follow the initial 'hdrcharset' setting to preserve
1477            # the initial behavior of the 'tarfile' module.
1478            if raw_keyword == b"hdrcharset" and encoding is None:
1479                if raw_value == b"BINARY":
1480                    encoding = tarfile.encoding
1481                else:  # This branch ensures only the first 'hdrcharset' header is used.
1482                    encoding = "utf-8"
1483
1484            pos += length
1485
1486        # If no explicit hdrcharset is set, we use UTF-8 as a default.
1487        if encoding is None:
1488            encoding = "utf-8"
1489
1490        # After parsing the raw headers we can decode them to text.
1491        for length, raw_keyword, raw_value in raw_headers:
1492            # Normally, we could just use "utf-8" as the encoding and "strict"
1493            # as the error handler, but we better not take the risk. For
1494            # example, GNU tar <= 1.23 is known to store filenames it cannot
1495            # translate to UTF-8 as raw strings (unfortunately without a
1496            # hdrcharset=BINARY header).
1497            # We first try the strict standard encoding, and if that fails we
1498            # fall back on the user's encoding and error handler.
1499            keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8",
1500                    tarfile.errors)
1501            if keyword in PAX_NAME_FIELDS:
1502                value = self._decode_pax_field(raw_value, encoding, tarfile.encoding,
1503                        tarfile.errors)
1504            else:
1505                value = self._decode_pax_field(raw_value, "utf-8", "utf-8",
1506                        tarfile.errors)
1507
1508            pax_headers[keyword] = value
1509
1510        # Fetch the next header.
1511        try:
1512            next = self.fromtarfile(tarfile)
1513        except HeaderError as e:
1514            raise SubsequentHeaderError(str(e)) from None
1515
1516        # Process GNU sparse information.
1517        if "GNU.sparse.map" in pax_headers:
1518            # GNU extended sparse format version 0.1.
1519            self._proc_gnusparse_01(next, pax_headers)
1520
1521        elif "GNU.sparse.size" in pax_headers:
1522            # GNU extended sparse format version 0.0.
1523            self._proc_gnusparse_00(next, raw_headers)
1524
1525        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1526            # GNU extended sparse format version 1.0.
1527            self._proc_gnusparse_10(next, pax_headers, tarfile)
1528
1529        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1530            # Patch the TarInfo object with the extended header info.
1531            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1532            next.offset = self.offset
1533
1534            if "size" in pax_headers:
1535                # If the extended header replaces the size field,
1536                # we need to recalculate the offset where the next
1537                # header starts.
1538                offset = next.offset_data
1539                if next.isreg() or next.type not in SUPPORTED_TYPES:
1540                    offset += next._block(next.size)
1541                tarfile.offset = offset
1542
1543        return next
1544
1545    def _proc_gnusparse_00(self, next, raw_headers):
1546        """Process a GNU tar extended sparse header, version 0.0.
1547        """
1548        offsets = []
1549        numbytes = []
1550        for _, keyword, value in raw_headers:
1551            if keyword == b"GNU.sparse.offset":
1552                try:
1553                    offsets.append(int(value.decode()))
1554                except ValueError:
1555                    raise InvalidHeaderError("invalid header")
1556
1557            elif keyword == b"GNU.sparse.numbytes":
1558                try:
1559                    numbytes.append(int(value.decode()))
1560                except ValueError:
1561                    raise InvalidHeaderError("invalid header")
1562
1563        next.sparse = list(zip(offsets, numbytes))
1564
1565    def _proc_gnusparse_01(self, next, pax_headers):
1566        """Process a GNU tar extended sparse header, version 0.1.
1567        """
1568        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1569        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1570
1571    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1572        """Process a GNU tar extended sparse header, version 1.0.
1573        """
1574        fields = None
1575        sparse = []
1576        buf = tarfile.fileobj.read(BLOCKSIZE)
1577        fields, buf = buf.split(b"\n", 1)
1578        fields = int(fields)
1579        while len(sparse) < fields * 2:
1580            if b"\n" not in buf:
1581                buf += tarfile.fileobj.read(BLOCKSIZE)
1582            number, buf = buf.split(b"\n", 1)
1583            sparse.append(int(number))
1584        next.offset_data = tarfile.fileobj.tell()
1585        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1586
1587    def _apply_pax_info(self, pax_headers, encoding, errors):
1588        """Replace fields with supplemental information from a previous
1589           pax extended or global header.
1590        """
1591        for keyword, value in pax_headers.items():
1592            if keyword == "GNU.sparse.name":
1593                setattr(self, "path", value)
1594            elif keyword == "GNU.sparse.size":
1595                setattr(self, "size", int(value))
1596            elif keyword == "GNU.sparse.realsize":
1597                setattr(self, "size", int(value))
1598            elif keyword in PAX_FIELDS:
1599                if keyword in PAX_NUMBER_FIELDS:
1600                    try:
1601                        value = PAX_NUMBER_FIELDS[keyword](value)
1602                    except ValueError:
1603                        value = 0
1604                if keyword == "path":
1605                    value = value.rstrip("/")
1606                setattr(self, keyword, value)
1607
1608        self.pax_headers = pax_headers.copy()
1609
1610    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1611        """Decode a single field from a pax record.
1612        """
1613        try:
1614            return value.decode(encoding, "strict")
1615        except UnicodeDecodeError:
1616            return value.decode(fallback_encoding, fallback_errors)
1617
1618    def _block(self, count):
1619        """Round up a byte count by BLOCKSIZE and return it,
1620           e.g. _block(834) => 1024.
1621        """
1622        blocks, remainder = divmod(count, BLOCKSIZE)
1623        if remainder:
1624            blocks += 1
1625        return blocks * BLOCKSIZE
1626
1627    def isreg(self):
1628        'Return True if the TarInfo object is a regular file.'
1629        return self.type in REGULAR_TYPES
1630
1631    def isfile(self):
1632        'Return True if the TarInfo object is a regular file.'
1633        return self.isreg()
1634
1635    def isdir(self):
1636        'Return True if it is a directory.'
1637        return self.type == DIRTYPE
1638
1639    def issym(self):
1640        'Return True if it is a symbolic link.'
1641        return self.type == SYMTYPE
1642
1643    def islnk(self):
1644        'Return True if it is a hard link.'
1645        return self.type == LNKTYPE
1646
1647    def ischr(self):
1648        'Return True if it is a character device.'
1649        return self.type == CHRTYPE
1650
1651    def isblk(self):
1652        'Return True if it is a block device.'
1653        return self.type == BLKTYPE
1654
1655    def isfifo(self):
1656        'Return True if it is a FIFO.'
1657        return self.type == FIFOTYPE
1658
1659    def issparse(self):
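        'Return True if it is a sparse file.'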
1660        return self.sparse is not None
1661
1662    def isdev(self):
1663        'Return True if it is one of character device, block device or FIFO.'
1664        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1665# class TarInfo
1666
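# Usage sketch (illustrative, not part of this module): the type predicates
# above let callers dispatch on the kind of member while walking an archive.
# The archive name "example.tar" is an assumption.
#
#     import tarfile
#     with tarfile.open("example.tar") as tf:
#         for member in tf.getmembers():
#             if member.isdir():
#                 print("dir: ", member.name)
#             elif member.issym() or member.islnk():
#                 print("link:", member.name, "->", member.linkname)
#             elif member.isreg():
#                 print("file:", member.name, member.size, "bytes")
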
1667class TarFile(object):
1668    """The TarFile class provides an interface to tar archives.
1669    """
1670
1671    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
1672
1673    dereference = False         # If true, add content of linked file to the
1674                                # tar file, else the link.
1675
1676    ignore_zeros = False        # If true, skips empty or invalid blocks and
1677                                # continues processing.
1678
1679    errorlevel = 1              # If 0, fatal errors only appear in debug
1680                                # messages (if debug >= 0). If > 0, errors
1681                                # are passed to the caller as exceptions.
1682
1683    format = DEFAULT_FORMAT     # The format to use when creating an archive.
1684
1685    encoding = ENCODING         # Encoding for 8-bit character strings.
1686
1687    errors = None               # Error handler for unicode conversion.
1688
1689    tarinfo = TarInfo           # The default TarInfo class to use.
1690
1691    fileobject = ExFileObject   # The file-object for extractfile().
1692
1693    extraction_filter = None    # The default filter for extraction.
1694
1695    def __init__(self, name=None, mode="r", fileobj=None, format=None,
1696            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1697            errors="surrogateescape", pax_headers=None, debug=None,
1698            errorlevel=None, copybufsize=None, stream=False):
1699        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1700           read from an existing archive, 'a' to append data to an existing
1701           file, 'w' to create a new file overwriting an existing one, or 'x'
1702           to create a new file exclusively. `mode' defaults to 'r'.
1703           If `fileobj' is given, it is used for reading or writing data. If it
1704           can be determined, `mode' is overridden by `fileobj's mode.
1705           `fileobj' is not closed when TarFile is closed.
1706        """
1707        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
1708        if mode not in modes:
1709            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1710        self.mode = mode
1711        self._mode = modes[mode]
1712
1713        if not fileobj:
1714            if self.mode == "a" and not os.path.exists(name):
1715                # Create nonexistent files in append mode.
1716                self.mode = "w"
1717                self._mode = "wb"
1718            fileobj = bltn_open(name, self._mode)
1719            self._extfileobj = False
1720        else:
1721            if (name is None and hasattr(fileobj, "name") and
1722                isinstance(fileobj.name, (str, bytes))):
1723                name = fileobj.name
1724            if hasattr(fileobj, "mode"):
1725                self._mode = fileobj.mode
1726            self._extfileobj = True
1727        self.name = os.path.abspath(name) if name else None
1728        self.fileobj = fileobj
1729
1730        self.stream = stream
1731
1732        # Init attributes.
1733        if format is not None:
1734            self.format = format
1735        if tarinfo is not None:
1736            self.tarinfo = tarinfo
1737        if dereference is not None:
1738            self.dereference = dereference
1739        if ignore_zeros is not None:
1740            self.ignore_zeros = ignore_zeros
1741        if encoding is not None:
1742            self.encoding = encoding
1743        self.errors = errors
1744
1745        if pax_headers is not None and self.format == PAX_FORMAT:
1746            self.pax_headers = pax_headers
1747        else:
1748            self.pax_headers = {}
1749
1750        if debug is not None:
1751            self.debug = debug
1752        if errorlevel is not None:
1753            self.errorlevel = errorlevel
1754
1755        # Init datastructures.
1756        self.copybufsize = copybufsize
1757        self.closed = False
1758        self.members = []       # list of members as TarInfo objects
1759        self._loaded = False    # flag if all members have been read
1760        self.offset = self.fileobj.tell()
1761                                # current position in the archive file
1762        self.inodes = {}        # dictionary caching the inodes of
1763                                # archive members already added
1764
1765        try:
1766            if self.mode == "r":
1767                self.firstmember = None
1768                self.firstmember = self.next()
1769
1770            if self.mode == "a":
1771                # Move to the end of the archive,
1772                # before the first empty block.
1773                while True:
1774                    self.fileobj.seek(self.offset)
1775                    try:
1776                        tarinfo = self.tarinfo.fromtarfile(self)
1777                        self.members.append(tarinfo)
1778                    except EOFHeaderError:
1779                        self.fileobj.seek(self.offset)
1780                        break
1781                    except HeaderError as e:
1782                        raise ReadError(str(e)) from None
1783
1784            if self.mode in ("a", "w", "x"):
1785                self._loaded = True
1786
1787                if self.pax_headers:
1788                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1789                    self.fileobj.write(buf)
1790                    self.offset += len(buf)
1791        except:
1792            if not self._extfileobj:
1793                self.fileobj.close()
1794            self.closed = True
1795            raise
1796
1797    #--------------------------------------------------------------------------
1798    # Below are the classmethods which act as alternate constructors to the
1799    # TarFile class. The open() method is the only one that is needed for
1800    # public use; it is the "super"-constructor and is able to select an
1801    # adequate "sub"-constructor for a particular compression using the mapping
1802    # from OPEN_METH.
1803    #
1804    # This concept allows one to subclass TarFile without losing the comfort of
1805    # the super-constructor. A sub-constructor is registered and made available
1806    # by adding it to the mapping in OPEN_METH.
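    #
    # A minimal sketch of such a registration (hypothetical, not part of the
    # stdlib): the names ZstTarFile/zstopen and the "zst" key are assumptions.
    #
    #     class ZstTarFile(TarFile):
    #         @classmethod
    #         def zstopen(cls, name, mode="r", fileobj=None, **kwargs):
    #             # Hypothetical: wrap name/fileobj in a decompressing stream
    #             # here, then hand the resulting file object to taropen().
    #             return cls.taropen(name, mode, fileobj, **kwargs)
    #
    #         OPEN_METH = {**TarFile.OPEN_METH, "zst": "zstopen"}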
1807
1808    @classmethod
1809    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1810        """Open a tar archive for reading, writing or appending. Return
1811           an appropriate TarFile class.
1812
1813           mode:
1814           'r' or 'r:*' open for reading with transparent compression
1815           'r:'         open for reading exclusively uncompressed
1816           'r:gz'       open for reading with gzip compression
1817           'r:bz2'      open for reading with bzip2 compression
1818           'r:xz'       open for reading with lzma compression
1819           'a' or 'a:'  open for appending, creating the file if necessary
1820           'w' or 'w:'  open for writing without compression
1821           'w:gz'       open for writing with gzip compression
1822           'w:bz2'      open for writing with bzip2 compression
1823           'w:xz'       open for writing with lzma compression
1824
1825           'x' or 'x:'  create a tarfile exclusively without compression, raise
1826                        an exception if the file already exists
1827           'x:gz'       create a gzip compressed tarfile, raise an exception
1828                        if the file already exists
1829           'x:bz2'      create a bzip2 compressed tarfile, raise an exception
1830                        if the file already exists
1831           'x:xz'       create an lzma compressed tarfile, raise an exception
1832                        if the file already exists
1833
1834           'r|*'        open a stream of tar blocks with transparent compression
1835           'r|'         open an uncompressed stream of tar blocks for reading
1836           'r|gz'       open a gzip compressed stream of tar blocks
1837           'r|bz2'      open a bzip2 compressed stream of tar blocks
1838           'r|xz'       open an lzma compressed stream of tar blocks
1839           'w|'         open an uncompressed stream for writing
1840           'w|gz'       open a gzip compressed stream for writing
1841           'w|bz2'      open a bzip2 compressed stream for writing
1842           'w|xz'       open an lzma compressed stream for writing
1843        """
1844
1845        if not name and not fileobj:
1846            raise ValueError("nothing to open")
1847
1848        if mode in ("r", "r:*"):
1849            # Find out which *open() is appropriate for opening the file.
1850            def not_compressed(comptype):
1851                return cls.OPEN_METH[comptype] == 'taropen'
1852            error_msgs = []
1853            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
1854                func = getattr(cls, cls.OPEN_METH[comptype])
1855                if fileobj is not None:
1856                    saved_pos = fileobj.tell()
1857                try:
1858                    return func(name, "r", fileobj, **kwargs)
1859                except (ReadError, CompressionError) as e:
1860                    error_msgs.append(f'- method {comptype}: {e!r}')
1861                    if fileobj is not None:
1862                        fileobj.seek(saved_pos)
1863                    continue
1864            error_msgs_summary = '\n'.join(error_msgs)
1865            raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}")
1866
1867        elif ":" in mode:
1868            filemode, comptype = mode.split(":", 1)
1869            filemode = filemode or "r"
1870            comptype = comptype or "tar"
1871
1872            # Select the *open() function according to
1873            # given compression.
1874            if comptype in cls.OPEN_METH:
1875                func = getattr(cls, cls.OPEN_METH[comptype])
1876            else:
1877                raise CompressionError("unknown compression type %r" % comptype)
1878            return func(name, filemode, fileobj, **kwargs)
1879
1880        elif "|" in mode:
1881            filemode, comptype = mode.split("|", 1)
1882            filemode = filemode or "r"
1883            comptype = comptype or "tar"
1884
1885            if filemode not in ("r", "w"):
1886                raise ValueError("mode must be 'r' or 'w'")
1887
1888            compresslevel = kwargs.pop("compresslevel", 9)
1889            stream = _Stream(name, filemode, comptype, fileobj, bufsize,
1890                             compresslevel)
1891            try:
1892                t = cls(name, filemode, stream, **kwargs)
1893            except:
1894                stream.close()
1895                raise
1896            t._extfileobj = False
1897            return t
1898
1899        elif mode in ("a", "w", "x"):
1900            return cls.taropen(name, mode, fileobj, **kwargs)
1901
1902        raise ValueError("undiscernible mode")
1903
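    # Usage sketch for the mode strings above (illustrative only; the paths
    # and directory names are assumptions):
    #
    #     import sys, tarfile
    #     # Transparent compression detection while reading:
    #     with tarfile.open("backup.tar.gz", "r:*") as tf:
    #         print(tf.getnames())
    #     # Exclusive creation with xz compression:
    #     with tarfile.open("out.tar.xz", "x:xz") as tf:
    #         tf.add("some_directory")
    #     # Non-seekable stream of tar blocks, e.g. piped to stdout:
    #     with tarfile.open(fileobj=sys.stdout.buffer, mode="w|gz") as tf:
    #         tf.add("some_directory")
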
1904    @classmethod
1905    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1906        """Open uncompressed tar archive name for reading or writing.
1907        """
1908        if mode not in ("r", "a", "w", "x"):
1909            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1910        return cls(name, mode, fileobj, **kwargs)
1911
1912    @classmethod
1913    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1914        """Open gzip compressed tar archive name for reading or writing.
1915           Appending is not allowed.
1916        """
1917        if mode not in ("r", "w", "x"):
1918            raise ValueError("mode must be 'r', 'w' or 'x'")
1919
1920        try:
1921            from gzip import GzipFile
1922        except ImportError:
1923            raise CompressionError("gzip module is not available") from None
1924
1925        try:
1926            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
1927        except OSError as e:
1928            if fileobj is not None and mode == 'r':
1929                raise ReadError("not a gzip file") from e
1930            raise
1931
1932        try:
1933            t = cls.taropen(name, mode, fileobj, **kwargs)
1934        except OSError as e:
1935            fileobj.close()
1936            if mode == 'r':
1937                raise ReadError("not a gzip file") from e
1938            raise
1939        except:
1940            fileobj.close()
1941            raise
1942        t._extfileobj = False
1943        return t
1944
1945    @classmethod
1946    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1947        """Open bzip2 compressed tar archive name for reading or writing.
1948           Appending is not allowed.
1949        """
1950        if mode not in ("r", "w", "x"):
1951            raise ValueError("mode must be 'r', 'w' or 'x'")
1952
1953        try:
1954            from bz2 import BZ2File
1955        except ImportError:
1956            raise CompressionError("bz2 module is not available") from None
1957
1958        fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)
1959
1960        try:
1961            t = cls.taropen(name, mode, fileobj, **kwargs)
1962        except (OSError, EOFError) as e:
1963            fileobj.close()
1964            if mode == 'r':
1965                raise ReadError("not a bzip2 file") from e
1966            raise
1967        except:
1968            fileobj.close()
1969            raise
1970        t._extfileobj = False
1971        return t
1972
1973    @classmethod
1974    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
1975        """Open lzma compressed tar archive name for reading or writing.
1976           Appending is not allowed.
1977        """
1978        if mode not in ("r", "w", "x"):
1979            raise ValueError("mode must be 'r', 'w' or 'x'")
1980
1981        try:
1982            from lzma import LZMAFile, LZMAError
1983        except ImportError:
1984            raise CompressionError("lzma module is not available") from None
1985
1986        fileobj = LZMAFile(fileobj or name, mode, preset=preset)
1987
1988        try:
1989            t = cls.taropen(name, mode, fileobj, **kwargs)
1990        except (LZMAError, EOFError) as e:
1991            fileobj.close()
1992            if mode == 'r':
1993                raise ReadError("not an lzma file") from e
1994            raise
1995        except:
1996            fileobj.close()
1997            raise
1998        t._extfileobj = False
1999        return t
2000
2001    # All *open() methods are registered here.
2002    OPEN_METH = {
2003        "tar": "taropen",   # uncompressed tar
2004        "gz":  "gzopen",    # gzip compressed tar
2005        "bz2": "bz2open",   # bzip2 compressed tar
2006        "xz":  "xzopen"     # lzma compressed tar
2007    }
2008
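    # Usage sketch (illustrative): keyword arguments that open() does not
    # consume are passed through to the sub-constructor selected via
    # OPEN_METH, so compression parameters can be supplied directly. The
    # file names are assumptions.
    #
    #     import tarfile
    #     tf = tarfile.open("fast.tar.gz", "w:gz", compresslevel=1)   # gzopen
    #     tf.close()
    #     tf = tarfile.open("dense.tar.xz", "w:xz", preset=9)         # xzopen
    #     tf.close()
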
2009    #--------------------------------------------------------------------------
2010    # The public methods which TarFile provides:
2011
2012    def close(self):
2013        """Close the TarFile. In write-mode, two finishing zero blocks are
2014           appended to the archive.
2015        """
2016        if self.closed:
2017            return
2018
2019        self.closed = True
2020        try:
2021            if self.mode in ("a", "w", "x"):
2022                self.fileobj.write(NUL * (BLOCKSIZE * 2))
2023                self.offset += (BLOCKSIZE * 2)
2024                # fill up the end with zero-blocks
2025                # (like option -b20 for tar does)
2026                blocks, remainder = divmod(self.offset, RECORDSIZE)
2027                if remainder > 0:
2028                    self.fileobj.write(NUL * (RECORDSIZE - remainder))
2029        finally:
2030            if not self._extfileobj:
2031                self.fileobj.close()
2032
2033    def getmember(self, name):
2034        """Return a TarInfo object for member `name'. If `name' cannot be
2035           found in the archive, KeyError is raised. If a member occurs more
2036           than once in the archive, its last occurrence is assumed to be the
2037           most up-to-date version.
2038        """
2039        tarinfo = self._getmember(name.rstrip('/'))
2040        if tarinfo is None:
2041            raise KeyError("filename %r not found" % name)
2042        return tarinfo
2043
2044    def getmembers(self):
2045        """Return the members of the archive as a list of TarInfo objects. The
2046           list has the same order as the members in the archive.
2047        """
2048        self._check()
2049        if not self._loaded:    # if we want to obtain a list of
2050            self._load()        # all members, we first have to
2051                                # scan the whole archive.
2052        return self.members
2053
2054    def getnames(self):
2055        """Return the members of the archive as a list of their names. It has
2056           the same order as the list returned by getmembers().
2057        """
2058        return [tarinfo.name for tarinfo in self.getmembers()]
2059
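    # Usage sketch (illustrative): getmembers()/getnames() scan the whole
    # archive once and cache the result on the TarFile object. The archive
    # and member names are assumptions.
    #
    #     import tarfile
    #     with tarfile.open("example.tar") as tf:
    #         names = tf.getnames()              # e.g. ['docs', 'docs/a.txt']
    #         info = tf.getmember("docs/a.txt")  # raises KeyError if missing
    #         print(info.size, info.mtime)
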
2060    def gettarinfo(self, name=None, arcname=None, fileobj=None):
2061        """Create a TarInfo object from the result of os.stat or equivalent
2062           on an existing file. The file is either named by `name', or
2063           specified as a file object `fileobj' with a file descriptor. If
2064           given, `arcname' specifies an alternative name for the file in the
2065           archive, otherwise, the name is taken from the 'name' attribute of
2066           'fileobj', or the 'name' argument. The name should be a text
2067           string.
2068        """
2069        self._check("awx")
2070
2071        # When fileobj is given, replace name by
2072        # fileobj's real name.
2073        if fileobj is not None:
2074            name = fileobj.name
2075
2076        # Building the name of the member in the archive.
2077        # Backward slashes are converted to forward slashes,
2078        # absolute paths are turned into relative paths.
2079        if arcname is None:
2080            arcname = name
2081        drv, arcname = os.path.splitdrive(arcname)
2082        arcname = arcname.replace(os.sep, "/")
2083        arcname = arcname.lstrip("/")
2084
2085        # Now, fill the TarInfo object with
2086        # information specific for the file.
2087        tarinfo = self.tarinfo()
2088        tarinfo._tarfile = self  # To be removed in 3.16.
2089
2090        # Use os.stat or os.lstat, depending on whether symlinks should be resolved.
2091        if fileobj is None:
2092            if not self.dereference:
2093                statres = os.lstat(name)
2094            else:
2095                statres = os.stat(name)
2096        else:
2097            statres = os.fstat(fileobj.fileno())
2098        linkname = ""
2099
2100        stmd = statres.st_mode
2101        if stat.S_ISREG(stmd):
2102            inode = (statres.st_ino, statres.st_dev)
2103            if not self.dereference and statres.st_nlink > 1 and \
2104                    inode in self.inodes and arcname != self.inodes[inode]:
2105                # Is it a hardlink to an already
2106                # archived file?
2107                type = LNKTYPE
2108                linkname = self.inodes[inode]
2109            else:
2110                # The inode is added only if it's valid.
2111                # For win32 it is always 0.
2112                type = REGTYPE
2113                if inode[0]:
2114                    self.inodes[inode] = arcname
2115        elif stat.S_ISDIR(stmd):
2116            type = DIRTYPE
2117        elif stat.S_ISFIFO(stmd):
2118            type = FIFOTYPE
2119        elif stat.S_ISLNK(stmd):
2120            type = SYMTYPE
2121            linkname = os.readlink(name)
2122        elif stat.S_ISCHR(stmd):
2123            type = CHRTYPE
2124        elif stat.S_ISBLK(stmd):
2125            type = BLKTYPE
2126        else:
2127            return None
2128
2129        # Fill the TarInfo object with all
2130        # information we can get.
2131        tarinfo.name = arcname
2132        tarinfo.mode = stmd
2133        tarinfo.uid = statres.st_uid
2134        tarinfo.gid = statres.st_gid
2135        if type == REGTYPE:
2136            tarinfo.size = statres.st_size
2137        else:
2138            tarinfo.size = 0
2139        tarinfo.mtime = statres.st_mtime
2140        tarinfo.type = type
2141        tarinfo.linkname = linkname
2142        if pwd:
2143            try:
2144                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
2145            except KeyError:
2146                pass
2147        if grp:
2148            try:
2149                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
2150            except KeyError:
2151                pass
2152
2153        if type in (CHRTYPE, BLKTYPE):
2154            if hasattr(os, "major") and hasattr(os, "minor"):
2155                tarinfo.devmajor = os.major(statres.st_rdev)
2156                tarinfo.devminor = os.minor(statres.st_rdev)
2157        return tarinfo
2158
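    # Usage sketch (illustrative): gettarinfo() combined with addfile() can
    # store a file under a different name or with adjusted metadata. The
    # file names are assumptions.
    #
    #     import tarfile
    #     with tarfile.open("out.tar", "w") as tf:
    #         with open("data.bin", "rb") as f:
    #             ti = tf.gettarinfo(arcname="renamed.bin", fileobj=f)
    #             ti.uid = ti.gid = 0
    #             ti.uname = ti.gname = "root"
    #             tf.addfile(ti, f)
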
2159    def list(self, verbose=True, *, members=None):
2160        """Print a table of contents to sys.stdout. If `verbose' is False, only
2161           the names of the members are printed. If it is True, an `ls -l'-like
2162           output is produced. `members' is optional and must be a subset of the
2163           list returned by getmembers().
2164        """
2165        # Convert tarinfo type to stat type.
2166        type2mode = {REGTYPE: stat.S_IFREG, SYMTYPE: stat.S_IFLNK,
2167                     FIFOTYPE: stat.S_IFIFO, CHRTYPE: stat.S_IFCHR,
2168                     DIRTYPE: stat.S_IFDIR, BLKTYPE: stat.S_IFBLK}
2169        self._check()
2170
2171        if members is None:
2172            members = self
2173        for tarinfo in members:
2174            if verbose:
2175                if tarinfo.mode is None:
2176                    _safe_print("??????????")
2177                else:
2178                    modetype = type2mode.get(tarinfo.type, 0)
2179                    _safe_print(stat.filemode(modetype | tarinfo.mode))
2180                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
2181                                       tarinfo.gname or tarinfo.gid))
2182                if tarinfo.ischr() or tarinfo.isblk():
2183                    _safe_print("%10s" %
2184                            ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
2185                else:
2186                    _safe_print("%10d" % tarinfo.size)
2187                if tarinfo.mtime is None:
2188                    _safe_print("????-??-?? ??:??:??")
2189                else:
2190                    _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
2191                                % time.localtime(tarinfo.mtime)[:6])
2192
2193            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
2194
2195            if verbose:
2196                if tarinfo.issym():
2197                    _safe_print("-> " + tarinfo.linkname)
2198                if tarinfo.islnk():
2199                    _safe_print("link to " + tarinfo.linkname)
2200            print()
2201
2202    def add(self, name, arcname=None, recursive=True, *, filter=None):
2203        """Add the file `name' to the archive. `name' may be any type of file
2204           (directory, fifo, symbolic link, etc.). If given, `arcname'
2205           specifies an alternative name for the file in the archive.
2206           Directories are added recursively by default. This can be avoided by
2207           setting `recursive' to False. `filter' is a function
2208           that expects a TarInfo object argument and returns the changed
2209           TarInfo object; if it returns None, the TarInfo object will be
2210           excluded from the archive.
2211        """
2212        self._check("awx")
2213
2214        if arcname is None:
2215            arcname = name
2216
2217        # Skip if somebody tries to archive the archive...
2218        if self.name is not None and os.path.abspath(name) == self.name:
2219            self._dbg(2, "tarfile: Skipped %r" % name)
2220            return
2221
2222        self._dbg(1, name)
2223
2224        # Create a TarInfo object from the file.
2225        tarinfo = self.gettarinfo(name, arcname)
2226
2227        if tarinfo is None:
2228            self._dbg(1, "tarfile: Unsupported type %r" % name)
2229            return
2230
2231        # Change or exclude the TarInfo object.
2232        if filter is not None:
2233            tarinfo = filter(tarinfo)
2234            if tarinfo is None:
2235                self._dbg(2, "tarfile: Excluded %r" % name)
2236                return
2237
2238        # Append the tar header and data to the archive.
2239        if tarinfo.isreg():
2240            with bltn_open(name, "rb") as f:
2241                self.addfile(tarinfo, f)
2242
2243        elif tarinfo.isdir():
2244            self.addfile(tarinfo)
2245            if recursive:
2246                for f in sorted(os.listdir(name)):
2247                    self.add(os.path.join(name, f), os.path.join(arcname, f),
2248                            recursive, filter=filter)
2249
2250        else:
2251            self.addfile(tarinfo)
2252
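    # Usage sketch (illustrative): the `filter' callable can prune or rewrite
    # entries while add() recurses. Here compiled Python files are skipped;
    # the directory name is an assumption.
    #
    #     import tarfile
    #
    #     def skip_pyc(tarinfo):
    #         if tarinfo.name.endswith(".pyc"):
    #             return None                    # exclude from the archive
    #         return tarinfo
    #
    #     with tarfile.open("src.tar", "w") as tf:
    #         tf.add("project", arcname="project", filter=skip_pyc)
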
2253    def addfile(self, tarinfo, fileobj=None):
2254        """Add the TarInfo object `tarinfo' to the archive. If `tarinfo' represents
2255           a non zero-size regular file, the `fileobj' argument should be a binary file,
2256           and tarinfo.size bytes are read from it and added to the archive.
2257           You can create TarInfo objects directly, or by using gettarinfo().
2258        """
2259        self._check("awx")
2260
2261        if fileobj is None and tarinfo.isreg() and tarinfo.size != 0:
2262            raise ValueError("fileobj not provided for non zero-size regular file")
2263
2264        tarinfo = copy.copy(tarinfo)
2265
2266        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2267        self.fileobj.write(buf)
2268        self.offset += len(buf)
2269        bufsize = self.copybufsize
2270        # If there's data to follow, append it.
2271        if fileobj is not None:
2272            copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
2273            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2274            if remainder > 0:
2275                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2276                blocks += 1
2277            self.offset += blocks * BLOCKSIZE
2278
2279        self.members.append(tarinfo)
2280
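    # Usage sketch (illustrative): addfile() with a hand-built TarInfo can
    # archive in-memory data without touching the filesystem. The member and
    # archive names are assumptions.
    #
    #     import io, time, tarfile
    #     payload = b"hello world\n"
    #     ti = tarfile.TarInfo(name="greeting.txt")
    #     ti.size = len(payload)
    #     ti.mtime = time.time()
    #     with tarfile.open("mem.tar", "w") as tf:
    #         tf.addfile(ti, io.BytesIO(payload))
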
2281    def _get_filter_function(self, filter):
2282        if filter is None:
2283            filter = self.extraction_filter
2284            if filter is None:
2285                import warnings
2286                warnings.warn(
2287                    'Python 3.14 will, by default, filter extracted tar '
2288                    + 'archives and reject files or modify their metadata. '
2289                    + 'Use the filter argument to control this behavior.',
2290                    DeprecationWarning, stacklevel=3)
2291                return fully_trusted_filter
2292            if isinstance(filter, str):
2293                raise TypeError(
2294                    'String names are not supported for '
2295                    + 'TarFile.extraction_filter. Use a function such as '
2296                    + 'tarfile.data_filter directly.')
2297            return filter
2298        if callable(filter):
2299            return filter
2300        try:
2301            return _NAMED_FILTERS[filter]
2302        except KeyError:
2303            raise ValueError(f"filter {filter!r} not found") from None
2304
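    # Usage sketch (illustrative): a default extraction filter can be set on
    # an instance or, wrapped in staticmethod() so it is not bound as a
    # method, on the class for all subsequent extractions. The archive name
    # is an assumption.
    #
    #     import tarfile
    #     tarfile.TarFile.extraction_filter = staticmethod(tarfile.data_filter)
    #     with tarfile.open("untrusted.tar") as tf:
    #         tf.extractall("dest")      # filtered, no DeprecationWarning
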
2305    def extractall(self, path=".", members=None, *, numeric_owner=False,
2306                   filter=None):
2307        """Extract all members from the archive to the current working
2308           directory and set owner, modification time and permissions on
2309           directories afterwards. `path' specifies a different directory
2310           to extract to. `members' is optional and must be a subset of the
2311           list returned by getmembers(). If `numeric_owner` is True, only
2312           the numbers for user/group names are used and not the names.
2313
2314           The `filter` function will be called on each member just
2315           before extraction.
2316           It can return a changed TarInfo or None to skip the member.
2317           String names of common filters are accepted.
2318        """
2319        directories = []
2320
2321        filter_function = self._get_filter_function(filter)
2322        if members is None:
2323            members = self
2324
2325        for member in members:
2326            tarinfo = self._get_extract_tarinfo(member, filter_function, path)
2327            if tarinfo is None:
2328                continue
2329            if tarinfo.isdir():
2330                # For directories, delay setting attributes until later,
2331                # since permissions can interfere with extraction and
2332                # extracting contents can reset mtime.
2333                directories.append(tarinfo)
2334            self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),
2335                              numeric_owner=numeric_owner)
2336
2337        # Reverse sort directories.
2338        directories.sort(key=lambda a: a.name, reverse=True)
2339
2340        # Set correct owner, mtime and filemode on directories.
2341        for tarinfo in directories:
2342            dirpath = os.path.join(path, tarinfo.name)
2343            try:
2344                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
2345                self.utime(tarinfo, dirpath)
2346                self.chmod(tarinfo, dirpath)
2347            except ExtractError as e:
2348                self._handle_nonfatal_error(e)
2349
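    # Usage sketch (illustrative): extracting an untrusted archive with the
    # named 'data' filter, which refuses members such as absolute paths and
    # links pointing outside the destination. The paths are assumptions.
    #
    #     import tarfile
    #     with tarfile.open("upload.tar.gz") as tf:
    #         tf.extractall(path="incoming", filter="data")
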
2350    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False,
2351                filter=None):
2352        """Extract a member from the archive to the current working directory,
2353           using its full name. Its file information is extracted as accurately
2354           as possible. `member' may be a filename or a TarInfo object. You can
2355           specify a different directory using `path'. File attributes (owner,
2356           mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
2357           is True, only the numbers for user/group names are used and not
2358           the names.
2359
2360           The `filter` function will be called before extraction.
2361           It can return a changed TarInfo or None to skip the member.
2362           String names of common filters are accepted.
2363        """
2364        filter_function = self._get_filter_function(filter)
2365        tarinfo = self._get_extract_tarinfo(member, filter_function, path)
2366        if tarinfo is not None:
2367            self._extract_one(tarinfo, path, set_attrs, numeric_owner)
2368
2369    def _get_extract_tarinfo(self, member, filter_function, path):
2370        """Get filtered TarInfo (or None) from member, which might be a str"""
2371        if isinstance(member, str):
2372            tarinfo = self.getmember(member)
2373        else:
2374            tarinfo = member
2375
2376        unfiltered = tarinfo
2377        try:
2378            tarinfo = filter_function(tarinfo, path)
2379        except (OSError, FilterError) as e:
2380            self._handle_fatal_error(e)
2381        except ExtractError as e:
2382            self._handle_nonfatal_error(e)
2383        if tarinfo is None:
2384            self._dbg(2, "tarfile: Excluded %r" % unfiltered.name)
2385            return None
2386        # Prepare the link target for makelink().
2387        if tarinfo.islnk():
2388            tarinfo = copy.copy(tarinfo)
2389            tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2390        return tarinfo
2391
2392    def _extract_one(self, tarinfo, path, set_attrs, numeric_owner):
2393        """Extract from filtered tarinfo to disk"""
2394        self._check("r")
2395
2396        try:
2397            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
2398                                 set_attrs=set_attrs,
2399                                 numeric_owner=numeric_owner)
2400        except OSError as e:
2401            self._handle_fatal_error(e)
2402        except ExtractError as e:
2403            self._handle_nonfatal_error(e)
2404
2405    def _handle_nonfatal_error(self, e):
2406        """Handle non-fatal error (ExtractError) according to errorlevel"""
2407        if self.errorlevel > 1:
2408            raise
2409        else:
2410            self._dbg(1, "tarfile: %s" % e)
2411
2412    def _handle_fatal_error(self, e):
2413        """Handle "fatal" error according to self.errorlevel"""
2414        if self.errorlevel > 0:
2415            raise
2416        elif isinstance(e, OSError):
2417            if e.filename is None:
2418                self._dbg(1, "tarfile: %s" % e.strerror)
2419            else:
2420                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2421        else:
2422            self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e))
2423
2424    def extractfile(self, member):
2425        """Extract a member from the archive as a file object. `member' may be
2426           a filename or a TarInfo object. If `member' is a regular file or
2427           a link, an io.BufferedReader object is returned. For all other
2428           existing members, None is returned. If `member' does not appear
2429           in the archive, KeyError is raised.
2430        """
2431        self._check("r")
2432
2433        if isinstance(member, str):
2434            tarinfo = self.getmember(member)
2435        else:
2436            tarinfo = member
2437
2438        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
2439            # Members with unknown types are treated as regular files.
2440            return self.fileobject(self, tarinfo)
2441
2442        elif tarinfo.islnk() or tarinfo.issym():
2443            if isinstance(self.fileobj, _Stream):
2444                # A small but ugly workaround for the case that someone tries
2445                # to extract a (sym)link as a file-object from a non-seekable
2446                # stream of tar blocks.
2447                raise StreamError("cannot extract (sym)link as file object")
2448            else:
2449                # A (sym)link's file object is its target's file object.
2450                return self.extractfile(self._find_link_target(tarinfo))
2451        else:
2452            # If there's no data associated with the member (directory, chrdev,
2453            # blkdev, etc.), return None instead of a file object.
2454            return None
2455
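    # Usage sketch (illustrative): extractfile() gives read access to a
    # member's data without writing it to disk. The member name is an
    # assumption.
    #
    #     import tarfile
    #     with tarfile.open("example.tar") as tf:
    #         f = tf.extractfile("docs/a.txt")
    #         if f is not None:              # None for directories, devices, ...
    #             text = f.read().decode("utf-8")
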
2456    def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2457                        numeric_owner=False):
2458        """Extract the TarInfo object tarinfo to a physical
2459           file called targetpath.
2460        """
2461        # Fetch the TarInfo object for the given name
2462        # and build the destination pathname, replacing
2463        # forward slashes with platform-specific separators.
2464        targetpath = targetpath.rstrip("/")
2465        targetpath = targetpath.replace("/", os.sep)
2466
2467        # Create all upper directories.
2468        upperdirs = os.path.dirname(targetpath)
2469        if upperdirs and not os.path.exists(upperdirs):
2470            # Create directories that are not part of the archive with
2471            # default permissions.
2472            os.makedirs(upperdirs, exist_ok=True)
2473
2474        if tarinfo.islnk() or tarinfo.issym():
2475            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2476        else:
2477            self._dbg(1, tarinfo.name)
2478
2479        if tarinfo.isreg():
2480            self.makefile(tarinfo, targetpath)
2481        elif tarinfo.isdir():
2482            self.makedir(tarinfo, targetpath)
2483        elif tarinfo.isfifo():
2484            self.makefifo(tarinfo, targetpath)
2485        elif tarinfo.ischr() or tarinfo.isblk():
2486            self.makedev(tarinfo, targetpath)
2487        elif tarinfo.islnk() or tarinfo.issym():
2488            self.makelink(tarinfo, targetpath)
2489        elif tarinfo.type not in SUPPORTED_TYPES:
2490            self.makeunknown(tarinfo, targetpath)
2491        else:
2492            self.makefile(tarinfo, targetpath)
2493
2494        if set_attrs:
2495            self.chown(tarinfo, targetpath, numeric_owner)
2496            if not tarinfo.issym():
2497                self.chmod(tarinfo, targetpath)
2498                self.utime(tarinfo, targetpath)
2499
2500    #--------------------------------------------------------------------------
2501    # Below are the different file methods. They are called via
2502    # _extract_member() when extract() is called. They can be replaced in a
2503    # subclass to implement other functionality.
2504
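    # Sketch of the subclassing hook described above (illustrative; the class
    # name is an assumption): overriding one of these methods changes how a
    # particular kind of member is materialised on disk. Here ownership
    # changes are simply skipped:
    #
    #     class NoChownTarFile(TarFile):
    #         def chown(self, tarinfo, targetpath, numeric_owner):
    #             pass          # never attempt to change the owner, even as root
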
2505    def makedir(self, tarinfo, targetpath):
2506        """Make a directory called targetpath.
2507        """
2508        try:
2509            if tarinfo.mode is None:
2510                # Use the system's default mode
2511                os.mkdir(targetpath)
2512            else:
2513                # Use a safe mode for the directory, the real mode is set
2514                # later in _extract_member().
2515                os.mkdir(targetpath, 0o700)
2516        except FileExistsError:
2517            if not os.path.isdir(targetpath):
2518                raise
2519
2520    def makefile(self, tarinfo, targetpath):
2521        """Make a file called targetpath.
2522        """
2523        source = self.fileobj
2524        source.seek(tarinfo.offset_data)
2525        bufsize = self.copybufsize
2526        with bltn_open(targetpath, "wb") as target:
2527            if tarinfo.sparse is not None:
2528                for offset, size in tarinfo.sparse:
2529                    target.seek(offset)
2530                    copyfileobj(source, target, size, ReadError, bufsize)
2531                target.seek(tarinfo.size)
2532                target.truncate()
2533            else:
2534                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
2535
2536    def makeunknown(self, tarinfo, targetpath):
2537        """Make a file from a TarInfo object with an unknown type
2538           at targetpath.
2539        """
2540        self.makefile(tarinfo, targetpath)
2541        self._dbg(1, "tarfile: Unknown file type %r, " \
2542                     "extracted as regular file." % tarinfo.type)
2543
2544    def makefifo(self, tarinfo, targetpath):
2545        """Make a fifo called targetpath.
2546        """
2547        if hasattr(os, "mkfifo"):
2548            os.mkfifo(targetpath)
2549        else:
2550            raise ExtractError("fifo not supported by system")
2551
2552    def makedev(self, tarinfo, targetpath):
2553        """Make a character or block device called targetpath.
2554        """
2555        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2556            raise ExtractError("special devices not supported by system")
2557
2558        mode = tarinfo.mode
2559        if mode is None:
2560            # Use mknod's default
2561            mode = 0o600
2562        if tarinfo.isblk():
2563            mode |= stat.S_IFBLK
2564        else:
2565            mode |= stat.S_IFCHR
2566
2567        os.mknod(targetpath, mode,
2568                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2569
2570    def makelink(self, tarinfo, targetpath):
2571        """Make a (symbolic) link called targetpath. If it cannot be created
2572          (platform limitation), we try to make a copy of the referenced file
2573          instead of a link.
2574        """
2575        try:
2576            # For systems that support symbolic and hard links.
2577            if tarinfo.issym():
2578                if os.path.lexists(targetpath):
2579                    # Avoid FileExistsError on following os.symlink.
2580                    os.unlink(targetpath)
2581                os.symlink(tarinfo.linkname, targetpath)
2582            else:
2583                if os.path.exists(tarinfo._link_target):
2584                    os.link(tarinfo._link_target, targetpath)
2585                else:
2586                    self._extract_member(self._find_link_target(tarinfo),
2587                                         targetpath)
2588        except symlink_exception:
2589            try:
2590                self._extract_member(self._find_link_target(tarinfo),
2591                                     targetpath)
2592            except KeyError:
2593                raise ExtractError("unable to resolve link inside archive") from None
2594
2595    def chown(self, tarinfo, targetpath, numeric_owner):
2596        """Set owner of targetpath according to tarinfo. If numeric_owner
2597           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
2598           is False, fall back to .gid/.uid when the search based on name
2599           fails.
2600        """
2601        if hasattr(os, "geteuid") and os.geteuid() == 0:
2602            # We have to be root to do so.
2603            g = tarinfo.gid
2604            u = tarinfo.uid
2605            if not numeric_owner:
2606                try:
2607                    if grp and tarinfo.gname:
2608                        g = grp.getgrnam(tarinfo.gname)[2]
2609                except KeyError:
2610                    pass
2611                try:
2612                    if pwd and tarinfo.uname:
2613                        u = pwd.getpwnam(tarinfo.uname)[2]
2614                except KeyError:
2615                    pass
2616            if g is None:
2617                g = -1
2618            if u is None:
2619                u = -1
2620            try:
2621                if tarinfo.issym() and hasattr(os, "lchown"):
2622                    os.lchown(targetpath, u, g)
2623                else:
2624                    os.chown(targetpath, u, g)
2625            except (OSError, OverflowError) as e:
2626                # OverflowError can be raised if an ID doesn't fit in `id_t`
2627                raise ExtractError("could not change owner") from e
2628
2629    def chmod(self, tarinfo, targetpath):
2630        """Set file permissions of targetpath according to tarinfo.
2631        """
2632        if tarinfo.mode is None:
2633            return
2634        try:
2635            os.chmod(targetpath, tarinfo.mode)
2636        except OSError as e:
2637            raise ExtractError("could not change mode") from e
2638
2639    def utime(self, tarinfo, targetpath):
2640        """Set modification time of targetpath according to tarinfo.
2641        """
2642        mtime = tarinfo.mtime
2643        if mtime is None:
2644            return
2645        if not hasattr(os, 'utime'):
2646            return
2647        try:
2648            os.utime(targetpath, (mtime, mtime))
2649        except OSError as e:
2650            raise ExtractError("could not change modification time") from e
2651
2652    #--------------------------------------------------------------------------
2653    def next(self):
2654        """Return the next member of the archive as a TarInfo object, when
2655           TarFile is opened for reading. Return None if there are no more
2656           members available.
2657        """
2658        self._check("ra")
2659        if self.firstmember is not None:
2660            m = self.firstmember
2661            self.firstmember = None
2662            return m
2663
2664        # Advance the file pointer.
2665        if self.offset != self.fileobj.tell():
2666            if self.offset == 0:
2667                return None
2668            self.fileobj.seek(self.offset - 1)
2669            if not self.fileobj.read(1):
2670                raise ReadError("unexpected end of data")
2671
2672        # Read the next block.
2673        tarinfo = None
2674        while True:
2675            try:
2676                tarinfo = self.tarinfo.fromtarfile(self)
2677            except EOFHeaderError as e:
2678                if self.ignore_zeros:
2679                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2680                    self.offset += BLOCKSIZE
2681                    continue
2682            except InvalidHeaderError as e:
2683                if self.ignore_zeros:
2684                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2685                    self.offset += BLOCKSIZE
2686                    continue
2687                elif self.offset == 0:
2688                    raise ReadError(str(e)) from None
2689            except EmptyHeaderError:
2690                if self.offset == 0:
2691                    raise ReadError("empty file") from None
2692            except TruncatedHeaderError as e:
2693                if self.offset == 0:
2694                    raise ReadError(str(e)) from None
2695            except SubsequentHeaderError as e:
2696                raise ReadError(str(e)) from None
2697            except Exception as e:
2698                try:
2699                    import zlib
2700                    if isinstance(e, zlib.error):
2701                        raise ReadError(f'zlib error: {e}') from None
2702                    else:
2703                        raise e
2704                except ImportError:
2705                    raise e
2706            break
2707
2708        if tarinfo is not None:
2709            # if streaming the file we do not want to cache the tarinfo
2710            if not self.stream:
2711                self.members.append(tarinfo)
2712        else:
2713            self._loaded = True
2714
2715        return tarinfo
2716
2717    #--------------------------------------------------------------------------
2718    # Little helper methods:
2719
2720    def _getmember(self, name, tarinfo=None, normalize=False):
2721        """Find an archive member by name from bottom to top.
2722           If tarinfo is given, it is used as the starting point.
2723        """
2724        # Ensure that all members have been loaded.
2725        members = self.getmembers()
2726
2727        # Limit the member search list up to tarinfo.
2728        skipping = False
2729        if tarinfo is not None:
2730            try:
2731                index = members.index(tarinfo)
2732            except ValueError:
2733                # The given starting point might be a (modified) copy.
2734                # We'll later skip members until we find an equivalent.
2735                skipping = True
2736            else:
2737                # Happy fast path
2738                members = members[:index]
2739
2740        if normalize:
2741            name = os.path.normpath(name)
2742
2743        for member in reversed(members):
2744            if skipping:
2745                if tarinfo.offset == member.offset:
2746                    skipping = False
2747                continue
2748            if normalize:
2749                member_name = os.path.normpath(member.name)
2750            else:
2751                member_name = member.name
2752
2753            if name == member_name:
2754                return member
2755
2756        if skipping:
2757            # Starting point was not found
2758            raise ValueError(tarinfo)
2759
2760    def _load(self):
2761        """Read through the entire archive file and look for readable
2762           members. This should not run if the archive is opened in stream mode.
2763        """
2764        if not self.stream:
2765            while self.next() is not None:
2766                pass
2767            self._loaded = True
2768
2769    def _check(self, mode=None):
2770        """Check if TarFile is still open, and if the operation's mode
2771           corresponds to TarFile's mode.
2772        """
2773        if self.closed:
2774            raise OSError("%s is closed" % self.__class__.__name__)
2775        if mode is not None and self.mode not in mode:
2776            raise OSError("bad operation for mode %r" % self.mode)
2777
2778    def _find_link_target(self, tarinfo):
2779        """Find the target member of a symlink or hardlink member in the
2780           archive.
2781        """
2782        if tarinfo.issym():
2783            # Always search the entire archive.
2784            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
2785            limit = None
2786        else:
2787            # Search the archive before the link, because a hard link is
2788            # just a reference to an already archived file.
2789            linkname = tarinfo.linkname
2790            limit = tarinfo
2791
2792        member = self._getmember(linkname, tarinfo=limit, normalize=True)
2793        if member is None:
2794            raise KeyError("linkname %r not found" % linkname)
2795        return member
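
    # Illustrative sketch (not part of the module): for a symlink the lookup
    # key is the linkname joined onto the member's directory, so a member
    # "data/link" whose linkname is "file.txt" resolves against "data/file.txt",
    # while a hard link's linkname is already archive-relative and is used
    # as-is. The names and the open TarFile "tf" below are hypothetical.
    #
    #   sym = tf.getmember("data/link")          # sym.linkname == "file.txt"
    #   target = tf._find_link_target(sym)       # finds the "data/file.txt" member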

    def __iter__(self):
        """Provide an iterator object.
        """
        if self._loaded:
            yield from self.members
            return

        # Yield items using TarFile's next() method.
        # When all members have been read, set TarFile as _loaded.
        index = 0
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will have already exhausted the next() method.
        if self.firstmember is not None:
            tarinfo = self.next()
            index += 1
            yield tarinfo

        while True:
            if index < len(self.members):
                tarinfo = self.members[index]
            elif not self._loaded:
                tarinfo = self.next()
                if not tarinfo:
                    self._loaded = True
                    return
            else:
                return
            index += 1
            yield tarinfo
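
    # Illustrative sketch (not part of the module): iteration reads headers
    # lazily through next(), so members become available one at a time
    # without an upfront getmembers() scan. The archive name is hypothetical.
    #
    #   import tarfile
    #   with tarfile.open("logs.tar.gz") as tf:
    #       for member in tf:
    #           if member.isreg():
    #               print(member.name, member.size)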

    def _dbg(self, level, msg):
        """Write debugging output to sys.stderr.
        """
        if level <= self.debug:
            print(msg, file=sys.stderr)

    def __enter__(self):
        self._check()
        return self

    def __exit__(self, type, value, traceback):
        if type is None:
            self.close()
        else:
            # An exception occurred. We must not call close() because
            # it would try to write end-of-archive blocks and padding.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
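
    # Illustrative sketch (not part of the module): the methods above let a
    # TarFile act as a context manager; a clean exit calls close(), which
    # writes the end-of-archive blocks and padding, while an exception inside
    # the block only closes the underlying file object. Names are hypothetical.
    #
    #   import tarfile
    #   with tarfile.open("backup.tar.gz", "w:gz") as tf:
    #       tf.add("notes.txt")
    #   # archive finalized here, unless an exception escaped the block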

#--------------------
# exported functions
#--------------------

def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       'name' should be a string, file, or file-like object.
    """
    try:
        if hasattr(name, "read"):
            pos = name.tell()
            t = open(fileobj=name)
            name.seek(pos)
        else:
            t = open(name)
        t.close()
        return True
    except TarError:
        return False
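
# Illustrative sketch (not part of the module): is_tarfile() accepts a path
# or a seekable binary file object and simply reports whether open() can
# parse it; a file object's position is restored afterwards. File names are
# hypothetical.
#
#   import tarfile
#   print(tarfile.is_tarfile("release.tar.xz"))     # True if readable as a tar archive
#   with open("unknown.bin", "rb") as f:            # builtins.open, from user code
#       print(tarfile.is_tarfile(f))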

open = TarFile.open


def main():
    import argparse

    description = 'A simple command-line interface for tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    parser.add_argument('--filter', metavar='<filtername>',
                        choices=_NAMED_FILTERS,
                        help='Filter for extraction')

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<tarfile>',
                       help='Show listing of a tarfile')
    group.add_argument('-e', '--extract', nargs='+',
                       metavar=('<tarfile>', '<output_dir>'),
                       help='Extract tarfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create tarfile from sources')
    group.add_argument('-t', '--test', metavar='<tarfile>',
                       help='Test if a tarfile is valid')

    args = parser.parse_args()

    if args.filter and args.extract is None:
        parser.exit(1, '--filter is only valid for extraction\n')

    if args.test is not None:
        src = args.test
        if is_tarfile(src):
            with open(src, 'r') as tar:
                tar.getmembers()
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.list is not None:
        src = args.list
        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.extract is not None:
        if len(args.extract) == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif len(args.extract) == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir, filter=args.filter)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.create is not None:
        tar_name = args.create.pop(0)
        _, ext = os.path.splitext(tar_name)
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
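        # For example (hypothetical names): 'site-backup.tgz' selects mode
        # 'w:gz', 'data.tar.xz' selects 'w:xz', and a suffix with no
        # compression mapping, as in 'plain.tar', falls back to plain 'w'.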
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))

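# Illustrative sketch (not part of the module): main() implements the
# command-line interface, normally invoked as "python -m tarfile". The
# archive and file names below are hypothetical.
#
#   python -m tarfile -l archive.tar              # list contents
#   python -m tarfile -e archive.tar out_dir      # extract into out_dir
#   python -m tarfile -c backup.tar.gz a.txt b/   # create a gzip-compressed archive
#   python -m tarfile -t archive.tar              # test validity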
if __name__ == '__main__':
    main()
