1#!/usr/bin/env python3
2#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
5# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
6# All rights reserved.
7#
8# Permission  is  hereby granted,  free  of charge,  to  any person
9# obtaining a  copy of  this software  and associated documentation
10# files  (the  "Software"),  to   deal  in  the  Software   without
11# restriction,  including  without limitation  the  rights to  use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies  of  the  Software,  and to  permit  persons  to  whom the
14# Software  is  furnished  to  do  so,  subject  to  the  following
15# conditions:
16#
17# The above copyright  notice and this  permission notice shall  be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
21# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
22# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
23# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
24# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
25# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
32version     = "0.9.0"
33__author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
34__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
35
36#---------
37# Imports
38#---------
39from builtins import open as bltn_open
40import sys
41import os
42import io
43import shutil
44import stat
45import time
46import struct
47import copy
48import re
49import warnings
50
51try:
52    import pwd
53except ImportError:
54    pwd = None
55try:
56    import grp
57except ImportError:
58    grp = None
59
60# os.symlink on Windows versions prior to 6.0 (Vista) raises NotImplementedError
61symlink_exception = (AttributeError, NotImplementedError)
62try:
63    # OSError (winerror=1314) will be raised if the caller does not hold the
64    # SeCreateSymbolicLinkPrivilege privilege
65    symlink_exception += (OSError,)
66except NameError:
67    pass
68
69# from tarfile import *
70__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
71           "CompressionError", "StreamError", "ExtractError", "HeaderError",
72           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
73           "DEFAULT_FORMAT", "open"]
74
75
76#---------------------------------------------------------
77# tar constants
78#---------------------------------------------------------
79NUL = b"\0"                     # the null character
80BLOCKSIZE = 512                 # length of processing blocks
81RECORDSIZE = BLOCKSIZE * 20     # length of records
82GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
83POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string
84
85LENGTH_NAME = 100               # maximum length of a filename
86LENGTH_LINK = 100               # maximum length of a linkname
87LENGTH_PREFIX = 155             # maximum length of the prefix field
88
89REGTYPE = b"0"                  # regular file
90AREGTYPE = b"\0"                # regular file
91LNKTYPE = b"1"                  # link (inside tarfile)
92SYMTYPE = b"2"                  # symbolic link
93CHRTYPE = b"3"                  # character special device
94BLKTYPE = b"4"                  # block special device
95DIRTYPE = b"5"                  # directory
96FIFOTYPE = b"6"                 # fifo special device
97CONTTYPE = b"7"                 # contiguous file
98
99GNUTYPE_LONGNAME = b"L"         # GNU tar longname
100GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
101GNUTYPE_SPARSE = b"S"           # GNU tar sparse file
102
103XHDTYPE = b"x"                  # POSIX.1-2001 extended header
104XGLTYPE = b"g"                  # POSIX.1-2001 global header
105SOLARIS_XHDTYPE = b"X"          # Solaris extended header
106
107USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
108GNU_FORMAT = 1                  # GNU tar format
109PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
110DEFAULT_FORMAT = PAX_FORMAT
111
112#---------------------------------------------------------
113# tarfile constants
114#---------------------------------------------------------
115# File types that tarfile supports:
116SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
117                   SYMTYPE, DIRTYPE, FIFOTYPE,
118                   CONTTYPE, CHRTYPE, BLKTYPE,
119                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
120                   GNUTYPE_SPARSE)
121
122# File types that will be treated as a regular file.
123REGULAR_TYPES = (REGTYPE, AREGTYPE,
124                 CONTTYPE, GNUTYPE_SPARSE)
125
126# File types that are part of the GNU tar format.
127GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
128             GNUTYPE_SPARSE)
129
130# Fields from a pax header that override a TarInfo attribute.
131PAX_FIELDS = ("path", "linkpath", "size", "mtime",
132              "uid", "gid", "uname", "gname")
133
134# Fields from a pax header that are affected by hdrcharset.
135PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
136
137# Fields in a pax header that are numbers; all other fields
138# are treated as strings.
139PAX_NUMBER_FIELDS = {
140    "atime": float,
141    "ctime": float,
142    "mtime": float,
143    "uid": int,
144    "gid": int,
145    "size": int
146}
147
148#---------------------------------------------------------
149# initialization
150#---------------------------------------------------------
151if os.name == "nt":
152    ENCODING = "utf-8"
153else:
154    ENCODING = sys.getfilesystemencoding()
155
156#---------------------------------------------------------
157# Some useful functions
158#---------------------------------------------------------
159
160def stn(s, length, encoding, errors):
161    """Convert a string to a null-terminated bytes object.
162    """
163    if s is None:
164        raise ValueError("metadata cannot contain None")
165    s = s.encode(encoding, errors)
166    return s[:length] + (length - len(s)) * NUL
167
168def nts(s, encoding, errors):
169    """Convert a null-terminated bytes object to a string.
170    """
171    p = s.find(b"\0")
172    if p != -1:
173        s = s[:p]
174    return s.decode(encoding, errors)
175
176def nti(s):
177    """Convert a number field to a python number.
178    """
179    # There are two possible encodings for a number field, see
180    # itn() below.
181    if s[0] in (0o200, 0o377):
182        n = 0
183        for i in range(len(s) - 1):
184            n <<= 8
185            n += s[i + 1]
186        if s[0] == 0o377:
187            n = -(256 ** (len(s) - 1) - n)
188    else:
189        try:
190            s = nts(s, "ascii", "strict")
191            n = int(s.strip() or "0", 8)
192        except ValueError:
193            raise InvalidHeaderError("invalid header")
194    return n
195
196def itn(n, digits=8, format=DEFAULT_FORMAT):
197    """Convert a python number to a number field.
198    """
199    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
200    # octal digits followed by a null byte; this allows values up to
201    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
202    # that if necessary. A leading 0o200 or 0o377 byte indicates this
203    # particular encoding; the following digits-1 bytes are a big-endian
204    # base-256 representation. This allows values up to (256**(digits-1))-1.
205    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
206    # number.
207    original_n = n
208    n = int(n)
209    if 0 <= n < 8 ** (digits - 1):
210        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
211    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
212        if n >= 0:
213            s = bytearray([0o200])
214        else:
215            s = bytearray([0o377])
216            n = 256 ** digits + n
217
218        for i in range(digits - 1):
219            s.insert(1, n & 0o377)
220            n >>= 8
221    else:
222        raise ValueError("overflow in number field")
223
224    return s
225
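# A quick illustration of the two number-field encodings handled by itn()
# and nti() above (values chosen for illustration only):
#
#   >>> itn(1000)                                # fits in 7 octal digits + NUL
#   b'0001750\x00'
#   >>> nti(b'0001750\x00')
#   1000
#   >>> itn(-1, digits=8, format=GNU_FORMAT)     # GNU base-256, negative value
#   bytearray(b'\xff\xff\xff\xff\xff\xff\xff\xff')
#   >>> nti(itn(-1, digits=8, format=GNU_FORMAT))
#   -1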
226def calc_chksums(buf):
227    """Calculate the checksum for a member's header by summing up all
228       characters except for the chksum field which is treated as if
229       it was filled with spaces. According to the GNU tar sources,
230       some tars (Sun and NeXT) calculate chksum with signed char,
231       which will be different if there are chars in the buffer with
232       the high bit set. So we calculate two checksums, unsigned and
233       signed.
234    """
235    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
236    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
237    return unsigned_chksum, signed_chksum
238
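# Note on the struct formats above: "148B8x356B" reads the 148 bytes that
# precede the chksum field, skips the 8-byte chksum field itself and reads
# the remaining 356 bytes (148 + 8 + 356 == BLOCKSIZE).  The leading
# "256 +" accounts for the skipped field being treated as eight spaces
# (8 * ord(" ") == 256).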
239def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
240    """Copy length bytes from fileobj src to fileobj dst.
241       If length is None, copy the entire content.
242    """
243    bufsize = bufsize or 16 * 1024
244    if length == 0:
245        return
246    if length is None:
247        shutil.copyfileobj(src, dst, bufsize)
248        return
249
250    blocks, remainder = divmod(length, bufsize)
251    for b in range(blocks):
252        buf = src.read(bufsize)
253        if len(buf) < bufsize:
254            raise exception("unexpected end of data")
255        dst.write(buf)
256
257    if remainder != 0:
258        buf = src.read(remainder)
259        if len(buf) < remainder:
260            raise exception("unexpected end of data")
261        dst.write(buf)
262    return
263
264def _safe_print(s):
265    encoding = getattr(sys.stdout, 'encoding', None)
266    if encoding is not None:
267        s = s.encode(encoding, 'backslashreplace').decode(encoding)
268    print(s, end=' ')
269
270
271class TarError(Exception):
272    """Base exception."""
273    pass
274class ExtractError(TarError):
275    """General exception for extract errors."""
276    pass
277class ReadError(TarError):
278    """Exception for unreadable tar archives."""
279    pass
280class CompressionError(TarError):
281    """Exception for unavailable compression methods."""
282    pass
283class StreamError(TarError):
284    """Exception for unsupported operations on stream-like TarFiles."""
285    pass
286class HeaderError(TarError):
287    """Base exception for header errors."""
288    pass
289class EmptyHeaderError(HeaderError):
290    """Exception for empty headers."""
291    pass
292class TruncatedHeaderError(HeaderError):
293    """Exception for truncated headers."""
294    pass
295class EOFHeaderError(HeaderError):
296    """Exception for end of file headers."""
297    pass
298class InvalidHeaderError(HeaderError):
299    """Exception for invalid headers."""
300    pass
301class SubsequentHeaderError(HeaderError):
302    """Exception for missing and invalid extended headers."""
303    pass
304
305#---------------------------
306# internal stream interface
307#---------------------------
308class _LowLevelFile:
309    """Low-level file object. Supports reading and writing.
310       It is used instead of a regular file object for streaming
311       access.
312    """
313
314    def __init__(self, name, mode):
315        mode = {
316            "r": os.O_RDONLY,
317            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
318        }[mode]
319        if hasattr(os, "O_BINARY"):
320            mode |= os.O_BINARY
321        self.fd = os.open(name, mode, 0o666)
322
323    def close(self):
324        os.close(self.fd)
325
326    def read(self, size):
327        return os.read(self.fd, size)
328
329    def write(self, s):
330        os.write(self.fd, s)
331
332class _Stream:
333    """Class that serves as an adapter between TarFile and
334       a stream-like object.  The stream-like object only
335       needs to have a read() or write() method and is accessed
336       blockwise.  Use of gzip or bzip2 compression is possible.
337       A stream-like object could be, for example: sys.stdin,
338       sys.stdout, a socket, a tape device, etc.
339
340       _Stream is intended to be used only internally.
341    """
342
343    def __init__(self, name, mode, comptype, fileobj, bufsize):
344        """Construct a _Stream object.
345        """
346        self._extfileobj = True
347        if fileobj is None:
348            fileobj = _LowLevelFile(name, mode)
349            self._extfileobj = False
350
351        if comptype == '*':
352            # Enable transparent compression detection for the
353            # stream interface
354            fileobj = _StreamProxy(fileobj)
355            comptype = fileobj.getcomptype()
356
357        self.name     = name or ""
358        self.mode     = mode
359        self.comptype = comptype
360        self.fileobj  = fileobj
361        self.bufsize  = bufsize
362        self.buf      = b""
363        self.pos      = 0
364        self.closed   = False
365
366        try:
367            if comptype == "gz":
368                try:
369                    import zlib
370                except ImportError:
371                    raise CompressionError("zlib module is not available") from None
372                self.zlib = zlib
373                self.crc = zlib.crc32(b"")
374                if mode == "r":
375                    self._init_read_gz()
376                    self.exception = zlib.error
377                else:
378                    self._init_write_gz()
379
380            elif comptype == "bz2":
381                try:
382                    import bz2
383                except ImportError:
384                    raise CompressionError("bz2 module is not available") from None
385                if mode == "r":
386                    self.dbuf = b""
387                    self.cmp = bz2.BZ2Decompressor()
388                    self.exception = OSError
389                else:
390                    self.cmp = bz2.BZ2Compressor()
391
392            elif comptype == "xz":
393                try:
394                    import lzma
395                except ImportError:
396                    raise CompressionError("lzma module is not available") from None
397                if mode == "r":
398                    self.dbuf = b""
399                    self.cmp = lzma.LZMADecompressor()
400                    self.exception = lzma.LZMAError
401                else:
402                    self.cmp = lzma.LZMACompressor()
403
404            elif comptype != "tar":
405                raise CompressionError("unknown compression type %r" % comptype)
406
407        except:
408            if not self._extfileobj:
409                self.fileobj.close()
410            self.closed = True
411            raise
412
413    def __del__(self):
414        if hasattr(self, "closed") and not self.closed:
415            self.close()
416
417    def _init_write_gz(self):
418        """Initialize for writing with gzip compression.
419        """
420        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
421                                            -self.zlib.MAX_WBITS,
422                                            self.zlib.DEF_MEM_LEVEL,
423                                            0)
424        timestamp = struct.pack("<L", int(time.time()))
425        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
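        # The ten header bytes written above follow RFC 1952: \037\213 is the
        # gzip magic number, the first \010 selects the deflate method, the
        # second \010 sets the FNAME flag (a file name field follows), then a
        # 4-byte little-endian mtime, \002 for XFL (maximum compression) and
        # \377 for "unknown" operating system.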
426        if self.name.endswith(".gz"):
427            self.name = self.name[:-3]
428        # Honor "directory components removed" from RFC1952
429        self.name = os.path.basename(self.name)
430        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
431        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
432
433    def write(self, s):
434        """Write string s to the stream.
435        """
436        if self.comptype == "gz":
437            self.crc = self.zlib.crc32(s, self.crc)
438        self.pos += len(s)
439        if self.comptype != "tar":
440            s = self.cmp.compress(s)
441        self.__write(s)
442
443    def __write(self, s):
444        """Write string s to the stream if a whole new block
445           is ready to be written.
446        """
447        self.buf += s
448        while len(self.buf) > self.bufsize:
449            self.fileobj.write(self.buf[:self.bufsize])
450            self.buf = self.buf[self.bufsize:]
451
452    def close(self):
453        """Close the _Stream object. No operation should be
454           done on it afterwards.
455        """
456        if self.closed:
457            return
458
459        self.closed = True
460        try:
461            if self.mode == "w" and self.comptype != "tar":
462                self.buf += self.cmp.flush()
463
464            if self.mode == "w" and self.buf:
465                self.fileobj.write(self.buf)
466                self.buf = b""
467                if self.comptype == "gz":
468                    self.fileobj.write(struct.pack("<L", self.crc))
469                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
470        finally:
471            if not self._extfileobj:
472                self.fileobj.close()
473
474    def _init_read_gz(self):
475        """Initialize for reading a gzip compressed fileobj.
476        """
477        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
478        self.dbuf = b""
479
480        # taken from gzip.GzipFile with some alterations
481        if self.__read(2) != b"\037\213":
482            raise ReadError("not a gzip file")
483        if self.__read(1) != b"\010":
484            raise CompressionError("unsupported compression method")
485
486        flag = ord(self.__read(1))
487        self.__read(6)
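        # The six skipped bytes are MTIME (4), XFL and OS.  The flag bits
        # tested below are the RFC 1952 FLG bits: FEXTRA (4), FNAME (8),
        # FCOMMENT (16) and FHCRC (2); FTEXT (1) needs no special handling.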
488
489        if flag & 4:
490            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
491            self.read(xlen)
492        if flag & 8:
493            while True:
494                s = self.__read(1)
495                if not s or s == NUL:
496                    break
497        if flag & 16:
498            while True:
499                s = self.__read(1)
500                if not s or s == NUL:
501                    break
502        if flag & 2:
503            self.__read(2)
504
505    def tell(self):
506        """Return the stream's file pointer position.
507        """
508        return self.pos
509
510    def seek(self, pos=0):
511        """Set the stream's file pointer to pos. Negative seeking
512           is forbidden.
513        """
514        if pos - self.pos >= 0:
515            blocks, remainder = divmod(pos - self.pos, self.bufsize)
516            for i in range(blocks):
517                self.read(self.bufsize)
518            self.read(remainder)
519        else:
520            raise StreamError("seeking backwards is not allowed")
521        return self.pos
522
523    def read(self, size):
524        """Return the next size number of bytes from the stream."""
525        assert size is not None
526        buf = self._read(size)
527        self.pos += len(buf)
528        return buf
529
530    def _read(self, size):
531        """Return size bytes from the stream.
532        """
533        if self.comptype == "tar":
534            return self.__read(size)
535
536        c = len(self.dbuf)
537        t = [self.dbuf]
538        while c < size:
539            # Skip underlying buffer to avoid unaligned double buffering.
540            if self.buf:
541                buf = self.buf
542                self.buf = b""
543            else:
544                buf = self.fileobj.read(self.bufsize)
545                if not buf:
546                    break
547            try:
548                buf = self.cmp.decompress(buf)
549            except self.exception as e:
550                raise ReadError("invalid compressed data") from e
551            t.append(buf)
552            c += len(buf)
553        t = b"".join(t)
554        self.dbuf = t[size:]
555        return t[:size]
556
557    def __read(self, size):
558        """Return size bytes from stream. If internal buffer is empty,
559           read another block from the stream.
560        """
561        c = len(self.buf)
562        t = [self.buf]
563        while c < size:
564            buf = self.fileobj.read(self.bufsize)
565            if not buf:
566                break
567            t.append(buf)
568            c += len(buf)
569        t = b"".join(t)
570        self.buf = t[size:]
571        return t[:size]
572# class _Stream
573
574class _StreamProxy(object):
575    """Small proxy class that enables transparent compression
576       detection for the Stream interface (mode 'r|*').
577    """
578
579    def __init__(self, fileobj):
580        self.fileobj = fileobj
581        self.buf = self.fileobj.read(BLOCKSIZE)
582
583    def read(self, size):
584        self.read = self.fileobj.read
585        return self.buf
586
587    def getcomptype(self):
588        if self.buf.startswith(b"\x1f\x8b\x08"):
589            return "gz"
590        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
591            return "bz2"
592        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
593            return "xz"
594        else:
595            return "tar"
596
597    def close(self):
598        self.fileobj.close()
599# class _StreamProxy
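# The signatures checked in _StreamProxy.getcomptype() are the usual magic
# numbers: b"\x1f\x8b\x08" for gzip/deflate, b"BZh" plus the b"1AY&SY" block
# marker for bzip2, b"\xfd7zXZ" for the xz container and, as a best-effort
# heuristic, b"\x5d\x00\x00\x80" for legacy raw .lzma streams.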
600
601#------------------------
602# Extraction file object
603#------------------------
604class _FileInFile(object):
605    """A thin wrapper around an existing file object that
606       provides a part of its data as an individual file
607       object.
608    """
609
610    def __init__(self, fileobj, offset, size, blockinfo=None):
611        self.fileobj = fileobj
612        self.offset = offset
613        self.size = size
614        self.position = 0
615        self.name = getattr(fileobj, "name", None)
616        self.closed = False
617
618        if blockinfo is None:
619            blockinfo = [(0, size)]
620
621        # Construct a map with data and zero blocks.
622        self.map_index = 0
623        self.map = []
624        lastpos = 0
625        realpos = self.offset
626        for offset, size in blockinfo:
627            if offset > lastpos:
628                self.map.append((False, lastpos, offset, None))
629            self.map.append((True, offset, offset + size, realpos))
630            realpos += size
631            lastpos = offset + size
632        if lastpos < self.size:
633            self.map.append((False, lastpos, self.size, None))
634
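    # For illustration (values made up): a member stored at archive offset
    # 1000 with blockinfo [(0, 3), (10, 2)] and size 12 produces the map
    #   [(True, 0, 3, 1000), (False, 3, 10, None), (True, 10, 12, 1003)]
    # i.e. data blocks point into the archive while holes are generated as
    # NUL bytes on read().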
635    def flush(self):
636        pass
637
638    def readable(self):
639        return True
640
641    def writable(self):
642        return False
643
644    def seekable(self):
645        return self.fileobj.seekable()
646
647    def tell(self):
648        """Return the current file position.
649        """
650        return self.position
651
652    def seek(self, position, whence=io.SEEK_SET):
653        """Seek to a position in the file.
654        """
655        if whence == io.SEEK_SET:
656            self.position = min(max(position, 0), self.size)
657        elif whence == io.SEEK_CUR:
658            if position < 0:
659                self.position = max(self.position + position, 0)
660            else:
661                self.position = min(self.position + position, self.size)
662        elif whence == io.SEEK_END:
663            self.position = max(min(self.size + position, self.size), 0)
664        else:
665            raise ValueError("Invalid argument")
666        return self.position
667
668    def read(self, size=None):
669        """Read data from the file.
670        """
671        if size is None:
672            size = self.size - self.position
673        else:
674            size = min(size, self.size - self.position)
675
676        buf = b""
677        while size > 0:
678            while True:
679                data, start, stop, offset = self.map[self.map_index]
680                if start <= self.position < stop:
681                    break
682                else:
683                    self.map_index += 1
684                    if self.map_index == len(self.map):
685                        self.map_index = 0
686            length = min(size, stop - self.position)
687            if data:
688                self.fileobj.seek(offset + (self.position - start))
689                b = self.fileobj.read(length)
690                if len(b) != length:
691                    raise ReadError("unexpected end of data")
692                buf += b
693            else:
694                buf += NUL * length
695            size -= length
696            self.position += length
697        return buf
698
699    def readinto(self, b):
700        buf = self.read(len(b))
701        b[:len(buf)] = buf
702        return len(buf)
703
704    def close(self):
705        self.closed = True
706#class _FileInFile
707
708class ExFileObject(io.BufferedReader):
709
710    def __init__(self, tarfile, tarinfo):
711        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
712                tarinfo.size, tarinfo.sparse)
713        super().__init__(fileobj)
714#class ExFileObject
715
716
717#-----------------------------
718# extraction filters (PEP 706)
719#-----------------------------
720
721class FilterError(TarError):
722    pass
723
724class AbsolutePathError(FilterError):
725    def __init__(self, tarinfo):
726        self.tarinfo = tarinfo
727        super().__init__(f'member {tarinfo.name!r} has an absolute path')
728
729class OutsideDestinationError(FilterError):
730    def __init__(self, tarinfo, path):
731        self.tarinfo = tarinfo
732        self._path = path
733        super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, '
734                         + 'which is outside the destination')
735
736class SpecialFileError(FilterError):
737    def __init__(self, tarinfo):
738        self.tarinfo = tarinfo
739        super().__init__(f'{tarinfo.name!r} is a special file')
740
741class AbsoluteLinkError(FilterError):
742    def __init__(self, tarinfo):
743        self.tarinfo = tarinfo
744        super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path')
745
746class LinkOutsideDestinationError(FilterError):
747    def __init__(self, tarinfo, path):
748        self.tarinfo = tarinfo
749        self._path = path
750        super().__init__(f'{tarinfo.name!r} would link to {path!r}, '
751                         + 'which is outside the destination')
752
753def _get_filtered_attrs(member, dest_path, for_data=True):
754    new_attrs = {}
755    name = member.name
756    dest_path = os.path.realpath(dest_path)
757    # Strip leading / (tar's directory separator) from filenames.
758    # Include os.sep (target OS directory separator) as well.
759    if name.startswith(('/', os.sep)):
760        name = new_attrs['name'] = member.path.lstrip('/' + os.sep)
761    if os.path.isabs(name):
762        # Path is absolute even after stripping.
763        # For example, 'C:/foo' on Windows.
764        raise AbsolutePathError(member)
765    # Ensure we stay in the destination
766    target_path = os.path.realpath(os.path.join(dest_path, name))
767    if os.path.commonpath([target_path, dest_path]) != dest_path:
768        raise OutsideDestinationError(member, target_path)
769    # Limit permissions (no high bits, and go-w)
770    mode = member.mode
771    if mode is not None:
772        # Strip high bits & group/other write bits
773        mode = mode & 0o755
774        if for_data:
775            # For data, handle permissions & file types
776            if member.isreg() or member.islnk():
777                if not mode & 0o100:
778                    # Clear executable bits if not executable by user
779                    mode &= ~0o111
780                # Ensure owner can read & write
781                mode |= 0o600
782            elif member.isdir() or member.issym():
783                # Ignore mode for directories & symlinks
784                mode = None
785            else:
786                # Reject special files
787                raise SpecialFileError(member)
788        if mode != member.mode:
789            new_attrs['mode'] = mode
790    if for_data:
791        # Ignore ownership for 'data'
792        if member.uid is not None:
793            new_attrs['uid'] = None
794        if member.gid is not None:
795            new_attrs['gid'] = None
796        if member.uname is not None:
797            new_attrs['uname'] = None
798        if member.gname is not None:
799            new_attrs['gname'] = None
800        # Check link destination for 'data'
801        if member.islnk() or member.issym():
802            if os.path.isabs(member.linkname):
803                raise AbsoluteLinkError(member)
804            target_path = os.path.realpath(os.path.join(dest_path, member.linkname))
805            if os.path.commonpath([target_path, dest_path]) != dest_path:
806                raise LinkOutsideDestinationError(member, target_path)
807    return new_attrs
808
809def fully_trusted_filter(member, dest_path):
810    return member
811
812def tar_filter(member, dest_path):
813    new_attrs = _get_filtered_attrs(member, dest_path, False)
814    if new_attrs:
815        return member.replace(**new_attrs, deep=False)
816    return member
817
818def data_filter(member, dest_path):
819    new_attrs = _get_filtered_attrs(member, dest_path, True)
820    if new_attrs:
821        return member.replace(**new_attrs, deep=False)
822    return member
823
824_NAMED_FILTERS = {
825    "fully_trusted": fully_trusted_filter,
826    "tar": tar_filter,
827    "data": data_filter,
828}
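# These filters are normally selected by name via the filter= argument of
# TarFile.extract() and TarFile.extractall(), for example
# tar.extractall(path, filter="data").  A callable taking (member, dest_path)
# can be passed instead; it should return a (possibly replaced) TarInfo, or
# None to exclude the member.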
829
830#------------------
831# Exported Classes
832#------------------
833
834# Sentinel for replace() defaults, meaning "don't change the attribute"
835_KEEP = object()
836
837# Header length is digits followed by a space.
838_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ")
839
840class TarInfo(object):
841    """Informational class which holds the details about an
842       archive member given by a tar header block.
843       TarInfo objects are returned by TarFile.getmember(),
844       TarFile.getmembers() and TarFile.gettarinfo() and are
845       usually created internally.
846    """
847
848    __slots__ = dict(
849        name = 'Name of the archive member.',
850        mode = 'Permission bits.',
851        uid = 'User ID of the user who originally stored this member.',
852        gid = 'Group ID of the user who originally stored this member.',
853        size = 'Size in bytes.',
854        mtime = 'Time of last modification.',
855        chksum = 'Header checksum.',
856        type = ('File type. type is usually one of these constants: '
857                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
858                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
859        linkname = ('Name of the link target, which is only present '
860                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
861        uname = 'User name.',
862        gname = 'Group name.',
863        devmajor = 'Device major number.',
864        devminor = 'Device minor number.',
865        offset = 'The tar header starts here.',
866        offset_data = "The file's data starts here.",
867        pax_headers = ('A dictionary containing key-value pairs of an '
868                       'associated pax extended header.'),
869        sparse = 'Sparse member information.',
870        tarfile = None,
871        _sparse_structs = None,
872        _link_target = None,
873        )
874
875    def __init__(self, name=""):
876        """Construct a TarInfo object. name is the optional name
877           of the member.
878        """
879        self.name = name        # member name
880        self.mode = 0o644       # file permissions
881        self.uid = 0            # user id
882        self.gid = 0            # group id
883        self.size = 0           # file size
884        self.mtime = 0          # modification time
885        self.chksum = 0         # header checksum
886        self.type = REGTYPE     # member type
887        self.linkname = ""      # link name
888        self.uname = ""         # user name
889        self.gname = ""         # group name
890        self.devmajor = 0       # device major number
891        self.devminor = 0       # device minor number
892
893        self.offset = 0         # the tar header starts here
894        self.offset_data = 0    # the file's data starts here
895
896        self.sparse = None      # sparse member information
897        self.pax_headers = {}   # pax header information
898
899    @property
900    def path(self):
901        'In pax headers, "name" is called "path".'
902        return self.name
903
904    @path.setter
905    def path(self, name):
906        self.name = name
907
908    @property
909    def linkpath(self):
910        'In pax headers, "linkname" is called "linkpath".'
911        return self.linkname
912
913    @linkpath.setter
914    def linkpath(self, linkname):
915        self.linkname = linkname
916
917    def __repr__(self):
918        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
919
920    def replace(self, *,
921                name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP,
922                uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP,
923                deep=True, _KEEP=_KEEP):
924        """Return a deep copy of self with the given attributes replaced.
925        """
926        if deep:
927            result = copy.deepcopy(self)
928        else:
929            result = copy.copy(self)
930        if name is not _KEEP:
931            result.name = name
932        if mtime is not _KEEP:
933            result.mtime = mtime
934        if mode is not _KEEP:
935            result.mode = mode
936        if linkname is not _KEEP:
937            result.linkname = linkname
938        if uid is not _KEEP:
939            result.uid = uid
940        if gid is not _KEEP:
941            result.gid = gid
942        if uname is not _KEEP:
943            result.uname = uname
944        if gname is not _KEEP:
945            result.gname = gname
946        return result
947
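    # replace() is what the extraction filters above use to return sanitized
    # copies, e.g. member.replace(mode=0o644, uname=None, deep=False) leaves
    # the original TarInfo untouched (illustrative values).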
948    def get_info(self):
949        """Return the TarInfo's attributes as a dictionary.
950        """
951        if self.mode is None:
952            mode = None
953        else:
954            mode = self.mode & 0o7777
955        info = {
956            "name":     self.name,
957            "mode":     mode,
958            "uid":      self.uid,
959            "gid":      self.gid,
960            "size":     self.size,
961            "mtime":    self.mtime,
962            "chksum":   self.chksum,
963            "type":     self.type,
964            "linkname": self.linkname,
965            "uname":    self.uname,
966            "gname":    self.gname,
967            "devmajor": self.devmajor,
968            "devminor": self.devminor
969        }
970
971        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
972            info["name"] += "/"
973
974        return info
975
976    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
977        """Return a tar header as a string of 512 byte blocks.
978        """
979        info = self.get_info()
980        for name, value in info.items():
981            if value is None:
982                raise ValueError("%s may not be None" % name)
983
984        if format == USTAR_FORMAT:
985            return self.create_ustar_header(info, encoding, errors)
986        elif format == GNU_FORMAT:
987            return self.create_gnu_header(info, encoding, errors)
988        elif format == PAX_FORMAT:
989            return self.create_pax_header(info, encoding)
990        else:
991            raise ValueError("invalid format")
992
993    def create_ustar_header(self, info, encoding, errors):
994        """Return the object as a ustar header block.
995        """
996        info["magic"] = POSIX_MAGIC
997
998        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
999            raise ValueError("linkname is too long")
1000
1001        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
1002            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
1003
1004        return self._create_header(info, USTAR_FORMAT, encoding, errors)
1005
1006    def create_gnu_header(self, info, encoding, errors):
1007        """Return the object as a GNU header block sequence.
1008        """
1009        info["magic"] = GNU_MAGIC
1010
1011        buf = b""
1012        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
1013            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
1014
1015        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
1016            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
1017
1018        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
1019
1020    def create_pax_header(self, info, encoding):
1021        """Return the object as a ustar header block. If it cannot be
1022           represented this way, prepend a pax extended header sequence
1023           with supplemental information.
1024        """
1025        info["magic"] = POSIX_MAGIC
1026        pax_headers = self.pax_headers.copy()
1027
1028        # Test string fields for values that exceed the field length or cannot
1029        # be represented in ASCII encoding.
1030        for name, hname, length in (
1031                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1032                ("uname", "uname", 32), ("gname", "gname", 32)):
1033
1034            if hname in pax_headers:
1035                # The pax header has priority.
1036                continue
1037
1038            # Try to encode the string as ASCII.
1039            try:
1040                info[name].encode("ascii", "strict")
1041            except UnicodeEncodeError:
1042                pax_headers[hname] = info[name]
1043                continue
1044
1045            if len(info[name]) > length:
1046                pax_headers[hname] = info[name]
1047
1048        # Test number fields for values that exceed the field limit or values
1049        # that need to be stored as floats.
1050        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1051            needs_pax = False
1052
1053            val = info[name]
1054            val_is_float = isinstance(val, float)
1055            val_int = round(val) if val_is_float else val
1056            if not 0 <= val_int < 8 ** (digits - 1):
1057                # Avoid overflow.
1058                info[name] = 0
1059                needs_pax = True
1060            elif val_is_float:
1061                # Put rounded value in ustar header, and full
1062                # precision value in pax header.
1063                info[name] = val_int
1064                needs_pax = True
1065
1066            # The existing pax header has priority.
1067            if needs_pax and name not in pax_headers:
1068                pax_headers[name] = str(val)
1069
1070        # Create a pax extended header if necessary.
1071        if pax_headers:
1072            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
1073        else:
1074            buf = b""
1075
1076        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
1077
1078    @classmethod
1079    def create_pax_global_header(cls, pax_headers):
1080        """Return the object as a pax global header block sequence.
1081        """
1082        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
1083
1084    def _posix_split_name(self, name, encoding, errors):
1085        """Split a name longer than 100 chars into a prefix
1086           and a name part.
1087        """
1088        components = name.split("/")
1089        for i in range(1, len(components)):
1090            prefix = "/".join(components[:i])
1091            name = "/".join(components[i:])
1092            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
1093                    len(name.encode(encoding, errors)) <= LENGTH_NAME:
1094                break
1095        else:
1096            raise ValueError("name is too long")
1097
1098        return prefix, name
1099
1100    @staticmethod
1101    def _create_header(info, format, encoding, errors):
1102        """Return a header block. info is a dictionary with file
1103           information, format must be one of the *_FORMAT constants.
1104        """
1105        has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
1106        if has_device_fields:
1107            devmajor = itn(info.get("devmajor", 0), 8, format)
1108            devminor = itn(info.get("devminor", 0), 8, format)
1109        else:
1110            devmajor = stn("", 8, encoding, errors)
1111            devminor = stn("", 8, encoding, errors)
1112
1113        # None values in metadata should cause ValueError.
1114        # itn()/stn() do this for all fields except type.
1115        filetype = info.get("type", REGTYPE)
1116        if filetype is None:
1117            raise ValueError("TarInfo.type must not be None")
1118
1119        parts = [
1120            stn(info.get("name", ""), 100, encoding, errors),
1121            itn(info.get("mode", 0) & 0o7777, 8, format),
1122            itn(info.get("uid", 0), 8, format),
1123            itn(info.get("gid", 0), 8, format),
1124            itn(info.get("size", 0), 12, format),
1125            itn(info.get("mtime", 0), 12, format),
1126            b"        ", # checksum field
1127            filetype,
1128            stn(info.get("linkname", ""), 100, encoding, errors),
1129            info.get("magic", POSIX_MAGIC),
1130            stn(info.get("uname", ""), 32, encoding, errors),
1131            stn(info.get("gname", ""), 32, encoding, errors),
1132            devmajor,
1133            devminor,
1134            stn(info.get("prefix", ""), 155, encoding, errors)
1135        ]
1136
1137        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
1138        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
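        # The chksum field occupies bytes 148-155 of the 512-byte block
        # (512 - 364 == 148 and 512 - 357 == 155), so the slice below replaces
        # seven of the eight placeholder spaces with "%06o\0" and keeps the
        # trailing space, giving the conventional six-octal-digit/NUL/space
        # layout.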
1139        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
1140        return buf
1141
1142    @staticmethod
1143    def _create_payload(payload):
1144        """Return the string payload filled with zero bytes
1145           up to the next 512 byte border.
1146        """
1147        blocks, remainder = divmod(len(payload), BLOCKSIZE)
1148        if remainder > 0:
1149            payload += (BLOCKSIZE - remainder) * NUL
1150        return payload
1151
1152    @classmethod
1153    def _create_gnu_long_header(cls, name, type, encoding, errors):
1154        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1155           for name.
1156        """
1157        name = name.encode(encoding, errors) + NUL
1158
1159        info = {}
1160        info["name"] = "././@LongLink"
1161        info["type"] = type
1162        info["size"] = len(name)
1163        info["magic"] = GNU_MAGIC
1164
1165        # create extended header + name blocks.
1166        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
1167                cls._create_payload(name)
1168
1169    @classmethod
1170    def _create_pax_generic_header(cls, pax_headers, type, encoding):
1171        """Return a POSIX.1-2008 extended or global header sequence
1172           that contains a list of keyword, value pairs. The values
1173           must be strings.
1174        """
1175        # Check if one of the fields contains surrogate characters and thereby
1176        # forces hdrcharset=BINARY, see _proc_pax() for more information.
1177        binary = False
1178        for keyword, value in pax_headers.items():
1179            try:
1180                value.encode("utf-8", "strict")
1181            except UnicodeEncodeError:
1182                binary = True
1183                break
1184
1185        records = b""
1186        if binary:
1187            # Put the hdrcharset field at the beginning of the header.
1188            records += b"21 hdrcharset=BINARY\n"
1189
1190        for keyword, value in pax_headers.items():
1191            keyword = keyword.encode("utf-8")
1192            if binary:
1193                # Try to restore the original byte representation of `value'.
1194                # Needless to say, the encoding must match the string.
1195                value = value.encode(encoding, "surrogateescape")
1196            else:
1197                value = value.encode("utf-8")
1198
1199            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
1200            n = p = 0
1201            while True:
1202                n = l + len(str(p))
1203                if n == p:
1204                    break
1205                p = n
1206            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
1207
1208        # We use a hardcoded "././@PaxHeader" name like star does
1209        # instead of the one that POSIX recommends.
1210        info = {}
1211        info["name"] = "././@PaxHeader"
1212        info["type"] = type
1213        info["size"] = len(records)
1214        info["magic"] = POSIX_MAGIC
1215
1216        # Create pax header + record blocks.
1217        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
1218                cls._create_payload(records)
1219
1220    @classmethod
1221    def frombuf(cls, buf, encoding, errors):
1222        """Construct a TarInfo object from a 512 byte bytes object.
1223        """
1224        if len(buf) == 0:
1225            raise EmptyHeaderError("empty header")
1226        if len(buf) != BLOCKSIZE:
1227            raise TruncatedHeaderError("truncated header")
1228        if buf.count(NUL) == BLOCKSIZE:
1229            raise EOFHeaderError("end of file header")
1230
1231        chksum = nti(buf[148:156])
1232        if chksum not in calc_chksums(buf):
1233            raise InvalidHeaderError("bad checksum")
1234
1235        obj = cls()
1236        obj.name = nts(buf[0:100], encoding, errors)
1237        obj.mode = nti(buf[100:108])
1238        obj.uid = nti(buf[108:116])
1239        obj.gid = nti(buf[116:124])
1240        obj.size = nti(buf[124:136])
1241        obj.mtime = nti(buf[136:148])
1242        obj.chksum = chksum
1243        obj.type = buf[156:157]
1244        obj.linkname = nts(buf[157:257], encoding, errors)
1245        obj.uname = nts(buf[265:297], encoding, errors)
1246        obj.gname = nts(buf[297:329], encoding, errors)
1247        obj.devmajor = nti(buf[329:337])
1248        obj.devminor = nti(buf[337:345])
1249        prefix = nts(buf[345:500], encoding, errors)
1250
1251        # Old V7 tar format represents a directory as a regular
1252        # file with a trailing slash.
1253        if obj.type == AREGTYPE and obj.name.endswith("/"):
1254            obj.type = DIRTYPE
1255
1256        # The old GNU sparse format occupies some of the unused
1257        # space in the buffer for up to 4 sparse structures.
1258        # Save them for later processing in _proc_sparse().
1259        if obj.type == GNUTYPE_SPARSE:
1260            pos = 386
1261            structs = []
1262            for i in range(4):
1263                try:
1264                    offset = nti(buf[pos:pos + 12])
1265                    numbytes = nti(buf[pos + 12:pos + 24])
1266                except ValueError:
1267                    break
1268                structs.append((offset, numbytes))
1269                pos += 24
1270            isextended = bool(buf[482])
1271            origsize = nti(buf[483:495])
1272            obj._sparse_structs = (structs, isextended, origsize)
1273
1274        # Remove redundant slashes from directories.
1275        if obj.isdir():
1276            obj.name = obj.name.rstrip("/")
1277
1278        # Reconstruct a ustar longname.
1279        if prefix and obj.type not in GNU_TYPES:
1280            obj.name = prefix + "/" + obj.name
1281        return obj
1282
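    # Field layout used by frombuf() (byte offsets within the 512-byte block):
    # name 0-99, mode 100-107, uid 108-115, gid 116-123, size 124-135,
    # mtime 136-147, chksum 148-155, typeflag 156, linkname 157-256,
    # magic/version 257-264 (not parsed here), uname 265-296, gname 297-328,
    # devmajor 329-336, devminor 337-344, prefix 345-499.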
1283    @classmethod
1284    def fromtarfile(cls, tarfile):
1285        """Return the next TarInfo object from TarFile object
1286           tarfile.
1287        """
1288        buf = tarfile.fileobj.read(BLOCKSIZE)
1289        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1290        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1291        return obj._proc_member(tarfile)
1292
1293    #--------------------------------------------------------------------------
1294    # The following are methods that are called depending on the type of a
1295    # member. The entry point is _proc_member() which can be overridden in a
1296    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1297    # implement the following
1298    # operations:
1299    # 1. Set self.offset_data to the position where the data blocks begin,
1300    #    if there is data that follows.
1301    # 2. Set tarfile.offset to the position where the next member's header will
1302    #    begin.
1303    # 3. Return self or another valid TarInfo object.
1304    def _proc_member(self, tarfile):
1305        """Choose the right processing method depending on
1306           the type and call it.
1307        """
1308        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1309            return self._proc_gnulong(tarfile)
1310        elif self.type == GNUTYPE_SPARSE:
1311            return self._proc_sparse(tarfile)
1312        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1313            return self._proc_pax(tarfile)
1314        else:
1315            return self._proc_builtin(tarfile)
1316
1317    def _proc_builtin(self, tarfile):
1318        """Process a builtin type or an unknown type which
1319           will be treated as a regular file.
1320        """
1321        self.offset_data = tarfile.fileobj.tell()
1322        offset = self.offset_data
1323        if self.isreg() or self.type not in SUPPORTED_TYPES:
1324            # Skip the following data blocks.
1325            offset += self._block(self.size)
1326        tarfile.offset = offset
1327
1328        # Patch the TarInfo object with saved global
1329        # header information.
1330        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1331
1332        # Remove redundant slashes from directories. This is to be consistent
1333        # with frombuf().
1334        if self.isdir():
1335            self.name = self.name.rstrip("/")
1336
1337        return self
1338
1339    def _proc_gnulong(self, tarfile):
1340        """Process the blocks that hold a GNU longname
1341           or longlink member.
1342        """
1343        buf = tarfile.fileobj.read(self._block(self.size))
1344
1345        # Fetch the next header and process it.
1346        try:
1347            next = self.fromtarfile(tarfile)
1348        except HeaderError as e:
1349            raise SubsequentHeaderError(str(e)) from None
1350
1351        # Patch the TarInfo object from the next header with
1352        # the longname information.
1353        next.offset = self.offset
1354        if self.type == GNUTYPE_LONGNAME:
1355            next.name = nts(buf, tarfile.encoding, tarfile.errors)
1356        elif self.type == GNUTYPE_LONGLINK:
1357            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
1358
1359        # Remove redundant slashes from directories. This is to be consistent
1360        # with frombuf().
1361        if next.isdir():
1362            next.name = next.name.removesuffix("/")
1363
1364        return next
1365
1366    def _proc_sparse(self, tarfile):
1367        """Process a GNU sparse header plus extra headers.
1368        """
1369        # We already collected some sparse structures in frombuf().
1370        structs, isextended, origsize = self._sparse_structs
1371        del self._sparse_structs
1372
1373        # Collect sparse structures from extended header blocks.
1374        while isextended:
1375            buf = tarfile.fileobj.read(BLOCKSIZE)
1376            pos = 0
1377            for i in range(21):
1378                try:
1379                    offset = nti(buf[pos:pos + 12])
1380                    numbytes = nti(buf[pos + 12:pos + 24])
1381                except ValueError:
1382                    break
1383                if offset and numbytes:
1384                    structs.append((offset, numbytes))
1385                pos += 24
1386            isextended = bool(buf[504])
1387        self.sparse = structs
1388
1389        self.offset_data = tarfile.fileobj.tell()
1390        tarfile.offset = self.offset_data + self._block(self.size)
1391        self.size = origsize
1392        return self
1393
1394    def _proc_pax(self, tarfile):
1395        """Process an extended or global header as described in
1396           POSIX.1-2008.
1397        """
1398        # Read the header information.
1399        buf = tarfile.fileobj.read(self._block(self.size))
1400
1401        # A pax header stores supplemental information for either
1402        # the following file (extended) or all following files
1403        # (global).
1404        if self.type == XGLTYPE:
1405            pax_headers = tarfile.pax_headers
1406        else:
1407            pax_headers = tarfile.pax_headers.copy()
1408
1409        # Parse pax header information. A record looks like this:
1410        # "%d %s=%s\n" % (length, keyword, value). length is the size
1411        # of the complete record including the length field itself and
1412        # the newline.
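        # For example, the record b"18 path=some/file\n" has a total length
        # of 18 bytes, counting the two length digits, the separating space
        # and the trailing newline.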
1413        pos = 0
1414        encoding = None
1415        raw_headers = []
1416        while len(buf) > pos and buf[pos] != 0x00:
1417            if not (match := _header_length_prefix_re.match(buf, pos)):
1418                raise InvalidHeaderError("invalid header")
1419            try:
1420                length = int(match.group(1))
1421            except ValueError:
1422                raise InvalidHeaderError("invalid header")
1423            # Headers must be at least 5 bytes, shortest being '5 x=\n'.
1424            # Value is allowed to be empty.
1425            if length < 5:
1426                raise InvalidHeaderError("invalid header")
1427            if pos + length > len(buf):
1428                raise InvalidHeaderError("invalid header")
1429
1430            header_value_end_offset = match.start(1) + length - 1  # Last byte of the header
1431            keyword_and_value = buf[match.end(1) + 1:header_value_end_offset]
1432            raw_keyword, equals, raw_value = keyword_and_value.partition(b"=")
1433
1434            # Check the framing of the header. The last character must be '\n' (0x0A)
1435            if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A:
1436                raise InvalidHeaderError("invalid header")
1437            raw_headers.append((length, raw_keyword, raw_value))
1438
1439            # Check if the pax header contains a hdrcharset field. This tells us
1440            # the encoding of the path, linkpath, uname and gname fields. Normally,
1441            # these fields are UTF-8 encoded, but POSIX.1-2008 allows tar
1442            # implementations to store them as raw binary strings if the
1443            # translation to UTF-8 fails. For the time being, we don't care about
1444            # anything other than "BINARY". The only other value that is currently
1445            # allowed by the standard is "ISO-IR 10646 2000 UTF-8", in other words UTF-8.
1446            # Note that we only follow the initial 'hdrcharset' setting to preserve
1447            # the initial behavior of the 'tarfile' module.
1448            if raw_keyword == b"hdrcharset" and encoding is None:
1449                if raw_value == b"BINARY":
1450                    encoding = tarfile.encoding
1451                else:  # This branch ensures only the first 'hdrcharset' header is used.
1452                    encoding = "utf-8"
1453
1454            pos += length
1455
1456        # If no explicit hdrcharset is set, we use UTF-8 as a default.
1457        if encoding is None:
1458            encoding = "utf-8"
1459
1460        # After parsing the raw headers we can decode them to text.
1461        for length, raw_keyword, raw_value in raw_headers:
1462            # Normally, we could just use "utf-8" as the encoding and "strict"
1463            # as the error handler, but we better not take the risk. For
1464            # example, GNU tar <= 1.23 is known to store filenames it cannot
1465            # translate to UTF-8 as raw strings (unfortunately without a
1466            # hdrcharset=BINARY header).
1467            # We first try the strict standard encoding, and if that fails we
1468            # fall back on the user's encoding and error handler.
1469            keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8",
1470                    tarfile.errors)
1471            if keyword in PAX_NAME_FIELDS:
1472                value = self._decode_pax_field(raw_value, encoding, tarfile.encoding,
1473                        tarfile.errors)
1474            else:
1475                value = self._decode_pax_field(raw_value, "utf-8", "utf-8",
1476                        tarfile.errors)
1477
1478            pax_headers[keyword] = value
1479
1480        # Fetch the next header.
1481        try:
1482            next = self.fromtarfile(tarfile)
1483        except HeaderError as e:
1484            raise SubsequentHeaderError(str(e)) from None
1485
1486        # Process GNU sparse information.
1487        if "GNU.sparse.map" in pax_headers:
1488            # GNU extended sparse format version 0.1.
1489            self._proc_gnusparse_01(next, pax_headers)
1490
1491        elif "GNU.sparse.size" in pax_headers:
1492            # GNU extended sparse format version 0.0.
1493            self._proc_gnusparse_00(next, raw_headers)
1494
1495        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1496            # GNU extended sparse format version 1.0.
1497            self._proc_gnusparse_10(next, pax_headers, tarfile)
1498
1499        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1500            # Patch the TarInfo object with the extended header info.
1501            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1502            next.offset = self.offset
1503
1504            if "size" in pax_headers:
1505                # If the extended header replaces the size field,
1506                # we need to recalculate the offset where the next
1507                # header starts.
1508                offset = next.offset_data
1509                if next.isreg() or next.type not in SUPPORTED_TYPES:
1510                    offset += next._block(next.size)
1511                tarfile.offset = offset
1512
1513        return next
1514
1515    def _proc_gnusparse_00(self, next, raw_headers):
1516        """Process a GNU tar extended sparse header, version 0.0.
1517        """
1518        offsets = []
1519        numbytes = []
1520        for _, keyword, value in raw_headers:
1521            if keyword == b"GNU.sparse.offset":
1522                try:
1523                    offsets.append(int(value.decode()))
1524                except ValueError:
1525                    raise InvalidHeaderError("invalid header")
1526
1527            elif keyword == b"GNU.sparse.numbytes":
1528                try:
1529                    numbytes.append(int(value.decode()))
1530                except ValueError:
1531                    raise InvalidHeaderError("invalid header")
1532
1533        next.sparse = list(zip(offsets, numbytes))
1534
1535    def _proc_gnusparse_01(self, next, pax_headers):
1536        """Process a GNU tar extended sparse header, version 0.1.
1537        """
1538        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1539        next.sparse = list(zip(sparse[::2], sparse[1::2]))
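        # Illustrative example (not part of the module): a pax value such as
        # "0,512,10240,512" describes two (offset, numbytes) pairs.
        #
        #     sparse = [int(x) for x in "0,512,10240,512".split(",")]
        #     list(zip(sparse[::2], sparse[1::2]))   # [(0, 512), (10240, 512)]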
1540
1541    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1542        """Process a GNU tar extended sparse header, version 1.0.
1543        """
1544        fields = None
1545        sparse = []
1546        buf = tarfile.fileobj.read(BLOCKSIZE)
1547        fields, buf = buf.split(b"\n", 1)
1548        fields = int(fields)
1549        while len(sparse) < fields * 2:
1550            if b"\n" not in buf:
1551                buf += tarfile.fileobj.read(BLOCKSIZE)
1552            number, buf = buf.split(b"\n", 1)
1553            sparse.append(int(number))
1554        next.offset_data = tarfile.fileobj.tell()
1555        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1556
1557    def _apply_pax_info(self, pax_headers, encoding, errors):
1558        """Replace fields with supplemental information from a previous
1559           pax extended or global header.
1560        """
1561        for keyword, value in pax_headers.items():
1562            if keyword == "GNU.sparse.name":
1563                setattr(self, "path", value)
1564            elif keyword == "GNU.sparse.size":
1565                setattr(self, "size", int(value))
1566            elif keyword == "GNU.sparse.realsize":
1567                setattr(self, "size", int(value))
1568            elif keyword in PAX_FIELDS:
1569                if keyword in PAX_NUMBER_FIELDS:
1570                    try:
1571                        value = PAX_NUMBER_FIELDS[keyword](value)
1572                    except ValueError:
1573                        value = 0
1574                if keyword == "path":
1575                    value = value.rstrip("/")
1576                setattr(self, keyword, value)
1577
1578        self.pax_headers = pax_headers.copy()
1579
1580    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1581        """Decode a single field from a pax record.
1582        """
1583        try:
1584            return value.decode(encoding, "strict")
1585        except UnicodeDecodeError:
1586            return value.decode(fallback_encoding, fallback_errors)
1587
1588    def _block(self, count):
1589        """Round up a byte count by BLOCKSIZE and return it,
1590           e.g. _block(834) => 1024.
1591        """
1592        blocks, remainder = divmod(count, BLOCKSIZE)
1593        if remainder:
1594            blocks += 1
1595        return blocks * BLOCKSIZE
1596
1597    def isreg(self):
1598        'Return True if the TarInfo object is a regular file.'
1599        return self.type in REGULAR_TYPES
1600
1601    def isfile(self):
1602        'Return True if the TarInfo object is a regular file.'
1603        return self.isreg()
1604
1605    def isdir(self):
1606        'Return True if it is a directory.'
1607        return self.type == DIRTYPE
1608
1609    def issym(self):
1610        'Return True if it is a symbolic link.'
1611        return self.type == SYMTYPE
1612
1613    def islnk(self):
1614        'Return True if it is a hard link.'
1615        return self.type == LNKTYPE
1616
1617    def ischr(self):
1618        'Return True if it is a character device.'
1619        return self.type == CHRTYPE
1620
1621    def isblk(self):
1622        'Return True if it is a block device.'
1623        return self.type == BLKTYPE
1624
1625    def isfifo(self):
1626        'Return True if it is a FIFO.'
1627        return self.type == FIFOTYPE
1628
1629    def issparse(self):
1630        return self.sparse is not None
1631
1632    def isdev(self):
1633        'Return True if it is one of character device, block device or FIFO.'
1634        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1635# class TarInfo
1636
1637class TarFile(object):
1638    """The TarFile class provides an interface to tar archives.
1639    """
1640
1641    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
1642
1643    dereference = False         # If true, add content of linked file to the
1644                                # tar file, else the link.
1645
1646    ignore_zeros = False        # If true, skips empty or invalid blocks and
1647                                # continues processing.
1648
1649    errorlevel = 1              # If 0, fatal errors only appear in debug
1650                                # messages (if debug >= 0). If > 0, errors
1651                                # are passed to the caller as exceptions.
1652
1653    format = DEFAULT_FORMAT     # The format to use when creating an archive.
1654
1655    encoding = ENCODING         # Encoding for 8-bit character strings.
1656
1657    errors = None               # Error handler for unicode conversion.
1658
1659    tarinfo = TarInfo           # The default TarInfo class to use.
1660
1661    fileobject = ExFileObject   # The file-object for extractfile().
1662
1663    extraction_filter = None    # The default filter for extraction.
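    # Illustrative usage (not part of the module): the attributes above can be
    # overridden per instance (or in a subclass). The archive name below is
    # hypothetical.
    #
    #     import tarfile
    #     with tarfile.open("example.tar") as tf:
    #         tf.errorlevel = 0                           # don't raise extraction errors
    #         tf.extraction_filter = tarfile.data_filter  # opt in to the 'data' filter
    #         tf.extractall(path="out")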
1664
1665    def __init__(self, name=None, mode="r", fileobj=None, format=None,
1666            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1667            errors="surrogateescape", pax_headers=None, debug=None,
1668            errorlevel=None, copybufsize=None):
1669        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1670           read from an existing archive, 'a' to append data to an existing
1671           file, 'w' to create a new file overwriting an existing one, or 'x'
1672           to create a new file exclusively. `mode' defaults to 'r'.
1673           If `fileobj' is given, it is used for reading or writing data. If it
1674           can be determined, `mode' is overridden by `fileobj's mode.
1675           `fileobj' is not closed when TarFile is closed.
1676        """
1677        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
1678        if mode not in modes:
1679            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1680        self.mode = mode
1681        self._mode = modes[mode]
1682
1683        if not fileobj:
1684            if self.mode == "a" and not os.path.exists(name):
1685                # Create nonexistent files in append mode.
1686                self.mode = "w"
1687                self._mode = "wb"
1688            fileobj = bltn_open(name, self._mode)
1689            self._extfileobj = False
1690        else:
1691            if (name is None and hasattr(fileobj, "name") and
1692                isinstance(fileobj.name, (str, bytes))):
1693                name = fileobj.name
1694            if hasattr(fileobj, "mode"):
1695                self._mode = fileobj.mode
1696            self._extfileobj = True
1697        self.name = os.path.abspath(name) if name else None
1698        self.fileobj = fileobj
1699
1700        # Init attributes.
1701        if format is not None:
1702            self.format = format
1703        if tarinfo is not None:
1704            self.tarinfo = tarinfo
1705        if dereference is not None:
1706            self.dereference = dereference
1707        if ignore_zeros is not None:
1708            self.ignore_zeros = ignore_zeros
1709        if encoding is not None:
1710            self.encoding = encoding
1711        self.errors = errors
1712
1713        if pax_headers is not None and self.format == PAX_FORMAT:
1714            self.pax_headers = pax_headers
1715        else:
1716            self.pax_headers = {}
1717
1718        if debug is not None:
1719            self.debug = debug
1720        if errorlevel is not None:
1721            self.errorlevel = errorlevel
1722
1723        # Init datastructures.
1724        self.copybufsize = copybufsize
1725        self.closed = False
1726        self.members = []       # list of members as TarInfo objects
1727        self._loaded = False    # flag if all members have been read
1728        self.offset = self.fileobj.tell()
1729                                # current position in the archive file
1730        self.inodes = {}        # dictionary caching the inodes of
1731                                # archive members already added
1732
1733        try:
1734            if self.mode == "r":
1735                self.firstmember = None
1736                self.firstmember = self.next()
1737
1738            if self.mode == "a":
1739                # Move to the end of the archive,
1740                # before the first empty block.
1741                while True:
1742                    self.fileobj.seek(self.offset)
1743                    try:
1744                        tarinfo = self.tarinfo.fromtarfile(self)
1745                        self.members.append(tarinfo)
1746                    except EOFHeaderError:
1747                        self.fileobj.seek(self.offset)
1748                        break
1749                    except HeaderError as e:
1750                        raise ReadError(str(e)) from None
1751
1752            if self.mode in ("a", "w", "x"):
1753                self._loaded = True
1754
1755                if self.pax_headers:
1756                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1757                    self.fileobj.write(buf)
1758                    self.offset += len(buf)
1759        except:
1760            if not self._extfileobj:
1761                self.fileobj.close()
1762            self.closed = True
1763            raise
1764
1765    #--------------------------------------------------------------------------
1766    # Below are the classmethods which act as alternate constructors to the
1767    # TarFile class. The open() method is the only one that is needed for
1768    # public use; it is the "super"-constructor and is able to select an
1769    # adequate "sub"-constructor for a particular compression using the mapping
1770    # from OPEN_METH.
1771    #
1772    # This concept allows one to subclass TarFile without losing the comfort of
1773    # the super-constructor. A sub-constructor is registered and made available
1774    # by adding it to the mapping in OPEN_METH.
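    # Illustrative sketch (not part of the module): a subclass could register
    # an additional compression method by extending OPEN_METH. The "zst" entry
    # and ZstdTarFile class below are hypothetical.
    #
    #     class ZstdTarFile(TarFile):
    #         OPEN_METH = {**TarFile.OPEN_METH, "zst": "zstopen"}
    #
    #         @classmethod
    #         def zstopen(cls, name, mode="r", fileobj=None, **kwargs):
    #             # wrap 'fileobj or name' in a zstandard stream here,
    #             # then delegate to cls.taropen(name, mode, wrapped, **kwargs)
    #             ...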
1775
1776    @classmethod
1777    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1778        """Open a tar archive for reading, writing or appending. Return
1779           an appropriate TarFile instance.
1780
1781           mode:
1782           'r' or 'r:*' open for reading with transparent compression
1783           'r:'         open for reading exclusively uncompressed
1784           'r:gz'       open for reading with gzip compression
1785           'r:bz2'      open for reading with bzip2 compression
1786           'r:xz'       open for reading with lzma compression
1787           'a' or 'a:'  open for appending, creating the file if necessary
1788           'w' or 'w:'  open for writing without compression
1789           'w:gz'       open for writing with gzip compression
1790           'w:bz2'      open for writing with bzip2 compression
1791           'w:xz'       open for writing with lzma compression
1792
1793           'x' or 'x:'  create a tarfile exclusively without compression, raise
1794                        an exception if the file is already created
1795           'x:gz'       create a gzip compressed tarfile, raise an exception
1796                        if the file is already created
1797           'x:bz2'      create a bzip2 compressed tarfile, raise an exception
1798                        if the file is already created
1799           'x:xz'       create an lzma compressed tarfile, raise an exception
1800                        if the file is already created
1801
1802           'r|*'        open a stream of tar blocks with transparent compression
1803           'r|'         open an uncompressed stream of tar blocks for reading
1804           'r|gz'       open a gzip compressed stream of tar blocks
1805           'r|bz2'      open a bzip2 compressed stream of tar blocks
1806           'r|xz'       open an lzma compressed stream of tar blocks
1807           'w|'         open an uncompressed stream for writing
1808           'w|gz'       open a gzip compressed stream for writing
1809           'w|bz2'      open a bzip2 compressed stream for writing
1810           'w|xz'       open an lzma compressed stream for writing
1811        """
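        # Illustrative usage (not part of the module); archive names are
        # hypothetical:
        #
        #     import tarfile
        #     with tarfile.open("backup.tar.gz", "r:*") as tf:   # transparent decompression
        #         names = tf.getnames()
        #     with tarfile.open("new.tar.xz", "w:xz") as tf:     # lzma compressed output
        #         tf.add("some_directory")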
1812
1813        if not name and not fileobj:
1814            raise ValueError("nothing to open")
1815
1816        if mode in ("r", "r:*"):
1817            # Find out which *open() is appropriate for opening the file.
1818            def not_compressed(comptype):
1819                return cls.OPEN_METH[comptype] == 'taropen'
1820            error_msgs = []
1821            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
1822                func = getattr(cls, cls.OPEN_METH[comptype])
1823                if fileobj is not None:
1824                    saved_pos = fileobj.tell()
1825                try:
1826                    return func(name, "r", fileobj, **kwargs)
1827                except (ReadError, CompressionError) as e:
1828                    error_msgs.append(f'- method {comptype}: {e!r}')
1829                    if fileobj is not None:
1830                        fileobj.seek(saved_pos)
1831                    continue
1832            error_msgs_summary = '\n'.join(error_msgs)
1833            raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}")
1834
1835        elif ":" in mode:
1836            filemode, comptype = mode.split(":", 1)
1837            filemode = filemode or "r"
1838            comptype = comptype or "tar"
1839
1840            # Select the *open() function according to
1841            # given compression.
1842            if comptype in cls.OPEN_METH:
1843                func = getattr(cls, cls.OPEN_METH[comptype])
1844            else:
1845                raise CompressionError("unknown compression type %r" % comptype)
1846            return func(name, filemode, fileobj, **kwargs)
1847
1848        elif "|" in mode:
1849            filemode, comptype = mode.split("|", 1)
1850            filemode = filemode or "r"
1851            comptype = comptype or "tar"
1852
1853            if filemode not in ("r", "w"):
1854                raise ValueError("mode must be 'r' or 'w'")
1855
1856            stream = _Stream(name, filemode, comptype, fileobj, bufsize)
1857            try:
1858                t = cls(name, filemode, stream, **kwargs)
1859            except:
1860                stream.close()
1861                raise
1862            t._extfileobj = False
1863            return t
1864
1865        elif mode in ("a", "w", "x"):
1866            return cls.taropen(name, mode, fileobj, **kwargs)
1867
1868        raise ValueError("undiscernible mode")
1869
1870    @classmethod
1871    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1872        """Open uncompressed tar archive name for reading or writing.
1873        """
1874        if mode not in ("r", "a", "w", "x"):
1875            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1876        return cls(name, mode, fileobj, **kwargs)
1877
1878    @classmethod
1879    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1880        """Open gzip compressed tar archive name for reading or writing.
1881           Appending is not allowed.
1882        """
1883        if mode not in ("r", "w", "x"):
1884            raise ValueError("mode must be 'r', 'w' or 'x'")
1885
1886        try:
1887            from gzip import GzipFile
1888        except ImportError:
1889            raise CompressionError("gzip module is not available") from None
1890
1891        try:
1892            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
1893        except OSError as e:
1894            if fileobj is not None and mode == 'r':
1895                raise ReadError("not a gzip file") from e
1896            raise
1897
1898        try:
1899            t = cls.taropen(name, mode, fileobj, **kwargs)
1900        except OSError as e:
1901            fileobj.close()
1902            if mode == 'r':
1903                raise ReadError("not a gzip file") from e
1904            raise
1905        except:
1906            fileobj.close()
1907            raise
1908        t._extfileobj = False
1909        return t
1910
1911    @classmethod
1912    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1913        """Open bzip2 compressed tar archive name for reading or writing.
1914           Appending is not allowed.
1915        """
1916        if mode not in ("r", "w", "x"):
1917            raise ValueError("mode must be 'r', 'w' or 'x'")
1918
1919        try:
1920            from bz2 import BZ2File
1921        except ImportError:
1922            raise CompressionError("bz2 module is not available") from None
1923
1924        fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)
1925
1926        try:
1927            t = cls.taropen(name, mode, fileobj, **kwargs)
1928        except (OSError, EOFError) as e:
1929            fileobj.close()
1930            if mode == 'r':
1931                raise ReadError("not a bzip2 file") from e
1932            raise
1933        except:
1934            fileobj.close()
1935            raise
1936        t._extfileobj = False
1937        return t
1938
1939    @classmethod
1940    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
1941        """Open lzma compressed tar archive name for reading or writing.
1942           Appending is not allowed.
1943        """
1944        if mode not in ("r", "w", "x"):
1945            raise ValueError("mode must be 'r', 'w' or 'x'")
1946
1947        try:
1948            from lzma import LZMAFile, LZMAError
1949        except ImportError:
1950            raise CompressionError("lzma module is not available") from None
1951
1952        fileobj = LZMAFile(fileobj or name, mode, preset=preset)
1953
1954        try:
1955            t = cls.taropen(name, mode, fileobj, **kwargs)
1956        except (LZMAError, EOFError) as e:
1957            fileobj.close()
1958            if mode == 'r':
1959                raise ReadError("not an lzma file") from e
1960            raise
1961        except:
1962            fileobj.close()
1963            raise
1964        t._extfileobj = False
1965        return t
1966
1967    # All *open() methods are registered here.
1968    OPEN_METH = {
1969        "tar": "taropen",   # uncompressed tar
1970        "gz":  "gzopen",    # gzip compressed tar
1971        "bz2": "bz2open",   # bzip2 compressed tar
1972        "xz":  "xzopen"     # lzma compressed tar
1973    }
1974
1975    #--------------------------------------------------------------------------
1976    # The public methods which TarFile provides:
1977
1978    def close(self):
1979        """Close the TarFile. In write-mode, two finishing zero blocks are
1980           appended to the archive.
1981        """
1982        if self.closed:
1983            return
1984
1985        self.closed = True
1986        try:
1987            if self.mode in ("a", "w", "x"):
1988                self.fileobj.write(NUL * (BLOCKSIZE * 2))
1989                self.offset += (BLOCKSIZE * 2)
1990                # fill up the end with zero-blocks
1991                # (like option -b20 for tar does)
1992                blocks, remainder = divmod(self.offset, RECORDSIZE)
1993                if remainder > 0:
1994                    self.fileobj.write(NUL * (RECORDSIZE - remainder))
1995        finally:
1996            if not self._extfileobj:
1997                self.fileobj.close()
1998
1999    def getmember(self, name):
2000        """Return a TarInfo object for member `name'. If `name' cannot be
2001           found in the archive, KeyError is raised. If a member occurs more
2002           than once in the archive, its last occurrence is assumed to be the
2003           most up-to-date version.
2004        """
2005        tarinfo = self._getmember(name.rstrip('/'))
2006        if tarinfo is None:
2007            raise KeyError("filename %r not found" % name)
2008        return tarinfo
2009
2010    def getmembers(self):
2011        """Return the members of the archive as a list of TarInfo objects. The
2012           list has the same order as the members in the archive.
2013        """
2014        self._check()
2015        if not self._loaded:    # if we want to obtain a list of
2016            self._load()        # all members, we first have to
2017                                # scan the whole archive.
2018        return self.members
2019
2020    def getnames(self):
2021        """Return the members of the archive as a list of their names. It has
2022           the same order as the list returned by getmembers().
2023        """
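        # Illustrative usage (not part of the module); the archive and member
        # names are hypothetical:
        #
        #     with tarfile.open("example.tar") as tf:
        #         if "docs/readme.txt" in tf.getnames():
        #             member = tf.getmember("docs/readme.txt")
        #             print(member.size, member.mtime)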
2024        return [tarinfo.name for tarinfo in self.getmembers()]
2025
2026    def gettarinfo(self, name=None, arcname=None, fileobj=None):
2027        """Create a TarInfo object from the result of os.stat or equivalent
2028           on an existing file. The file is either named by `name', or
2029           specified as a file object `fileobj' with a file descriptor. If
2030           given, `arcname' specifies an alternative name for the file in the
2031           archive, otherwise, the name is taken from the 'name' attribute of
2032           'fileobj', or the 'name' argument. The name should be a text
2033           string.
2034        """
2035        self._check("awx")
2036
2037        # When fileobj is given, replace name by
2038        # fileobj's real name.
2039        if fileobj is not None:
2040            name = fileobj.name
2041
2042        # Building the name of the member in the archive.
2043        # Backward slashes are converted to forward slashes,
2044        # absolute paths are turned into relative paths.
2045        if arcname is None:
2046            arcname = name
2047        drv, arcname = os.path.splitdrive(arcname)
2048        arcname = arcname.replace(os.sep, "/")
2049        arcname = arcname.lstrip("/")
2050
2051        # Now, fill the TarInfo object with
2052        # information specific for the file.
2053        tarinfo = self.tarinfo()
2054        tarinfo.tarfile = self  # Not needed
2055
2056        # Use os.stat or os.lstat, depending on whether symlinks shall be resolved.
2057        if fileobj is None:
2058            if not self.dereference:
2059                statres = os.lstat(name)
2060            else:
2061                statres = os.stat(name)
2062        else:
2063            statres = os.fstat(fileobj.fileno())
2064        linkname = ""
2065
2066        stmd = statres.st_mode
2067        if stat.S_ISREG(stmd):
2068            inode = (statres.st_ino, statres.st_dev)
2069            if not self.dereference and statres.st_nlink > 1 and \
2070                    inode in self.inodes and arcname != self.inodes[inode]:
2071                # Is it a hardlink to an already
2072                # archived file?
2073                type = LNKTYPE
2074                linkname = self.inodes[inode]
2075            else:
2076                # The inode is added only if it is valid.
2077                # For win32 it is always 0.
2078                type = REGTYPE
2079                if inode[0]:
2080                    self.inodes[inode] = arcname
2081        elif stat.S_ISDIR(stmd):
2082            type = DIRTYPE
2083        elif stat.S_ISFIFO(stmd):
2084            type = FIFOTYPE
2085        elif stat.S_ISLNK(stmd):
2086            type = SYMTYPE
2087            linkname = os.readlink(name)
2088        elif stat.S_ISCHR(stmd):
2089            type = CHRTYPE
2090        elif stat.S_ISBLK(stmd):
2091            type = BLKTYPE
2092        else:
2093            return None
2094
2095        # Fill the TarInfo object with all
2096        # information we can get.
2097        tarinfo.name = arcname
2098        tarinfo.mode = stmd
2099        tarinfo.uid = statres.st_uid
2100        tarinfo.gid = statres.st_gid
2101        if type == REGTYPE:
2102            tarinfo.size = statres.st_size
2103        else:
2104            tarinfo.size = 0
2105        tarinfo.mtime = statres.st_mtime
2106        tarinfo.type = type
2107        tarinfo.linkname = linkname
2108        if pwd:
2109            try:
2110                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
2111            except KeyError:
2112                pass
2113        if grp:
2114            try:
2115                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
2116            except KeyError:
2117                pass
2118
2119        if type in (CHRTYPE, BLKTYPE):
2120            if hasattr(os, "major") and hasattr(os, "minor"):
2121                tarinfo.devmajor = os.major(statres.st_rdev)
2122                tarinfo.devminor = os.minor(statres.st_rdev)
2123        return tarinfo
2124
2125    def list(self, verbose=True, *, members=None):
2126        """Print a table of contents to sys.stdout. If `verbose' is False, only
2127           the names of the members are printed. If it is True, an `ls -l'-like
2128           output is produced. `members' is optional and must be a subset of the
2129           list returned by getmembers().
2130        """
2131        self._check()
2132
2133        if members is None:
2134            members = self
2135        for tarinfo in members:
2136            if verbose:
2137                if tarinfo.mode is None:
2138                    _safe_print("??????????")
2139                else:
2140                    _safe_print(stat.filemode(tarinfo.mode))
2141                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
2142                                       tarinfo.gname or tarinfo.gid))
2143                if tarinfo.ischr() or tarinfo.isblk():
2144                    _safe_print("%10s" %
2145                            ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
2146                else:
2147                    _safe_print("%10d" % tarinfo.size)
2148                if tarinfo.mtime is None:
2149                    _safe_print("????-??-?? ??:??:??")
2150                else:
2151                    _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
2152                                % time.localtime(tarinfo.mtime)[:6])
2153
2154            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
2155
2156            if verbose:
2157                if tarinfo.issym():
2158                    _safe_print("-> " + tarinfo.linkname)
2159                if tarinfo.islnk():
2160                    _safe_print("link to " + tarinfo.linkname)
2161            print()
2162
2163    def add(self, name, arcname=None, recursive=True, *, filter=None):
2164        """Add the file `name' to the archive. `name' may be any type of file
2165           (directory, fifo, symbolic link, etc.). If given, `arcname'
2166           specifies an alternative name for the file in the archive.
2167           Directories are added recursively by default. This can be avoided by
2168           setting `recursive' to False. `filter' is a function
2169           that expects a TarInfo object argument and returns the changed
2170           TarInfo object; if it returns None, the TarInfo object will be
2171           excluded from the archive.
2172        """
2173        self._check("awx")
2174
2175        if arcname is None:
2176            arcname = name
2177
2178        # Skip if somebody tries to archive the archive...
2179        if self.name is not None and os.path.abspath(name) == self.name:
2180            self._dbg(2, "tarfile: Skipped %r" % name)
2181            return
2182
2183        self._dbg(1, name)
2184
2185        # Create a TarInfo object from the file.
2186        tarinfo = self.gettarinfo(name, arcname)
2187
2188        if tarinfo is None:
2189            self._dbg(1, "tarfile: Unsupported type %r" % name)
2190            return
2191
2192        # Change or exclude the TarInfo object.
2193        if filter is not None:
2194            tarinfo = filter(tarinfo)
2195            if tarinfo is None:
2196                self._dbg(2, "tarfile: Excluded %r" % name)
2197                return
2198
2199        # Append the tar header and data to the archive.
2200        if tarinfo.isreg():
2201            with bltn_open(name, "rb") as f:
2202                self.addfile(tarinfo, f)
2203
2204        elif tarinfo.isdir():
2205            self.addfile(tarinfo)
2206            if recursive:
2207                for f in sorted(os.listdir(name)):
2208                    self.add(os.path.join(name, f), os.path.join(arcname, f),
2209                            recursive, filter=filter)
2210
2211        else:
2212            self.addfile(tarinfo)
2213
2214    def addfile(self, tarinfo, fileobj=None):
2215        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2216           given, it should be a binary file, and tarinfo.size bytes are read
2217           from it and added to the archive. You can create TarInfo objects
2218           directly, or by using gettarinfo().
2219        """
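        # Illustrative usage (not part of the module): adding in-memory data
        # with a hand-built TarInfo. The file name and data are hypothetical.
        #
        #     import io, tarfile
        #     data = b"hello world\n"
        #     info = tarfile.TarInfo(name="greeting.txt")
        #     info.size = len(data)
        #     with tarfile.open("example.tar", "w") as tf:
        #         tf.addfile(info, io.BytesIO(data))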
2220        self._check("awx")
2221
2222        tarinfo = copy.copy(tarinfo)
2223
2224        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2225        self.fileobj.write(buf)
2226        self.offset += len(buf)
2227        bufsize = self.copybufsize
2228        # If there's data to follow, append it.
2229        if fileobj is not None:
2230            copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
2231            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2232            if remainder > 0:
2233                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2234                blocks += 1
2235            self.offset += blocks * BLOCKSIZE
2236
2237        self.members.append(tarinfo)
2238
2239    def _get_filter_function(self, filter):
2240        if filter is None:
2241            filter = self.extraction_filter
2242            if filter is None:
2243                return fully_trusted_filter
2244            if isinstance(filter, str):
2245                raise TypeError(
2246                    'String names are not supported for '
2247                    + 'TarFile.extraction_filter. Use a function such as '
2248                    + 'tarfile.data_filter directly.')
2249            return filter
2250        if callable(filter):
2251            return filter
2252        try:
2253            return _NAMED_FILTERS[filter]
2254        except KeyError:
2255            raise ValueError(f"filter {filter!r} not found") from None
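    # Illustrative usage (not part of the module): `filter' may be left as
    # None (fall back to self.extraction_filter), given as a callable, or
    # given as the name of one of the predefined filters, e.g.:
    #
    #     tf.extractall(path="out", filter="data")
    #     tf.extractall(path="out", filter=tarfile.data_filter)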
2256
2257    def extractall(self, path=".", members=None, *, numeric_owner=False,
2258                   filter=None):
2259        """Extract all members from the archive to the current working
2260           directory and set owner, modification time and permissions on
2261           directories afterwards. `path' specifies a different directory
2262           to extract to. `members' is optional and must be a subset of the
2263           list returned by getmembers(). If `numeric_owner` is True, only
2264           the numbers for user/group names are used and not the names.
2265
2266           The `filter` function will be called on each member just
2267           before extraction.
2268           It can return a changed TarInfo or None to skip the member.
2269           String names of common filters are accepted.
2270        """
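        # Illustrative usage (not part of the module): extracting only a
        # subset of members; the archive name and prefix are hypothetical.
        #
        #     with tarfile.open("release.tar.gz") as tf:
        #         docs = [m for m in tf.getmembers() if m.name.startswith("docs/")]
        #         tf.extractall(path="unpacked", members=docs, filter="data")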
2271        directories = []
2272
2273        filter_function = self._get_filter_function(filter)
2274        if members is None:
2275            members = self
2276
2277        for member in members:
2278            tarinfo = self._get_extract_tarinfo(member, filter_function, path)
2279            if tarinfo is None:
2280                continue
2281            if tarinfo.isdir():
2282                # For directories, delay setting attributes until later,
2283                # since permissions can interfere with extraction and
2284                # extracting contents can reset mtime.
2285                directories.append(tarinfo)
2286            self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),
2287                              numeric_owner=numeric_owner)
2288
2289        # Reverse sort directories.
2290        directories.sort(key=lambda a: a.name, reverse=True)
2291
2292        # Set correct owner, mtime and filemode on directories.
2293        for tarinfo in directories:
2294            dirpath = os.path.join(path, tarinfo.name)
2295            try:
2296                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
2297                self.utime(tarinfo, dirpath)
2298                self.chmod(tarinfo, dirpath)
2299            except ExtractError as e:
2300                self._handle_nonfatal_error(e)
2301
2302    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False,
2303                filter=None):
2304        """Extract a member from the archive to the current working directory,
2305           using its full name. Its file information is extracted as accurately
2306           as possible. `member' may be a filename or a TarInfo object. You can
2307           specify a different directory using `path'. File attributes (owner,
2308           mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
2309           is True, only the numbers for user/group names are used and not
2310           the names.
2311
2312           The `filter` function will be called before extraction.
2313           It can return a changed TarInfo or None to skip the member.
2314           String names of common filters are accepted.
2315        """
2316        filter_function = self._get_filter_function(filter)
2317        tarinfo = self._get_extract_tarinfo(member, filter_function, path)
2318        if tarinfo is not None:
2319            self._extract_one(tarinfo, path, set_attrs, numeric_owner)
2320
2321    def _get_extract_tarinfo(self, member, filter_function, path):
2322        """Get filtered TarInfo (or None) from member, which might be a str"""
2323        if isinstance(member, str):
2324            tarinfo = self.getmember(member)
2325        else:
2326            tarinfo = member
2327
2328        unfiltered = tarinfo
2329        try:
2330            tarinfo = filter_function(tarinfo, path)
2331        except (OSError, FilterError) as e:
2332            self._handle_fatal_error(e)
2333        except ExtractError as e:
2334            self._handle_nonfatal_error(e)
2335        if tarinfo is None:
2336            self._dbg(2, "tarfile: Excluded %r" % unfiltered.name)
2337            return None
2338        # Prepare the link target for makelink().
2339        if tarinfo.islnk():
2340            tarinfo = copy.copy(tarinfo)
2341            tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2342        return tarinfo
2343
2344    def _extract_one(self, tarinfo, path, set_attrs, numeric_owner):
2345        """Extract from filtered tarinfo to disk"""
2346        self._check("r")
2347
2348        try:
2349            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
2350                                 set_attrs=set_attrs,
2351                                 numeric_owner=numeric_owner)
2352        except OSError as e:
2353            self._handle_fatal_error(e)
2354        except ExtractError as e:
2355            self._handle_nonfatal_error(e)
2356
2357    def _handle_nonfatal_error(self, e):
2358        """Handle non-fatal error (ExtractError) according to errorlevel"""
2359        if self.errorlevel > 1:
2360            raise
2361        else:
2362            self._dbg(1, "tarfile: %s" % e)
2363
2364    def _handle_fatal_error(self, e):
2365        """Handle "fatal" error according to self.errorlevel"""
2366        if self.errorlevel > 0:
2367            raise
2368        elif isinstance(e, OSError):
2369            if e.filename is None:
2370                self._dbg(1, "tarfile: %s" % e.strerror)
2371            else:
2372                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2373        else:
2374            self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e))
2375
2376    def extractfile(self, member):
2377        """Extract a member from the archive as a file object. `member' may be
2378           a filename or a TarInfo object. If `member' is a regular file or
2379           a link, an io.BufferedReader object is returned. For all other
2380           existing members, None is returned. If `member' does not appear
2381           in the archive, KeyError is raised.
2382        """
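        # Illustrative usage (not part of the module); names are hypothetical:
        #
        #     with tarfile.open("example.tar") as tf:
        #         f = tf.extractfile("data/config.json")
        #         if f is not None:              # None for directories, devices, ...
        #             content = f.read()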
2383        self._check("r")
2384
2385        if isinstance(member, str):
2386            tarinfo = self.getmember(member)
2387        else:
2388            tarinfo = member
2389
2390        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
2391            # Members with unknown types are treated as regular files.
2392            return self.fileobject(self, tarinfo)
2393
2394        elif tarinfo.islnk() or tarinfo.issym():
2395            if isinstance(self.fileobj, _Stream):
2396                # A small but ugly workaround for the case that someone tries
2397                # to extract a (sym)link as a file-object from a non-seekable
2398                # stream of tar blocks.
2399                raise StreamError("cannot extract (sym)link as file object")
2400            else:
2401                # A (sym)link's file object is its target's file object.
2402                return self.extractfile(self._find_link_target(tarinfo))
2403        else:
2404            # If there's no data associated with the member (directory, chrdev,
2405            # blkdev, etc.), return None instead of a file object.
2406            return None
2407
2408    def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2409                        numeric_owner=False):
2410        """Extract the TarInfo object tarinfo to a physical
2411           file called targetpath.
2412        """
2413        # Fetch the TarInfo object for the given name
2414        # and build the destination pathname, replacing
2415        # forward slashes with platform-specific separators.
2416        targetpath = targetpath.rstrip("/")
2417        targetpath = targetpath.replace("/", os.sep)
2418
2419        # Create all upper directories.
2420        upperdirs = os.path.dirname(targetpath)
2421        if upperdirs and not os.path.exists(upperdirs):
2422            # Create directories that are not part of the archive with
2423            # default permissions.
2424            os.makedirs(upperdirs)
2425
2426        if tarinfo.islnk() or tarinfo.issym():
2427            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2428        else:
2429            self._dbg(1, tarinfo.name)
2430
2431        if tarinfo.isreg():
2432            self.makefile(tarinfo, targetpath)
2433        elif tarinfo.isdir():
2434            self.makedir(tarinfo, targetpath)
2435        elif tarinfo.isfifo():
2436            self.makefifo(tarinfo, targetpath)
2437        elif tarinfo.ischr() or tarinfo.isblk():
2438            self.makedev(tarinfo, targetpath)
2439        elif tarinfo.islnk() or tarinfo.issym():
2440            self.makelink(tarinfo, targetpath)
2441        elif tarinfo.type not in SUPPORTED_TYPES:
2442            self.makeunknown(tarinfo, targetpath)
2443        else:
2444            self.makefile(tarinfo, targetpath)
2445
2446        if set_attrs:
2447            self.chown(tarinfo, targetpath, numeric_owner)
2448            if not tarinfo.issym():
2449                self.chmod(tarinfo, targetpath)
2450                self.utime(tarinfo, targetpath)
2451
2452    #--------------------------------------------------------------------------
2453    # Below are the different file methods. They are called via
2454    # _extract_member() when extract() is called. They can be replaced in a
2455    # subclass to implement other functionality.
2456
2457    def makedir(self, tarinfo, targetpath):
2458        """Make a directory called targetpath.
2459        """
2460        try:
2461            if tarinfo.mode is None:
2462                # Use the system's default mode
2463                os.mkdir(targetpath)
2464            else:
2465                # Use a safe mode for the directory, the real mode is set
2466                # later in _extract_member().
2467                os.mkdir(targetpath, 0o700)
2468        except FileExistsError:
2469            pass
2470
2471    def makefile(self, tarinfo, targetpath):
2472        """Make a file called targetpath.
2473        """
2474        source = self.fileobj
2475        source.seek(tarinfo.offset_data)
2476        bufsize = self.copybufsize
2477        with bltn_open(targetpath, "wb") as target:
2478            if tarinfo.sparse is not None:
2479                for offset, size in tarinfo.sparse:
2480                    target.seek(offset)
2481                    copyfileobj(source, target, size, ReadError, bufsize)
2482                target.seek(tarinfo.size)
2483                target.truncate()
2484            else:
2485                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
2486
2487    def makeunknown(self, tarinfo, targetpath):
2488        """Make a file from a TarInfo object with an unknown type
2489           at targetpath.
2490        """
2491        self.makefile(tarinfo, targetpath)
2492        self._dbg(1, "tarfile: Unknown file type %r, " \
2493                     "extracted as regular file." % tarinfo.type)
2494
2495    def makefifo(self, tarinfo, targetpath):
2496        """Make a fifo called targetpath.
2497        """
2498        if hasattr(os, "mkfifo"):
2499            os.mkfifo(targetpath)
2500        else:
2501            raise ExtractError("fifo not supported by system")
2502
2503    def makedev(self, tarinfo, targetpath):
2504        """Make a character or block device called targetpath.
2505        """
2506        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2507            raise ExtractError("special devices not supported by system")
2508
2509        mode = tarinfo.mode
2510        if mode is None:
2511            # Use mknod's default
2512            mode = 0o600
2513        if tarinfo.isblk():
2514            mode |= stat.S_IFBLK
2515        else:
2516            mode |= stat.S_IFCHR
2517
2518        os.mknod(targetpath, mode,
2519                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2520
2521    def makelink(self, tarinfo, targetpath):
2522        """Make a (symbolic) link called targetpath. If it cannot be created
2523           (platform limitation), we try to make a copy of the referenced file
2524           instead of a link.
2525        """
2526        try:
2527            # For systems that support symbolic and hard links.
2528            if tarinfo.issym():
2529                if os.path.lexists(targetpath):
2530                    # Avoid FileExistsError on following os.symlink.
2531                    os.unlink(targetpath)
2532                os.symlink(tarinfo.linkname, targetpath)
2533            else:
2534                if os.path.exists(tarinfo._link_target):
2535                    os.link(tarinfo._link_target, targetpath)
2536                else:
2537                    self._extract_member(self._find_link_target(tarinfo),
2538                                         targetpath)
2539        except symlink_exception:
2540            try:
2541                self._extract_member(self._find_link_target(tarinfo),
2542                                     targetpath)
2543            except KeyError:
2544                raise ExtractError("unable to resolve link inside archive") from None
2545
2546    def chown(self, tarinfo, targetpath, numeric_owner):
2547        """Set owner of targetpath according to tarinfo. If numeric_owner
2548           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
2549           is False, fall back to .gid/.uid when the search based on name
2550           fails.
2551        """
2552        if hasattr(os, "geteuid") and os.geteuid() == 0:
2553            # We have to be root to do so.
2554            g = tarinfo.gid
2555            u = tarinfo.uid
2556            if not numeric_owner:
2557                try:
2558                    if grp and tarinfo.gname:
2559                        g = grp.getgrnam(tarinfo.gname)[2]
2560                except KeyError:
2561                    pass
2562                try:
2563                    if pwd and tarinfo.uname:
2564                        u = pwd.getpwnam(tarinfo.uname)[2]
2565                except KeyError:
2566                    pass
2567            if g is None:
2568                g = -1
2569            if u is None:
2570                u = -1
2571            try:
2572                if tarinfo.issym() and hasattr(os, "lchown"):
2573                    os.lchown(targetpath, u, g)
2574                else:
2575                    os.chown(targetpath, u, g)
2576            except OSError as e:
2577                raise ExtractError("could not change owner") from e
2578
2579    def chmod(self, tarinfo, targetpath):
2580        """Set file permissions of targetpath according to tarinfo.
2581        """
2582        if tarinfo.mode is None:
2583            return
2584        try:
2585            os.chmod(targetpath, tarinfo.mode)
2586        except OSError as e:
2587            raise ExtractError("could not change mode") from e
2588
2589    def utime(self, tarinfo, targetpath):
2590        """Set modification time of targetpath according to tarinfo.
2591        """
2592        mtime = tarinfo.mtime
2593        if mtime is None:
2594            return
2595        if not hasattr(os, 'utime'):
2596            return
2597        try:
2598            os.utime(targetpath, (mtime, mtime))
2599        except OSError as e:
2600            raise ExtractError("could not change modification time") from e
2601
2602    #--------------------------------------------------------------------------
2603    def next(self):
2604        """Return the next member of the archive as a TarInfo object, when
2605           TarFile is opened for reading. Return None if there are no more
2606           members available.
2607        """
2608        self._check("ra")
2609        if self.firstmember is not None:
2610            m = self.firstmember
2611            self.firstmember = None
2612            return m
2613
2614        # Advance the file pointer.
2615        if self.offset != self.fileobj.tell():
2616            if self.offset == 0:
2617                return None
2618            self.fileobj.seek(self.offset - 1)
2619            if not self.fileobj.read(1):
2620                raise ReadError("unexpected end of data")
2621
2622        # Read the next block.
2623        tarinfo = None
2624        while True:
2625            try:
2626                tarinfo = self.tarinfo.fromtarfile(self)
2627            except EOFHeaderError as e:
2628                if self.ignore_zeros:
2629                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2630                    self.offset += BLOCKSIZE
2631                    continue
2632            except InvalidHeaderError as e:
2633                if self.ignore_zeros:
2634                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2635                    self.offset += BLOCKSIZE
2636                    continue
2637                elif self.offset == 0:
2638                    raise ReadError(str(e)) from None
2639            except EmptyHeaderError:
2640                if self.offset == 0:
2641                    raise ReadError("empty file") from None
2642            except TruncatedHeaderError as e:
2643                if self.offset == 0:
2644                    raise ReadError(str(e)) from None
2645            except SubsequentHeaderError as e:
2646                raise ReadError(str(e)) from None
2647            except Exception as e:
2648                try:
2649                    import zlib
2650                    if isinstance(e, zlib.error):
2651                        raise ReadError(f'zlib error: {e}') from None
2652                    else:
2653                        raise e
2654                except ImportError:
2655                    raise e
2656            break
2657
2658        if tarinfo is not None:
2659            self.members.append(tarinfo)
2660        else:
2661            self._loaded = True
2662
2663        return tarinfo
2664
2665    #--------------------------------------------------------------------------
2666    # Little helper methods:
2667
2668    def _getmember(self, name, tarinfo=None, normalize=False):
2669        """Find an archive member by name from bottom to top.
2670           If tarinfo is given, it is used as the starting point.
2671        """
2672        # Ensure that all members have been loaded.
2673        members = self.getmembers()
2674
2675        # Limit the member search list up to tarinfo.
2676        skipping = False
2677        if tarinfo is not None:
2678            try:
2679                index = members.index(tarinfo)
2680            except ValueError:
2681                # The given starting point might be a (modified) copy.
2682                # We'll later skip members until we find an equivalent.
2683                skipping = True
2684            else:
2685                # Happy fast path
2686                members = members[:index]
2687
2688        if normalize:
2689            name = os.path.normpath(name)
2690
2691        for member in reversed(members):
2692            if skipping:
2693                if tarinfo.offset == member.offset:
2694                    skipping = False
2695                continue
2696            if normalize:
2697                member_name = os.path.normpath(member.name)
2698            else:
2699                member_name = member.name
2700
2701            if name == member_name:
2702                return member
2703
2704        if skipping:
2705            # Starting point was not found
2706            raise ValueError(tarinfo)
2707
2708    def _load(self):
2709        """Read through the entire archive file and look for readable
2710           members.
2711        """
2712        while True:
2713            tarinfo = self.next()
2714            if tarinfo is None:
2715                break
2716        self._loaded = True
2717
2718    def _check(self, mode=None):
2719        """Check if TarFile is still open, and if the operation's mode
2720           corresponds to TarFile's mode.
2721        """
2722        if self.closed:
2723            raise OSError("%s is closed" % self.__class__.__name__)
2724        if mode is not None and self.mode not in mode:
2725            raise OSError("bad operation for mode %r" % self.mode)
2726
2727    def _find_link_target(self, tarinfo):
2728        """Find the target member of a symlink or hardlink member in the
2729           archive.
2730        """
2731        if tarinfo.issym():
2732            # Always search the entire archive.
2733            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
2734            limit = None
2735        else:
2736            # Search the archive before the link, because a hard link is
2737            # just a reference to an already archived file.
2738            linkname = tarinfo.linkname
2739            limit = tarinfo
2740
2741        member = self._getmember(linkname, tarinfo=limit, normalize=True)
2742        if member is None:
2743            raise KeyError("linkname %r not found" % linkname)
2744        return member
2745
2746    def __iter__(self):
2747        """Provide an iterator object.
2748        """
2749        if self._loaded:
2750            yield from self.members
2751            return
2752
2753        # Yield items using TarFile's next() method.
2754        # When all members have been read, set TarFile as _loaded.
2755        index = 0
2756        # Fix for SF #1100429: Under rare circumstances it can
2757        # happen that getmembers() is called during iteration,
2758        # which will have already exhausted the next() method.
2759        if self.firstmember is not None:
2760            tarinfo = self.next()
2761            index += 1
2762            yield tarinfo
2763
2764        while True:
2765            if index < len(self.members):
2766                tarinfo = self.members[index]
2767            elif not self._loaded:
2768                tarinfo = self.next()
2769                if not tarinfo:
2770                    self._loaded = True
2771                    return
2772            else:
2773                return
2774            index += 1
2775            yield tarinfo
2776
2777    def _dbg(self, level, msg):
2778        """Write debugging output to sys.stderr.
2779        """
2780        if level <= self.debug:
2781            print(msg, file=sys.stderr)
2782
2783    def __enter__(self):
2784        self._check()
2785        return self
2786
2787    def __exit__(self, type, value, traceback):
2788        if type is None:
2789            self.close()
2790        else:
2791            # An exception occurred. We must not call close() because
2792            # it would try to write end-of-archive blocks and padding.
2793            if not self._extfileobj:
2794                self.fileobj.close()
2795            self.closed = True
2796
#--------------------
# exported functions
#--------------------

def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       'name' should be a string, file, or file-like object.
    """
    try:
        if hasattr(name, "read"):
            pos = name.tell()
            t = open(fileobj=name)
            name.seek(pos)
        else:
            t = open(name)
        t.close()
        return True
    except TarError:
        return False

open = TarFile.open
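
# Quick-check sketch (illustrative; "sample.tar.gz" is a placeholder):
#
#   >>> import tarfile
#   >>> tarfile.is_tarfile("sample.tar.gz")
#   True
#   >>> with tarfile.open("sample.tar.gz") as tf:   # the alias defined above
#   ...     names = tf.getnames()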


def main():
    import argparse

    description = 'A simple command-line interface for the tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    parser.add_argument('--filter', metavar='<filtername>',
                        choices=_NAMED_FILTERS,
                        help='Filter for extraction')

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<tarfile>',
                       help='Show listing of a tarfile')
    group.add_argument('-e', '--extract', nargs='+',
                       metavar=('<tarfile>', '<output_dir>'),
                       help='Extract tarfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create tarfile from sources')
    group.add_argument('-t', '--test', metavar='<tarfile>',
                       help='Test if a tarfile is valid')

    args = parser.parse_args()

    if args.filter and args.extract is None:
        parser.exit(1, '--filter is only valid for extraction\n')

    if args.test is not None:
        src = args.test
        if is_tarfile(src):
            with open(src, 'r') as tar:
                # Reading all members verifies the archive; the listing is
                # written to stderr so it does not mix with normal output.
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.list is not None:
        src = args.list
        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.extract is not None:
        if len(args.extract) == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif len(args.extract) == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir, filter=args.filter)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} was extracted.'.format(src)
                else:
                    msg = '{!r} was extracted into {!r}.'.format(src, curdir)
                print(msg)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.create is not None:
        tar_name = args.create.pop(0)
        _, ext = os.path.splitext(tar_name)
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
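        # Illustrative mapping: 'backup.tgz'  -> mode 'w:gz',
        #                       'data.tar.xz' -> mode 'w:xz',
        #                       'notes.tar' or any unknown extension falls
        #                       back to plain 'w' below.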
        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} was created.'.format(tar_name))

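# Command-line sketch (illustrative invocations; archive and directory names
# are placeholders, and 'data' is one of the names in _NAMED_FILTERS):
#
#   python -m tarfile -l archive.tar                 # list contents
#   python -m tarfile -e archive.tar out/            # extract into out/
#   python -m tarfile -e archive.tar --filter data   # extract with the 'data' filter
#   python -m tarfile -c backup.tar.gz mydir         # create a gzip-compressed archive
#   python -m tarfile -t archive.tar -v              # test validity, verbose
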
if __name__ == '__main__':
    main()