#!/usr/bin/env python3
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
# All rights reserved.
#
# Permission  is  hereby granted,  free  of charge,  to  any person
# obtaining a  copy of  this software  and associated documentation
# files  (the  "Software"),  to   deal  in  the  Software   without
# restriction,  including  without limitation  the  rights to  use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies  of  the  Software,  and to  permit  persons  to  whom the
# Software  is  furnished  to  do  so,  subject  to  the  following
# conditions:
#
# The above copyright  notice and this  permission notice shall  be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
"""Read from and write to tar format archives.
"""

version     = "0.9.0"
__author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."

#---------
# Imports
#---------
from builtins import open as bltn_open
import sys
import os
import io
import shutil
import stat
import time
import struct
import copy
import re
import warnings

try:
    import pwd
except ImportError:
    pwd = None
try:
    import grp
except ImportError:
    grp = None

# os.symlink on Windows prior to 6.0 raises NotImplementedError
symlink_exception = (AttributeError, NotImplementedError)
try:
    # OSError (winerror=1314) will be raised if the caller does not hold the
    # SeCreateSymbolicLinkPrivilege privilege
    symlink_exception += (OSError,)
except NameError:
    pass

# from tarfile import *
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
           "CompressionError", "StreamError", "ExtractError", "HeaderError",
           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
           "DEFAULT_FORMAT", "open"]


#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = PAX_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}

#---------------------------------------------------------
# initialization
#---------------------------------------------------------
if os.name == "nt":
    ENCODING = "utf-8"
else:
    ENCODING = sys.getfilesystemencoding()

#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------

def stn(s, length, encoding, errors):
    """Convert a string to a null-terminated bytes object.
    """
    if s is None:
        raise ValueError("metadata cannot contain None")
    s = s.encode(encoding, errors)
    return s[:length] + (length - len(s)) * NUL

def nts(s, encoding, errors):
    """Convert a null-terminated bytes object to a string.
    """
    p = s.find(b"\0")
    if p != -1:
        s = s[:p]
    return s.decode(encoding, errors)

def nti(s):
    """Convert a number field to a python number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] in (0o200, 0o377):
        n = 0
        for i in range(len(s) - 1):
            n <<= 8
            n += s[i + 1]
        if s[0] == 0o377:
            n = -(256 ** (len(s) - 1) - n)
    else:
        try:
            s = nts(s, "ascii", "strict")
            n = int(s.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    return n

def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte; this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicates this
    # particular encoding, and the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
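    # For illustration (these examples are not from the original sources, but
    # follow from the rules described above):
    #   itn(511)               == b"0000777\x00"   # octal digits + NUL
    #   itn(-1, 8, GNU_FORMAT) == b"\xff" * 8      # 0o377 marker + base-256
    #   nti(b"0000777\x00")    == 511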
    original_n = n
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            n = 256 ** digits + n

        for i in range(digits - 1):
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s

def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.
    """
    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum

def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    blocks, remainder = divmod(length, bufsize)
    for b in range(blocks):
        buf = src.read(bufsize)
        if len(buf) < bufsize:
            raise exception("unexpected end of data")
        dst.write(buf)

    if remainder != 0:
        buf = src.read(remainder)
        if len(buf) < remainder:
            raise exception("unexpected end of data")
        dst.write(buf)
    return

def _safe_print(s):
    encoding = getattr(sys.stdout, 'encoding', None)
    if encoding is not None:
        s = s.encode(encoding, 'backslashreplace').decode(encoding)
    print(s, end=' ')


class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Base exception for header errors."""
    pass
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
    pass
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
    pass
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
    pass
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
    pass
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
    pass

#---------------------------
# internal stream interface
#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)

class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
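            # (reached e.g. via tarfile.open(fileobj=some_stream, mode="r|*"),
            # where the compression method is sniffed from the first block)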
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = b""
        self.pos      = 0
        self.closed   = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available") from None
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self._init_read_gz()
                    self.exception = zlib.error
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available") from None
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available") from None
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
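        # The bytes above form a minimal RFC 1952 member header: \037\213 is
        # the gzip magic, the first \010 selects the deflate method, the
        # second \010 sets the FNAME flag, then come the 4-byte mtime,
        # XFL=\002 and OS=\377 (unknown).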
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # Honor "directory components removed" from RFC1952
        self.name = os.path.basename(self.name)
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

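        # RFC 1952 FLG bits checked below: 4 = FEXTRA, 8 = FNAME,
        # 16 = FCOMMENT, 2 = FHCRC; each optional field is skipped.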
        if flag & 4:
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.read(xlen)
        if flag & 8:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size):
        """Return the next size number of bytes from the stream."""
        assert size is not None
        buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            # Skip underlying buffer to avoid unaligned double buffering.
            if self.buf:
                buf = self.buf
                self.buf = b""
            else:
                buf = self.fileobj.read(self.bufsize)
                if not buf:
                    break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception as e:
                raise ReadError("invalid compressed data") from e
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream

class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        if self.buf.startswith(b"\x1f\x8b\x08"):
            return "gz"
        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
            return "bz2"
        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        else:
            return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy

#------------------------
# Extraction file object
#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.
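        # For illustration (not from the original sources): with size=10 and
        # blockinfo=[(2, 3)], the map built below becomes
        #   [(False, 0, 2, None), (True, 2, 5, realpos), (False, 5, 10, None)]
        # i.e. a hole, a 3-byte data run stored at realpos, and a final hole.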
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        buf = b""
        while size > 0:
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                buf += b
            else:
                buf += NUL * length
            size -= length
            self.position += length
        return buf

    def readinto(self, b):
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        self.closed = True
#class _FileInFile

class ExFileObject(io.BufferedReader):

    def __init__(self, tarfile, tarinfo):
        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                tarinfo.size, tarinfo.sparse)
        super().__init__(fileobj)
#class ExFileObject


#-----------------------------
# extraction filters (PEP 706)
#-----------------------------

class FilterError(TarError):
    pass

class AbsolutePathError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'member {tarinfo.name!r} has an absolute path')

class OutsideDestinationError(FilterError):
    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, '
                         + 'which is outside the destination')

class SpecialFileError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'{tarinfo.name!r} is a special file')

class AbsoluteLinkError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path')

class LinkOutsideDestinationError(FilterError):
    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        super().__init__(f'{tarinfo.name!r} would link to {path!r}, '
                         + 'which is outside the destination')

class LinkFallbackError(FilterError):
    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        super().__init__(f'link {tarinfo.name!r} would be extracted as a '
                         + f'copy of {path!r}, which was rejected')

# Errors caused by filters -- both "fatal" and "non-fatal" -- that
# we consider to be issues with the argument, rather than a bug in the
# filter function
_FILTER_ERRORS = (FilterError, OSError, ExtractError)

def _get_filtered_attrs(member, dest_path, for_data=True):
    new_attrs = {}
    name = member.name
    dest_path = os.path.realpath(dest_path, strict=os.path.ALLOW_MISSING)
    # Strip leading / (tar's directory separator) from filenames.
    # Include os.sep (target OS directory separator) as well.
    if name.startswith(('/', os.sep)):
        name = new_attrs['name'] = member.path.lstrip('/' + os.sep)
    if os.path.isabs(name):
        # Path is absolute even after stripping.
        # For example, 'C:/foo' on Windows.
        raise AbsolutePathError(member)
    # Ensure we stay in the destination
    target_path = os.path.realpath(os.path.join(dest_path, name),
                                   strict=os.path.ALLOW_MISSING)
    if os.path.commonpath([target_path, dest_path]) != dest_path:
        raise OutsideDestinationError(member, target_path)
    # Limit permissions (no high bits, and go-w)
    mode = member.mode
    if mode is not None:
        # Strip high bits & group/other write bits
        mode = mode & 0o755
        if for_data:
            # For data, handle permissions & file types
            if member.isreg() or member.islnk():
                if not mode & 0o100:
                    # Clear executable bits if not executable by user
                    mode &= ~0o111
                # Ensure owner can read & write
                mode |= 0o600
            elif member.isdir() or member.issym():
                # Ignore mode for directories & symlinks
                mode = None
            else:
                # Reject special files
                raise SpecialFileError(member)
        if mode != member.mode:
            new_attrs['mode'] = mode
    if for_data:
        # Ignore ownership for 'data'
        if member.uid is not None:
            new_attrs['uid'] = None
        if member.gid is not None:
            new_attrs['gid'] = None
        if member.uname is not None:
            new_attrs['uname'] = None
        if member.gname is not None:
            new_attrs['gname'] = None
        # Check link destination for 'data'
        if member.islnk() or member.issym():
            if os.path.isabs(member.linkname):
                raise AbsoluteLinkError(member)
            normalized = os.path.normpath(member.linkname)
            if normalized != member.linkname:
                new_attrs['linkname'] = normalized
            if member.issym():
                target_path = os.path.join(dest_path,
                                           os.path.dirname(name),
                                           member.linkname)
            else:
                target_path = os.path.join(dest_path,
                                           member.linkname)
            target_path = os.path.realpath(target_path,
                                           strict=os.path.ALLOW_MISSING)
            if os.path.commonpath([target_path, dest_path]) != dest_path:
                raise LinkOutsideDestinationError(member, target_path)
    return new_attrs

def fully_trusted_filter(member, dest_path):
    return member

def tar_filter(member, dest_path):
    new_attrs = _get_filtered_attrs(member, dest_path, False)
    if new_attrs:
        return member.replace(**new_attrs, deep=False)
    return member

def data_filter(member, dest_path):
    new_attrs = _get_filtered_attrs(member, dest_path, True)
    if new_attrs:
        return member.replace(**new_attrs, deep=False)
    return member

_NAMED_FILTERS = {
    "fully_trusted": fully_trusted_filter,
    "tar": tar_filter,
    "data": data_filter,
}
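
# For illustration (not part of the original sources): callers select these
# filters by name or by callable, e.g.
#   with tarfile.open("archive.tar.gz") as tf:
#       tf.extractall(path="dest", filter="data")
# which routes each member through data_filter() before extraction (PEP 706).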

#------------------
# Exported Classes
#------------------

# Sentinel for replace() defaults, meaning "don't change the attribute"
_KEEP = object()

# Header length is digits followed by a space.
_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ")

class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    __slots__ = dict(
        name = 'Name of the archive member.',
        mode = 'Permission bits.',
        uid = 'User ID of the user who originally stored this member.',
        gid = 'Group ID of the user who originally stored this member.',
        size = 'Size in bytes.',
        mtime = 'Time of last modification.',
        chksum = 'Header checksum.',
        type = ('File type. type is usually one of these constants: '
                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
        linkname = ('Name of the target file name, which is only present '
                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
        uname = 'User name.',
        gname = 'Group name.',
        devmajor = 'Device major number.',
        devminor = 'Device minor number.',
        offset = 'The tar header starts here.',
        offset_data = "The file's data starts here.",
        pax_headers = ('A dictionary containing key-value pairs of an '
                       'associated pax extended header.'),
        sparse = 'Sparse member information.',
        tarfile = None,
        _sparse_structs = None,
        _link_target = None,
        )

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    @property
    def path(self):
        'In pax headers, "name" is called "path".'
        return self.name

    @path.setter
    def path(self, name):
        self.name = name

    @property
    def linkpath(self):
        'In pax headers, "linkname" is called "linkpath".'
        return self.linkname

    @linkpath.setter
    def linkpath(self, linkname):
        self.linkname = linkname

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def replace(self, *,
                name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP,
                uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP,
                deep=True, _KEEP=_KEEP):
        """Return a deep copy of self with the given attributes replaced.
        """
        if deep:
            result = copy.deepcopy(self)
        else:
            result = copy.copy(self)
        if name is not _KEEP:
            result.name = name
        if mtime is not _KEEP:
            result.mtime = mtime
        if mode is not _KEEP:
            result.mode = mode
        if linkname is not _KEEP:
            result.linkname = linkname
        if uid is not _KEEP:
            result.uid = uid
        if gid is not _KEEP:
            result.gid = gid
        if uname is not _KEEP:
            result.uname = uname
        if gname is not _KEEP:
            result.gname = gname
        return result

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.
        """
        if self.mode is None:
            mode = None
        else:
            mode = self.mode & 0o7777
        info = {
            "name":     self.name,
            "mode":     mode,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a string of 512 byte blocks.
        """
        info = self.get_info()
        for name, value in info.items():
            if value is None:
                raise ValueError("%s may not be None" % name)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that need to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            needs_pax = False

            val = info[name]
            val_is_float = isinstance(val, float)
            val_int = round(val) if val_is_float else val
            if not 0 <= val_int < 8 ** (digits - 1):
                # Avoid overflow.
                info[name] = 0
                needs_pax = True
            elif val_is_float:
                # Put rounded value in ustar header, and full
                # precision value in pax header.
                info[name] = val_int
                needs_pax = True

            # The existing pax header has priority.
            if needs_pax and name not in pax_headers:
                pax_headers[name] = str(val)

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

    def _posix_split_name(self, name, encoding, errors):
        """Split a name longer than 100 chars into a prefix
           and a name part.
        """
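        # For illustration (hypothetical input): a member name such as
        # "some/deeply/nested/directory/with/a/rather/long/member-name.txt"
        # is split at a "/" so that the prefix fits in 155 bytes and the
        # remaining name fits in 100 bytes; if no split works, ValueError
        # is raised below.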
        components = name.split("/")
        for i in range(1, len(components)):
            prefix = "/".join(components[:i])
            name = "/".join(components[i:])
            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
                    len(name.encode(encoding, errors)) <= LENGTH_NAME:
                break
        else:
            raise ValueError("name is too long")

        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
        if has_device_fields:
            devmajor = itn(info.get("devmajor", 0), 8, format)
            devminor = itn(info.get("devminor", 0), 8, format)
        else:
            devmajor = stn("", 8, encoding, errors)
            devminor = stn("", 8, encoding, errors)

        # None values in metadata should cause ValueError.
        # itn()/stn() do this for all fields except type.
        filetype = info.get("type", REGTYPE)
        if filetype is None:
            raise ValueError("TarInfo.type must not be None")

        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            b"        ", # checksum field
            filetype,
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            devmajor,
            devminor,
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

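        # The parts above are packed into a single 512-byte block, then the
        # checksum is patched in: -364 == 148 - 512 and -357 == 155 - 512,
        # so the six octal digits plus NUL land in bytes 148..154 while the
        # eighth byte of the chksum field keeps the space written above.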
        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
           up to the next 512 byte border.
        """
        blocks, remainder = divmod(len(payload), BLOCKSIZE)
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for keyword, value in pax_headers.items():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
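            # The record length prefix counts itself, so search for a
            # self-consistent value: p is correct once p == l + len(str(p)).
            # For example (illustrative), keyword=b"path", value=b"abc"
            # gives l = 10 and settles on p = 12, i.e. b"12 path=abc\n".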
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        # Remove redundant slashes from directories. This is to be consistent
        # with frombuf().
        if self.isdir():
            self.name = self.name.rstrip("/")

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError as e:
            raise SubsequentHeaderError(str(e)) from None

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        # Remove redundant slashes from directories. This is to be consistent
        # with frombuf().
        if next.isdir():
            next.name = next.name.removesuffix("/")

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize
        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2008.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like this:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline.
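        # For example (illustrative), the record b"12 path=abc\n" has
        # length 12: the two length digits, the space, "path=abc" and
        # the trailing newline together make up twelve bytes.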
        pos = 0
        encoding = None
        raw_headers = []
        while len(buf) > pos and buf[pos] != 0x00:
            if not (match := _header_length_prefix_re.match(buf, pos)):
                raise InvalidHeaderError("invalid header")
            try:
                length = int(match.group(1))
            except ValueError:
                raise InvalidHeaderError("invalid header")
            # Headers must be at least 5 bytes, shortest being '5 x=\n'.
            # Value is allowed to be empty.
            if length < 5:
                raise InvalidHeaderError("invalid header")
            if pos + length > len(buf):
                raise InvalidHeaderError("invalid header")

            header_value_end_offset = match.start(1) + length - 1  # Last byte of the header
            keyword_and_value = buf[match.end(1) + 1:header_value_end_offset]
            raw_keyword, equals, raw_value = keyword_and_value.partition(b"=")

            # Check the framing of the header. The last character must be '\n' (0x0A)
            if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A:
                raise InvalidHeaderError("invalid header")
            raw_headers.append((length, raw_keyword, raw_value))

            # Check if the pax header contains a hdrcharset field. This tells us
            # the encoding of the path, linkpath, uname and gname fields. Normally,
            # these fields are UTF-8 encoded, but POSIX.1-2008 allows tar
            # implementations to store them as raw binary strings if the
            # translation to UTF-8 fails. For the time being, we don't care about
            # anything other than "BINARY". The only other value that is currently
            # allowed by the standard is "ISO-IR 10646 2000 UTF-8", in other words UTF-8.
            # Note that we only follow the initial 'hdrcharset' setting to preserve
            # the initial behavior of the 'tarfile' module.
1472            if raw_keyword == b"hdrcharset" and encoding is None:
1473                if raw_value == b"BINARY":
1474                    encoding = tarfile.encoding
1475                else:  # This branch ensures only the first 'hdrcharset' header is used.
1476                    encoding = "utf-8"
1477
1478            pos += length
1479
1480        # If no explicit hdrcharset is set, we use UTF-8 as a default.
1481        if encoding is None:
1482            encoding = "utf-8"
1483
1484        # After parsing the raw headers we can decode them to text.
1485        for length, raw_keyword, raw_value in raw_headers:
1486            # Normally, we could just use "utf-8" as the encoding and "strict"
1487            # as the error handler, but we'd better not take the risk. For
1488            # example, GNU tar <= 1.23 is known to store filenames it cannot
1489            # translate to UTF-8 as raw strings (unfortunately without a
1490            # hdrcharset=BINARY header).
1491            # We first try the strict standard encoding, and if that fails we
1492            # fall back on the user's encoding and error handler.
1493            keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8",
1494                    tarfile.errors)
1495            if keyword in PAX_NAME_FIELDS:
1496                value = self._decode_pax_field(raw_value, encoding, tarfile.encoding,
1497                        tarfile.errors)
1498            else:
1499                value = self._decode_pax_field(raw_value, "utf-8", "utf-8",
1500                        tarfile.errors)
1501
1502            pax_headers[keyword] = value
1503
1504        # Fetch the next header.
1505        try:
1506            next = self.fromtarfile(tarfile)
1507        except HeaderError as e:
1508            raise SubsequentHeaderError(str(e)) from None
1509
1510        # Process GNU sparse information.
1511        if "GNU.sparse.map" in pax_headers:
1512            # GNU extended sparse format version 0.1.
1513            self._proc_gnusparse_01(next, pax_headers)
1514
1515        elif "GNU.sparse.size" in pax_headers:
1516            # GNU extended sparse format version 0.0.
1517            self._proc_gnusparse_00(next, raw_headers)
1518
1519        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1520            # GNU extended sparse format version 1.0.
1521            self._proc_gnusparse_10(next, pax_headers, tarfile)
1522
1523        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1524            # Patch the TarInfo object with the extended header info.
1525            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1526            next.offset = self.offset
1527
1528            if "size" in pax_headers:
1529                # If the extended header replaces the size field,
1530                # we need to recalculate the offset where the next
1531                # header starts.
1532                offset = next.offset_data
1533                if next.isreg() or next.type not in SUPPORTED_TYPES:
1534                    offset += next._block(next.size)
1535                tarfile.offset = offset
1536
1537        return next
1538
1539    def _proc_gnusparse_00(self, next, raw_headers):
1540        """Process a GNU tar extended sparse header, version 0.0.
1541        """
1542        offsets = []
1543        numbytes = []
1544        for _, keyword, value in raw_headers:
1545            if keyword == b"GNU.sparse.offset":
1546                try:
1547                    offsets.append(int(value.decode()))
1548                except ValueError:
1549                    raise InvalidHeaderError("invalid header")
1550
1551            elif keyword == b"GNU.sparse.numbytes":
1552                try:
1553                    numbytes.append(int(value.decode()))
1554                except ValueError:
1555                    raise InvalidHeaderError("invalid header")
1556
1557        next.sparse = list(zip(offsets, numbytes))
1558
1559    def _proc_gnusparse_01(self, next, pax_headers):
1560        """Process a GNU tar extended sparse header, version 0.1.
1561        """
1562        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1563        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1564
1565    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1566        """Process a GNU tar extended sparse header, version 1.0.
1567        """
1568        fields = None
1569        sparse = []
1570        buf = tarfile.fileobj.read(BLOCKSIZE)
1571        fields, buf = buf.split(b"\n", 1)
1572        fields = int(fields)
1573        while len(sparse) < fields * 2:
1574            if b"\n" not in buf:
1575                buf += tarfile.fileobj.read(BLOCKSIZE)
1576            number, buf = buf.split(b"\n", 1)
1577            sparse.append(int(number))
1578        next.offset_data = tarfile.fileobj.tell()
1579        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1580
1581    def _apply_pax_info(self, pax_headers, encoding, errors):
1582        """Replace fields with supplemental information from a previous
1583           pax extended or global header.
1584        """
1585        for keyword, value in pax_headers.items():
1586            if keyword == "GNU.sparse.name":
1587                setattr(self, "path", value)
1588            elif keyword == "GNU.sparse.size":
1589                setattr(self, "size", int(value))
1590            elif keyword == "GNU.sparse.realsize":
1591                setattr(self, "size", int(value))
1592            elif keyword in PAX_FIELDS:
1593                if keyword in PAX_NUMBER_FIELDS:
1594                    try:
1595                        value = PAX_NUMBER_FIELDS[keyword](value)
1596                    except ValueError:
1597                        value = 0
1598                if keyword == "path":
1599                    value = value.rstrip("/")
1600                setattr(self, keyword, value)
1601
1602        self.pax_headers = pax_headers.copy()
1603
1604    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1605        """Decode a single field from a pax record.
1606        """
1607        try:
1608            return value.decode(encoding, "strict")
1609        except UnicodeDecodeError:
1610            return value.decode(fallback_encoding, fallback_errors)
1611
1612    def _block(self, count):
1613        """Round up a byte count by BLOCKSIZE and return it,
1614           e.g. _block(834) => 1024.
1615        """
1616        # Only non-negative offsets are allowed
1617        if count < 0:
1618            raise InvalidHeaderError("invalid offset")
1619        blocks, remainder = divmod(count, BLOCKSIZE)
1620        if remainder:
1621            blocks += 1
1622        return blocks * BLOCKSIZE
1623
1624    def isreg(self):
1625        'Return True if the TarInfo object is a regular file.'
1626        return self.type in REGULAR_TYPES
1627
1628    def isfile(self):
1629        'Return True if the TarInfo object is a regular file.'
1630        return self.isreg()
1631
1632    def isdir(self):
1633        'Return True if it is a directory.'
1634        return self.type == DIRTYPE
1635
1636    def issym(self):
1637        'Return True if it is a symbolic link.'
1638        return self.type == SYMTYPE
1639
1640    def islnk(self):
1641        'Return True if it is a hard link.'
1642        return self.type == LNKTYPE
1643
1644    def ischr(self):
1645        'Return True if it is a character device.'
1646        return self.type == CHRTYPE
1647
1648    def isblk(self):
1649        'Return True if it is a block device.'
1650        return self.type == BLKTYPE
1651
1652    def isfifo(self):
1653        'Return True if it is a FIFO.'
1654        return self.type == FIFOTYPE
1655
1656    def issparse(self):
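        'Return True if it is a sparse file.'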
1657        return self.sparse is not None
1658
1659    def isdev(self):
1660        'Return True if it is one of character device, block device or FIFO.'
1661        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1662# class TarInfo
1663
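# Illustrative sketch, not part of the original module: the type predicates
# above are typically used when walking an archive's members, e.g.
#
#     with TarFile.open("example.tar") as tar:        # hypothetical archive name
#         regular_files = [m for m in tar.getmembers() if m.isreg()]
#         directories   = [m for m in tar.getmembers() if m.isdir()]
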
1664class TarFile(object):
1665    """The TarFile Class provides an interface to tar archives.
1666    """
1667
1668    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
1669
1670    dereference = False         # If true, add content of linked file to the
1671                                # tar file, else the link.
1672
1673    ignore_zeros = False        # If true, skips empty or invalid blocks and
1674                                # continues processing.
1675
1676    errorlevel = 1              # If 0, fatal errors only appear in debug
1677                                # messages (if debug >= 0). If > 0, errors
1678                                # are passed to the caller as exceptions.
1679
1680    format = DEFAULT_FORMAT     # The format to use when creating an archive.
1681
1682    encoding = ENCODING         # Encoding for 8-bit character strings.
1683
1684    errors = None               # Error handler for unicode conversion.
1685
1686    tarinfo = TarInfo           # The default TarInfo class to use.
1687
1688    fileobject = ExFileObject   # The file-object for extractfile().
1689
1690    extraction_filter = None    # The default filter for extraction.
1691
1692    def __init__(self, name=None, mode="r", fileobj=None, format=None,
1693            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1694            errors="surrogateescape", pax_headers=None, debug=None,
1695            errorlevel=None, copybufsize=None):
1696        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1697           read from an existing archive, 'a' to append data to an existing
1698           file, 'w' to create a new file overwriting an existing one, or 'x'
1699           to create a new file that must not exist yet. `mode' defaults to 'r'.
1700           If `fileobj' is given, it is used for reading or writing data. If it
1701           can be determined, `mode' is overridden by `fileobj's mode.
1702           `fileobj' is not closed when TarFile is closed.
1703        """
1704        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
1705        if mode not in modes:
1706            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1707        self.mode = mode
1708        self._mode = modes[mode]
1709
1710        if not fileobj:
1711            if self.mode == "a" and not os.path.exists(name):
1712                # Create nonexistent files in append mode.
1713                self.mode = "w"
1714                self._mode = "wb"
1715            fileobj = bltn_open(name, self._mode)
1716            self._extfileobj = False
1717        else:
1718            if (name is None and hasattr(fileobj, "name") and
1719                isinstance(fileobj.name, (str, bytes))):
1720                name = fileobj.name
1721            if hasattr(fileobj, "mode"):
1722                self._mode = fileobj.mode
1723            self._extfileobj = True
1724        self.name = os.path.abspath(name) if name else None
1725        self.fileobj = fileobj
1726
1727        # Init attributes.
1728        if format is not None:
1729            self.format = format
1730        if tarinfo is not None:
1731            self.tarinfo = tarinfo
1732        if dereference is not None:
1733            self.dereference = dereference
1734        if ignore_zeros is not None:
1735            self.ignore_zeros = ignore_zeros
1736        if encoding is not None:
1737            self.encoding = encoding
1738        self.errors = errors
1739
1740        if pax_headers is not None and self.format == PAX_FORMAT:
1741            self.pax_headers = pax_headers
1742        else:
1743            self.pax_headers = {}
1744
1745        if debug is not None:
1746            self.debug = debug
1747        if errorlevel is not None:
1748            self.errorlevel = errorlevel
1749
1750        # Init datastructures.
1751        self.copybufsize = copybufsize
1752        self.closed = False
1753        self.members = []       # list of members as TarInfo objects
1754        self._loaded = False    # flag if all members have been read
1755        self.offset = self.fileobj.tell()
1756                                # current position in the archive file
1757        self.inodes = {}        # dictionary caching the inodes of
1758                                # archive members already added
1759
1760        try:
1761            if self.mode == "r":
1762                self.firstmember = None
1763                self.firstmember = self.next()
1764
1765            if self.mode == "a":
1766                # Move to the end of the archive,
1767                # before the first empty block.
1768                while True:
1769                    self.fileobj.seek(self.offset)
1770                    try:
1771                        tarinfo = self.tarinfo.fromtarfile(self)
1772                        self.members.append(tarinfo)
1773                    except EOFHeaderError:
1774                        self.fileobj.seek(self.offset)
1775                        break
1776                    except HeaderError as e:
1777                        raise ReadError(str(e)) from None
1778
1779            if self.mode in ("a", "w", "x"):
1780                self._loaded = True
1781
1782                if self.pax_headers:
1783                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1784                    self.fileobj.write(buf)
1785                    self.offset += len(buf)
1786        except:
1787            if not self._extfileobj:
1788                self.fileobj.close()
1789            self.closed = True
1790            raise
1791
1792    #--------------------------------------------------------------------------
1793    # Below are the classmethods which act as alternate constructors to the
1794    # TarFile class. The open() method is the only one that is needed for
1795    # public use; it is the "super"-constructor and is able to select an
1796    # adequate "sub"-constructor for a particular compression using the mapping
1797    # from OPEN_METH.
1798    #
1799    # This concept allows one to subclass TarFile without losing the comfort of
1800    # the super-constructor. A sub-constructor is registered and made available
1801    # by adding it to the mapping in OPEN_METH.
1802
1803    @classmethod
1804    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1805        """Open a tar archive for reading, writing or appending. Return
1806           an appropriate TarFile class.
1807
1808           mode:
1809           'r' or 'r:*' open for reading with transparent compression
1810           'r:'         open for reading exclusively uncompressed
1811           'r:gz'       open for reading with gzip compression
1812           'r:bz2'      open for reading with bzip2 compression
1813           'r:xz'       open for reading with lzma compression
1814           'a' or 'a:'  open for appending, creating the file if necessary
1815           'w' or 'w:'  open for writing without compression
1816           'w:gz'       open for writing with gzip compression
1817           'w:bz2'      open for writing with bzip2 compression
1818           'w:xz'       open for writing with lzma compression
1819
1820           'x' or 'x:'  create a tarfile exclusively without compression, raise
1821                        an exception if the file is already created
1822           'x:gz'       create a gzip compressed tarfile, raise an exception
1823                        if the file is already created
1824           'x:bz2'      create a bzip2 compressed tarfile, raise an exception
1825                        if the file is already created
1826           'x:xz'       create an lzma compressed tarfile, raise an exception
1827                        if the file is already created
1828
1829           'r|*'        open a stream of tar blocks with transparent compression
1830           'r|'         open an uncompressed stream of tar blocks for reading
1831           'r|gz'       open a gzip compressed stream of tar blocks
1832           'r|bz2'      open a bzip2 compressed stream of tar blocks
1833           'r|xz'       open an lzma compressed stream of tar blocks
1834           'w|'         open an uncompressed stream for writing
1835           'w|gz'       open a gzip compressed stream for writing
1836           'w|bz2'      open a bzip2 compressed stream for writing
1837           'w|xz'       open an lzma compressed stream for writing
1838        """
1839
1840        if not name and not fileobj:
1841            raise ValueError("nothing to open")
1842
1843        if mode in ("r", "r:*"):
1844            # Find out which *open() is appropriate for opening the file.
1845            def not_compressed(comptype):
1846                return cls.OPEN_METH[comptype] == 'taropen'
1847            error_msgs = []
1848            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
1849                func = getattr(cls, cls.OPEN_METH[comptype])
1850                if fileobj is not None:
1851                    saved_pos = fileobj.tell()
1852                try:
1853                    return func(name, "r", fileobj, **kwargs)
1854                except (ReadError, CompressionError) as e:
1855                    error_msgs.append(f'- method {comptype}: {e!r}')
1856                    if fileobj is not None:
1857                        fileobj.seek(saved_pos)
1858                    continue
1859            error_msgs_summary = '\n'.join(error_msgs)
1860            raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}")
1861
1862        elif ":" in mode:
1863            filemode, comptype = mode.split(":", 1)
1864            filemode = filemode or "r"
1865            comptype = comptype or "tar"
1866
1867            # Select the *open() function according to
1868            # given compression.
1869            if comptype in cls.OPEN_METH:
1870                func = getattr(cls, cls.OPEN_METH[comptype])
1871            else:
1872                raise CompressionError("unknown compression type %r" % comptype)
1873            return func(name, filemode, fileobj, **kwargs)
1874
1875        elif "|" in mode:
1876            filemode, comptype = mode.split("|", 1)
1877            filemode = filemode or "r"
1878            comptype = comptype or "tar"
1879
1880            if filemode not in ("r", "w"):
1881                raise ValueError("mode must be 'r' or 'w'")
1882
1883            stream = _Stream(name, filemode, comptype, fileobj, bufsize)
1884            try:
1885                t = cls(name, filemode, stream, **kwargs)
1886            except:
1887                stream.close()
1888                raise
1889            t._extfileobj = False
1890            return t
1891
1892        elif mode in ("a", "w", "x"):
1893            return cls.taropen(name, mode, fileobj, **kwargs)
1894
1895        raise ValueError("undiscernible mode")
1896
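    # Illustrative usage, a sketch that is not part of the original module
    # (the archive names are hypothetical):
    #
    #     with TarFile.open("backup.tar.gz", "r:gz") as tar:   # explicit gzip
    #         names = tar.getnames()
    #
    #     with TarFile.open("backup.tar.xz") as tar:           # "r" probes the compression
    #         names = tar.getnames()
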
1897    @classmethod
1898    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1899        """Open uncompressed tar archive name for reading or writing.
1900        """
1901        if mode not in ("r", "a", "w", "x"):
1902            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1903        return cls(name, mode, fileobj, **kwargs)
1904
1905    @classmethod
1906    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1907        """Open gzip compressed tar archive name for reading or writing.
1908           Appending is not allowed.
1909        """
1910        if mode not in ("r", "w", "x"):
1911            raise ValueError("mode must be 'r', 'w' or 'x'")
1912
1913        try:
1914            from gzip import GzipFile
1915        except ImportError:
1916            raise CompressionError("gzip module is not available") from None
1917
1918        try:
1919            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
1920        except OSError as e:
1921            if fileobj is not None and mode == 'r':
1922                raise ReadError("not a gzip file") from e
1923            raise
1924
1925        try:
1926            t = cls.taropen(name, mode, fileobj, **kwargs)
1927        except OSError as e:
1928            fileobj.close()
1929            if mode == 'r':
1930                raise ReadError("not a gzip file") from e
1931            raise
1932        except:
1933            fileobj.close()
1934            raise
1935        t._extfileobj = False
1936        return t
1937
1938    @classmethod
1939    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1940        """Open bzip2 compressed tar archive name for reading or writing.
1941           Appending is not allowed.
1942        """
1943        if mode not in ("r", "w", "x"):
1944            raise ValueError("mode must be 'r', 'w' or 'x'")
1945
1946        try:
1947            from bz2 import BZ2File
1948        except ImportError:
1949            raise CompressionError("bz2 module is not available") from None
1950
1951        fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)
1952
1953        try:
1954            t = cls.taropen(name, mode, fileobj, **kwargs)
1955        except (OSError, EOFError) as e:
1956            fileobj.close()
1957            if mode == 'r':
1958                raise ReadError("not a bzip2 file") from e
1959            raise
1960        except:
1961            fileobj.close()
1962            raise
1963        t._extfileobj = False
1964        return t
1965
1966    @classmethod
1967    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
1968        """Open lzma compressed tar archive name for reading or writing.
1969           Appending is not allowed.
1970        """
1971        if mode not in ("r", "w", "x"):
1972            raise ValueError("mode must be 'r', 'w' or 'x'")
1973
1974        try:
1975            from lzma import LZMAFile, LZMAError
1976        except ImportError:
1977            raise CompressionError("lzma module is not available") from None
1978
1979        fileobj = LZMAFile(fileobj or name, mode, preset=preset)
1980
1981        try:
1982            t = cls.taropen(name, mode, fileobj, **kwargs)
1983        except (LZMAError, EOFError) as e:
1984            fileobj.close()
1985            if mode == 'r':
1986                raise ReadError("not an lzma file") from e
1987            raise
1988        except:
1989            fileobj.close()
1990            raise
1991        t._extfileobj = False
1992        return t
1993
1994    # All *open() methods are registered here.
1995    OPEN_METH = {
1996        "tar": "taropen",   # uncompressed tar
1997        "gz":  "gzopen",    # gzip compressed tar
1998        "bz2": "bz2open",   # bzip2 compressed tar
1999        "xz":  "xzopen"     # lzma compressed tar
2000    }
2001
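    # A sketch of how a subclass could register an additional compression
    # method, following the OPEN_METH convention described above. The zstd
    # support shown here is hypothetical and not provided by this module:
    #
    #     class ZstdTarFile(TarFile):
    #         @classmethod
    #         def zstopen(cls, name, mode="r", fileobj=None, **kwargs):
    #             # wrap name/fileobj in a zstandard stream (external library),
    #             # then hand the wrapped file object to cls.taropen()
    #             ...
    #         OPEN_METH = {**TarFile.OPEN_METH, "zst": "zstopen"}
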
2002    #--------------------------------------------------------------------------
2003    # The public methods which TarFile provides:
2004
2005    def close(self):
2006        """Close the TarFile. In write-mode, two finishing zero blocks are
2007           appended to the archive.
2008        """
2009        if self.closed:
2010            return
2011
2012        self.closed = True
2013        try:
2014            if self.mode in ("a", "w", "x"):
2015                self.fileobj.write(NUL * (BLOCKSIZE * 2))
2016                self.offset += (BLOCKSIZE * 2)
2017                # fill up the end with zero-blocks
2018                # (like option -b20 for tar does)
2019                blocks, remainder = divmod(self.offset, RECORDSIZE)
2020                if remainder > 0:
2021                    self.fileobj.write(NUL * (RECORDSIZE - remainder))
2022        finally:
2023            if not self._extfileobj:
2024                self.fileobj.close()
2025
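    # Worked example of the padding above, assuming the default RECORDSIZE of
    # 20 * 512 = 10240 bytes: if the archive ends at offset 1536 after the two
    # zero blocks, divmod(1536, 10240) leaves a remainder of 1536, so another
    # 10240 - 1536 = 8704 NUL bytes are written to complete the record.
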
2026    def getmember(self, name):
2027        """Return a TarInfo object for member `name'. If `name' cannot be
2028           found in the archive, KeyError is raised. If a member occurs more
2029           than once in the archive, its last occurrence is assumed to be the
2030           most up-to-date version.
2031        """
2032        tarinfo = self._getmember(name.rstrip('/'))
2033        if tarinfo is None:
2034            raise KeyError("filename %r not found" % name)
2035        return tarinfo
2036
2037    def getmembers(self):
2038        """Return the members of the archive as a list of TarInfo objects. The
2039           list has the same order as the members in the archive.
2040        """
2041        self._check()
2042        if not self._loaded:    # if we want to obtain a list of
2043            self._load()        # all members, we first have to
2044                                # scan the whole archive.
2045        return self.members
2046
2047    def getnames(self):
2048        """Return the members of the archive as a list of their names. It has
2049           the same order as the list returned by getmembers().
2050        """
2051        return [tarinfo.name for tarinfo in self.getmembers()]
2052
2053    def gettarinfo(self, name=None, arcname=None, fileobj=None):
2054        """Create a TarInfo object from the result of os.stat or equivalent
2055           on an existing file. The file is either named by `name', or
2056           specified as a file object `fileobj' with a file descriptor. If
2057           given, `arcname' specifies an alternative name for the file in the
2058           archive; otherwise, the name is taken from the 'name' attribute of
2059           'fileobj', or from the 'name' argument. The name should be a text
2060           string.
2061        """
2062        self._check("awx")
2063
2064        # When fileobj is given, replace name by
2065        # fileobj's real name.
2066        if fileobj is not None:
2067            name = fileobj.name
2068
2069        # Build the name of the member in the archive.
2070        # Backward slashes are converted to forward slashes,
2071        # and absolute paths are turned into relative paths.
2072        if arcname is None:
2073            arcname = name
2074        drv, arcname = os.path.splitdrive(arcname)
2075        arcname = arcname.replace(os.sep, "/")
2076        arcname = arcname.lstrip("/")
2077
2078        # Now, fill the TarInfo object with
2079        # information specific for the file.
2080        tarinfo = self.tarinfo()
2081        tarinfo.tarfile = self  # Not needed
2082
2083        # Use os.stat or os.lstat, depending on whether symlinks shall be resolved.
2084        if fileobj is None:
2085            if not self.dereference:
2086                statres = os.lstat(name)
2087            else:
2088                statres = os.stat(name)
2089        else:
2090            statres = os.fstat(fileobj.fileno())
2091        linkname = ""
2092
2093        stmd = statres.st_mode
2094        if stat.S_ISREG(stmd):
2095            inode = (statres.st_ino, statres.st_dev)
2096            if not self.dereference and statres.st_nlink > 1 and \
2097                    inode in self.inodes and arcname != self.inodes[inode]:
2098                # Is it a hardlink to an already
2099                # archived file?
2100                type = LNKTYPE
2101                linkname = self.inodes[inode]
2102            else:
2103                # The inode is added only if it's valid.
2104                # For win32 it is always 0.
2105                type = REGTYPE
2106                if inode[0]:
2107                    self.inodes[inode] = arcname
2108        elif stat.S_ISDIR(stmd):
2109            type = DIRTYPE
2110        elif stat.S_ISFIFO(stmd):
2111            type = FIFOTYPE
2112        elif stat.S_ISLNK(stmd):
2113            type = SYMTYPE
2114            linkname = os.readlink(name)
2115        elif stat.S_ISCHR(stmd):
2116            type = CHRTYPE
2117        elif stat.S_ISBLK(stmd):
2118            type = BLKTYPE
2119        else:
2120            return None
2121
2122        # Fill the TarInfo object with all
2123        # information we can get.
2124        tarinfo.name = arcname
2125        tarinfo.mode = stmd
2126        tarinfo.uid = statres.st_uid
2127        tarinfo.gid = statres.st_gid
2128        if type == REGTYPE:
2129            tarinfo.size = statres.st_size
2130        else:
2131            tarinfo.size = 0
2132        tarinfo.mtime = statres.st_mtime
2133        tarinfo.type = type
2134        tarinfo.linkname = linkname
2135        if pwd:
2136            try:
2137                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
2138            except KeyError:
2139                pass
2140        if grp:
2141            try:
2142                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
2143            except KeyError:
2144                pass
2145
2146        if type in (CHRTYPE, BLKTYPE):
2147            if hasattr(os, "major") and hasattr(os, "minor"):
2148                tarinfo.devmajor = os.major(statres.st_rdev)
2149                tarinfo.devminor = os.minor(statres.st_rdev)
2150        return tarinfo
2151
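    # Illustrative sketch, not part of the original module: gettarinfo() is the
    # usual way to build a header for addfile() when the data comes from a real
    # file on disk (the paths here are hypothetical):
    #
    #     ti = tar.gettarinfo("report.txt", arcname="docs/report.txt")
    #     with open("report.txt", "rb") as f:
    #         tar.addfile(ti, f)
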
2152    def list(self, verbose=True, *, members=None):
2153        """Print a table of contents to sys.stdout. If `verbose' is False, only
2154           the names of the members are printed. If it is True, an `ls -l'-like
2155           output is produced. `members' is optional and must be a subset of the
2156           list returned by getmembers().
2157        """
2158        self._check()
2159
2160        if members is None:
2161            members = self
2162        for tarinfo in members:
2163            if verbose:
2164                if tarinfo.mode is None:
2165                    _safe_print("??????????")
2166                else:
2167                    _safe_print(stat.filemode(tarinfo.mode))
2168                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
2169                                       tarinfo.gname or tarinfo.gid))
2170                if tarinfo.ischr() or tarinfo.isblk():
2171                    _safe_print("%10s" %
2172                            ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
2173                else:
2174                    _safe_print("%10d" % tarinfo.size)
2175                if tarinfo.mtime is None:
2176                    _safe_print("????-??-?? ??:??:??")
2177                else:
2178                    _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
2179                                % time.localtime(tarinfo.mtime)[:6])
2180
2181            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
2182
2183            if verbose:
2184                if tarinfo.issym():
2185                    _safe_print("-> " + tarinfo.linkname)
2186                if tarinfo.islnk():
2187                    _safe_print("link to " + tarinfo.linkname)
2188            print()
2189
2190    def add(self, name, arcname=None, recursive=True, *, filter=None):
2191        """Add the file `name' to the archive. `name' may be any type of file
2192           (directory, fifo, symbolic link, etc.). If given, `arcname'
2193           specifies an alternative name for the file in the archive.
2194           Directories are added recursively by default. This can be avoided by
2195           setting `recursive' to False. `filter' is a function
2196           that expects a TarInfo object argument and returns the changed
2197           TarInfo object; if it returns None, the TarInfo object will be
2198           excluded from the archive.
2199        """
2200        self._check("awx")
2201
2202        if arcname is None:
2203            arcname = name
2204
2205        # Skip if somebody tries to archive the archive...
2206        if self.name is not None and os.path.abspath(name) == self.name:
2207            self._dbg(2, "tarfile: Skipped %r" % name)
2208            return
2209
2210        self._dbg(1, name)
2211
2212        # Create a TarInfo object from the file.
2213        tarinfo = self.gettarinfo(name, arcname)
2214
2215        if tarinfo is None:
2216            self._dbg(1, "tarfile: Unsupported type %r" % name)
2217            return
2218
2219        # Change or exclude the TarInfo object.
2220        if filter is not None:
2221            tarinfo = filter(tarinfo)
2222            if tarinfo is None:
2223                self._dbg(2, "tarfile: Excluded %r" % name)
2224                return
2225
2226        # Append the tar header and data to the archive.
2227        if tarinfo.isreg():
2228            with bltn_open(name, "rb") as f:
2229                self.addfile(tarinfo, f)
2230
2231        elif tarinfo.isdir():
2232            self.addfile(tarinfo)
2233            if recursive:
2234                for f in sorted(os.listdir(name)):
2235                    self.add(os.path.join(name, f), os.path.join(arcname, f),
2236                            recursive, filter=filter)
2237
2238        else:
2239            self.addfile(tarinfo)
2240
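    # Illustrative sketch, not part of the original module: `filter' can rewrite
    # or drop members while adding, e.g. to anonymize ownership (the directory
    # name is hypothetical):
    #
    #     def reset_owner(tarinfo):
    #         tarinfo.uid = tarinfo.gid = 0
    #         tarinfo.uname = tarinfo.gname = "root"
    #         return tarinfo
    #
    #     tar.add("project/", filter=reset_owner)
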
2241    def addfile(self, tarinfo, fileobj=None):
2242        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2243           given, it should be a binary file, and tarinfo.size bytes are read
2244           from it and added to the archive. You can create TarInfo objects
2245           directly, or by using gettarinfo().
2246        """
2247        self._check("awx")
2248
2249        tarinfo = copy.copy(tarinfo)
2250
2251        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2252        self.fileobj.write(buf)
2253        self.offset += len(buf)
2254        bufsize = self.copybufsize
2255        # If there's data to follow, append it.
2256        if fileobj is not None:
2257            copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
2258            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2259            if remainder > 0:
2260                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2261                blocks += 1
2262            self.offset += blocks * BLOCKSIZE
2263
2264        self.members.append(tarinfo)
2265
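    # Illustrative sketch, not part of the original module: addfile() also
    # accepts data that never touches the filesystem, as long as tarinfo.size
    # matches the number of bytes supplied:
    #
    #     data = b"hello world\n"
    #     ti = TarInfo(name="greeting.txt")
    #     ti.size = len(data)
    #     tar.addfile(ti, io.BytesIO(data))
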
2266    def _get_filter_function(self, filter):
2267        if filter is None:
2268            filter = self.extraction_filter
2269            if filter is None:
2270                return fully_trusted_filter
2271            if isinstance(filter, str):
2272                raise TypeError(
2273                    'String names are not supported for '
2274                    + 'TarFile.extraction_filter. Use a function such as '
2275                    + 'tarfile.data_filter directly.')
2276            return filter
2277        if callable(filter):
2278            return filter
2279        try:
2280            return _NAMED_FILTERS[filter]
2281        except KeyError:
2282            raise ValueError(f"filter {filter!r} not found") from None
2283
2284    def extractall(self, path=".", members=None, *, numeric_owner=False,
2285                   filter=None):
2286        """Extract all members from the archive to the current working
2287           directory and set owner, modification time and permissions on
2288           directories afterwards. `path' specifies a different directory
2289           to extract to. `members' is optional and must be a subset of the
2290           list returned by getmembers(). If `numeric_owner` is True, the
2291           numeric uid and gid are used instead of the user and group names.
2292
2293           The `filter` function will be called on each member just
2294           before extraction.
2295           It can return a changed TarInfo or None to skip the member.
2296           String names of common filters are accepted.
2297        """
2298        directories = []
2299
2300        filter_function = self._get_filter_function(filter)
2301        if members is None:
2302            members = self
2303
2304        for member in members:
2305            tarinfo, unfiltered = self._get_extract_tarinfo(
2306                member, filter_function, path)
2307            if tarinfo is None:
2308                continue
2309            if tarinfo.isdir():
2310                # For directories, delay setting attributes until later,
2311                # since permissions can interfere with extraction and
2312                # extracting contents can reset mtime.
2313                directories.append(unfiltered)
2314            self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),
2315                              numeric_owner=numeric_owner,
2316                              filter_function=filter_function)
2317
2318        # Reverse sort directories.
2319        directories.sort(key=lambda a: a.name, reverse=True)
2320
2322        # Set correct owner, mtime and filemode on directories.
2323        for unfiltered in directories:
2324            try:
2325                # Need to re-apply any filter, to take the *current* filesystem
2326                # state into account.
2327                try:
2328                    tarinfo = filter_function(unfiltered, path)
2329                except _FILTER_ERRORS as exc:
2330                    self._log_no_directory_fixup(unfiltered, repr(exc))
2331                    continue
2332                if tarinfo is None:
2333                    self._log_no_directory_fixup(unfiltered,
2334                                                 'excluded by filter')
2335                    continue
2336                dirpath = os.path.join(path, tarinfo.name)
2337                try:
2338                    lstat = os.lstat(dirpath)
2339                except FileNotFoundError:
2340                    self._log_no_directory_fixup(tarinfo, 'missing')
2341                    continue
2342                if not stat.S_ISDIR(lstat.st_mode):
2343                    # This is no longer a directory; presumably a later
2344                    # member overwrote the entry.
2345                    self._log_no_directory_fixup(tarinfo, 'not a directory')
2346                    continue
2347                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
2348                self.utime(tarinfo, dirpath)
2349                self.chmod(tarinfo, dirpath)
2350            except ExtractError as e:
2351                self._handle_nonfatal_error(e)
2352
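    # Illustrative sketch, not part of the original module: named filters such
    # as "data" can be passed by string (the archive and destination paths are
    # hypothetical):
    #
    #     with TarFile.open("release.tar.gz", "r:gz") as tar:
    #         tar.extractall(path="dest", filter="data")
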
2353    def _log_no_directory_fixup(self, member, reason):
2354        self._dbg(2, "tarfile: Not fixing up directory %r (%s)" %
2355                  (member.name, reason))
2356
2357    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False,
2358                filter=None):
2359        """Extract a member from the archive to the current working directory,
2360           using its full name. Its file information is extracted as accurately
2361           as possible. `member' may be a filename or a TarInfo object. You can
2362           specify a different directory using `path'. File attributes (owner,
2363           mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
2364           is True, the numeric uid and gid are used instead of the user and
2365           group names.
2366
2367           The `filter` function will be called before extraction.
2368           It can return a changed TarInfo or None to skip the member.
2369           String names of common filters are accepted.
2370        """
2371        filter_function = self._get_filter_function(filter)
2372        tarinfo, unfiltered = self._get_extract_tarinfo(
2373            member, filter_function, path)
2374        if tarinfo is not None:
2375            self._extract_one(tarinfo, path, set_attrs, numeric_owner)
2376
2377    def _get_extract_tarinfo(self, member, filter_function, path):
2378        """Get (filtered, unfiltered) TarInfos from *member*
2379
2380        *member* might be a string.
2381
2382        Return (None, None) if the member is excluded, e.g. by the filter.
2383        """
2384
2385        if isinstance(member, str):
2386            unfiltered = self.getmember(member)
2387        else:
2388            unfiltered = member
2389
2390        filtered = None
2391        try:
2392            filtered = filter_function(unfiltered, path)
2393        except (OSError, FilterError) as e:
2394            self._handle_fatal_error(e)
2395        except ExtractError as e:
2396            self._handle_nonfatal_error(e)
2397        if filtered is None:
2398            self._dbg(2, "tarfile: Excluded %r" % unfiltered.name)
2399            return None, None
2400
2401        # Prepare the link target for makelink().
2402        if filtered.islnk():
2403            filtered = copy.copy(filtered)
2404            filtered._link_target = os.path.join(path, filtered.linkname)
2405        return filtered, unfiltered
2406
2407    def _extract_one(self, tarinfo, path, set_attrs, numeric_owner,
2408                     filter_function=None):
2409        """Extract from filtered tarinfo to disk.
2410
2411           filter_function is only used when extracting a *different*
2412           member (e.g. as a fallback to creating a symlink)
2413        """
2414        self._check("r")
2415
2416        try:
2417            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
2418                                 set_attrs=set_attrs,
2419                                 numeric_owner=numeric_owner,
2420                                 filter_function=filter_function,
2421                                 extraction_root=path)
2422        except OSError as e:
2423            self._handle_fatal_error(e)
2424        except ExtractError as e:
2425            self._handle_nonfatal_error(e)
2426
2427    def _handle_nonfatal_error(self, e):
2428        """Handle non-fatal error (ExtractError) according to errorlevel"""
2429        if self.errorlevel > 1:
2430            raise
2431        else:
2432            self._dbg(1, "tarfile: %s" % e)
2433
2434    def _handle_fatal_error(self, e):
2435        """Handle "fatal" error according to self.errorlevel"""
2436        if self.errorlevel > 0:
2437            raise
2438        elif isinstance(e, OSError):
2439            if e.filename is None:
2440                self._dbg(1, "tarfile: %s" % e.strerror)
2441            else:
2442                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2443        else:
2444            self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e))
2445
2446    def extractfile(self, member):
2447        """Extract a member from the archive as a file object. `member' may be
2448           a filename or a TarInfo object. If `member' is a regular file or
2449           a link, an io.BufferedReader object is returned. For all other
2450           existing members, None is returned. If `member' does not appear
2451           in the archive, KeyError is raised.
2452        """
2453        self._check("r")
2454
2455        if isinstance(member, str):
2456            tarinfo = self.getmember(member)
2457        else:
2458            tarinfo = member
2459
2460        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
2461            # Members with unknown types are treated as regular files.
2462            return self.fileobject(self, tarinfo)
2463
2464        elif tarinfo.islnk() or tarinfo.issym():
2465            if isinstance(self.fileobj, _Stream):
2466                # A small but ugly workaround for the case that someone tries
2467                # to extract a (sym)link as a file-object from a non-seekable
2468                # stream of tar blocks.
2469                raise StreamError("cannot extract (sym)link as file object")
2470            else:
2471                # A (sym)link's file object is its target's file object.
2472                return self.extractfile(self._find_link_target(tarinfo))
2473        else:
2474            # If there's no data associated with the member (directory, chrdev,
2475            # blkdev, etc.), return None instead of a file object.
2476            return None
2477
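    # Illustrative sketch, not part of the original module: reading a single
    # member's contents without writing anything to disk (the member name is
    # hypothetical):
    #
    #     f = tar.extractfile("docs/readme.txt")
    #     if f is not None:          # None for directories, devices, etc.
    #         content = f.read()
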
2478    def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2479                        numeric_owner=False, *, filter_function=None,
2480                        extraction_root=None):
2481        """Extract the filtered TarInfo object tarinfo to a physical
2482           file called targetpath.
2483
2484           filter_function is only used when extracting a *different*
2485           member (e.g. as a fallback to creating a symlink)
2486        """
2487        # Fetch the TarInfo object for the given name
2488        # and build the destination pathname, replacing
2489        # forward slashes with platform-specific separators.
2490        targetpath = targetpath.rstrip("/")
2491        targetpath = targetpath.replace("/", os.sep)
2492
2493        # Create all upper directories.
2494        upperdirs = os.path.dirname(targetpath)
2495        if upperdirs and not os.path.exists(upperdirs):
2496            # Create directories that are not part of the archive with
2497            # default permissions.
2498            os.makedirs(upperdirs)
2499
2500        if tarinfo.islnk() or tarinfo.issym():
2501            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2502        else:
2503            self._dbg(1, tarinfo.name)
2504
2505        if tarinfo.isreg():
2506            self.makefile(tarinfo, targetpath)
2507        elif tarinfo.isdir():
2508            self.makedir(tarinfo, targetpath)
2509        elif tarinfo.isfifo():
2510            self.makefifo(tarinfo, targetpath)
2511        elif tarinfo.ischr() or tarinfo.isblk():
2512            self.makedev(tarinfo, targetpath)
2513        elif tarinfo.islnk() or tarinfo.issym():
2514            self.makelink_with_filter(
2515                tarinfo, targetpath,
2516                filter_function=filter_function,
2517                extraction_root=extraction_root)
2518        elif tarinfo.type not in SUPPORTED_TYPES:
2519            self.makeunknown(tarinfo, targetpath)
2520        else:
2521            self.makefile(tarinfo, targetpath)
2522
2523        if set_attrs:
2524            self.chown(tarinfo, targetpath, numeric_owner)
2525            if not tarinfo.issym():
2526                self.chmod(tarinfo, targetpath)
2527                self.utime(tarinfo, targetpath)
2528
2529    #--------------------------------------------------------------------------
2530    # Below are the different file methods. They are called via
2531    # _extract_member() when extract() is called. They can be replaced in a
2532    # subclass to implement other functionality.
2533
2534    def makedir(self, tarinfo, targetpath):
2535        """Make a directory called targetpath.
2536        """
2537        try:
2538            if tarinfo.mode is None:
2539                # Use the system's default mode
2540                os.mkdir(targetpath)
2541            else:
2542                # Use a safe mode for the directory; the real mode is set
2543                # later in _extract_member().
2544                os.mkdir(targetpath, 0o700)
2545        except FileExistsError:
2546            pass
2547
2548    def makefile(self, tarinfo, targetpath):
2549        """Make a file called targetpath.
2550        """
2551        source = self.fileobj
2552        source.seek(tarinfo.offset_data)
2553        bufsize = self.copybufsize
2554        with bltn_open(targetpath, "wb") as target:
2555            if tarinfo.sparse is not None:
2556                for offset, size in tarinfo.sparse:
2557                    target.seek(offset)
2558                    copyfileobj(source, target, size, ReadError, bufsize)
2559                target.seek(tarinfo.size)
2560                target.truncate()
2561            else:
2562                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
2563
2564    def makeunknown(self, tarinfo, targetpath):
2565        """Make a file from a TarInfo object with an unknown type
2566           at targetpath.
2567        """
2568        self.makefile(tarinfo, targetpath)
2569        self._dbg(1, "tarfile: Unknown file type %r, " \
2570                     "extracted as regular file." % tarinfo.type)
2571
2572    def makefifo(self, tarinfo, targetpath):
2573        """Make a fifo called targetpath.
2574        """
2575        if hasattr(os, "mkfifo"):
2576            os.mkfifo(targetpath)
2577        else:
2578            raise ExtractError("fifo not supported by system")
2579
2580    def makedev(self, tarinfo, targetpath):
2581        """Make a character or block device called targetpath.
2582        """
2583        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2584            raise ExtractError("special devices not supported by system")
2585
2586        mode = tarinfo.mode
2587        if mode is None:
2588            # Use mknod's default
2589            mode = 0o600
2590        if tarinfo.isblk():
2591            mode |= stat.S_IFBLK
2592        else:
2593            mode |= stat.S_IFCHR
2594
2595        os.mknod(targetpath, mode,
2596                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2597
2598    def makelink(self, tarinfo, targetpath):
2599        return self.makelink_with_filter(tarinfo, targetpath, None, None)
2600
2601    def makelink_with_filter(self, tarinfo, targetpath,
2602                             filter_function, extraction_root):
2603        """Make a (symbolic) link called targetpath. If it cannot be created
2604          (platform limitation), we try to make a copy of the referenced file
2605          instead of a link.
2606
2607          filter_function is only used when extracting a *different*
2608          member (e.g. as a fallback to creating a link).
2609        """
2610        keyerror_to_extracterror = False
2611        try:
2612            # For systems that support symbolic and hard links.
2613            if tarinfo.issym():
2614                if os.path.lexists(targetpath):
2615                    # Avoid FileExistsError on following os.symlink.
2616                    os.unlink(targetpath)
2617                os.symlink(tarinfo.linkname, targetpath)
2618                return
2619            else:
2620                if os.path.exists(tarinfo._link_target):
2621                    os.link(tarinfo._link_target, targetpath)
2622                    return
2623        except symlink_exception:
2624            keyerror_to_extracterror = True
2625
2626        try:
2627            unfiltered = self._find_link_target(tarinfo)
2628        except KeyError:
2629            if keyerror_to_extracterror:
2630                raise ExtractError(
2631                    "unable to resolve link inside archive") from None
2632            else:
2633                raise
2634
2635        if filter_function is None:
2636            filtered = unfiltered
2637        else:
2638            if extraction_root is None:
2639                raise ExtractError(
2640                    "makelink_with_filter: if filter_function is not None, "
2641                    + "extraction_root must also not be None")
2642            try:
2643                filtered = filter_function(unfiltered, extraction_root)
2644            except _FILTER_ERRORS as cause:
2645                raise LinkFallbackError(tarinfo, unfiltered.name) from cause
2646        if filtered is not None:
2647            self._extract_member(filtered, targetpath,
2648                                 filter_function=filter_function,
2649                                 extraction_root=extraction_root)
2650
2651    def chown(self, tarinfo, targetpath, numeric_owner):
2652        """Set owner of targetpath according to tarinfo. If numeric_owner
2653           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
2654           is False, fall back to .gid/.uid when the search based on name
2655           fails.
2656        """
2657        if hasattr(os, "geteuid") and os.geteuid() == 0:
2658            # We have to be root to do so.
2659            g = tarinfo.gid
2660            u = tarinfo.uid
2661            if not numeric_owner:
2662                try:
2663                    if grp and tarinfo.gname:
2664                        g = grp.getgrnam(tarinfo.gname)[2]
2665                except KeyError:
2666                    pass
2667                try:
2668                    if pwd and tarinfo.uname:
2669                        u = pwd.getpwnam(tarinfo.uname)[2]
2670                except KeyError:
2671                    pass
2672            if g is None:
2673                g = -1
2674            if u is None:
2675                u = -1
2676            try:
2677                if tarinfo.issym() and hasattr(os, "lchown"):
2678                    os.lchown(targetpath, u, g)
2679                else:
2680                    os.chown(targetpath, u, g)
2681            except OSError as e:
2682                raise ExtractError("could not change owner") from e
2683
2684    def chmod(self, tarinfo, targetpath):
2685        """Set file permissions of targetpath according to tarinfo.
2686        """
2687        if tarinfo.mode is None:
2688            return
2689        try:
2690            os.chmod(targetpath, tarinfo.mode)
2691        except OSError as e:
2692            raise ExtractError("could not change mode") from e
2693
2694    def utime(self, tarinfo, targetpath):
2695        """Set modification time of targetpath according to tarinfo.
2696        """
2697        mtime = tarinfo.mtime
2698        if mtime is None:
2699            return
2700        if not hasattr(os, 'utime'):
2701            return
2702        try:
2703            os.utime(targetpath, (mtime, mtime))
2704        except OSError as e:
2705            raise ExtractError("could not change modification time") from e
2706
2707    #--------------------------------------------------------------------------
2708    def next(self):
2709        """Return the next member of the archive as a TarInfo object, when
2710           TarFile is opened for reading. Return None if there are no more
2711           members available.
2712        """
2713        self._check("ra")
2714        if self.firstmember is not None:
2715            m = self.firstmember
2716            self.firstmember = None
2717            return m
2718
2719        # Advance the file pointer.
2720        if self.offset != self.fileobj.tell():
2721            if self.offset == 0:
2722                return None
2723            self.fileobj.seek(self.offset - 1)
2724            if not self.fileobj.read(1):
2725                raise ReadError("unexpected end of data")
2726
2727        # Read the next block.
2728        tarinfo = None
2729        while True:
2730            try:
2731                tarinfo = self.tarinfo.fromtarfile(self)
2732            except EOFHeaderError as e:
2733                if self.ignore_zeros:
2734                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2735                    self.offset += BLOCKSIZE
2736                    continue
2737            except InvalidHeaderError as e:
2738                if self.ignore_zeros:
2739                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2740                    self.offset += BLOCKSIZE
2741                    continue
2742                elif self.offset == 0:
2743                    raise ReadError(str(e)) from None
2744            except EmptyHeaderError:
2745                if self.offset == 0:
2746                    raise ReadError("empty file") from None
2747            except TruncatedHeaderError as e:
2748                if self.offset == 0:
2749                    raise ReadError(str(e)) from None
2750            except SubsequentHeaderError as e:
2751                raise ReadError(str(e)) from None
            except Exception as e:
                try:
                    import zlib
                    if isinstance(e, zlib.error):
                        raise ReadError(f'zlib error: {e}') from None
                    else:
                        raise e
                except ImportError:
                    raise e
            break

        if tarinfo is not None:
            self.members.append(tarinfo)
        else:
            self._loaded = True

        return tarinfo

    #--------------------------------------------------------------------------
    # Little helper methods:

    def _getmember(self, name, tarinfo=None, normalize=False):
        """Find an archive member by name from bottom to top.
           If tarinfo is given, it is used as the starting point.
        """
        # Ensure that all members have been loaded.
        members = self.getmembers()

        # Limit the member search list up to tarinfo.
        skipping = False
        if tarinfo is not None:
            try:
                index = members.index(tarinfo)
            except ValueError:
                # The given starting point might be a (modified) copy.
                # We'll later skip members until we find an equivalent.
                skipping = True
            else:
                # Happy fast path
                members = members[:index]

        if normalize:
            name = os.path.normpath(name)

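        # Search from the end of the archive: when the same name occurs more
        # than once, the member stored last is the one that takes effect on
        # extraction, so it is the one that should be returned here.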
        for member in reversed(members):
            if skipping:
                if tarinfo.offset == member.offset:
                    skipping = False
                continue
            if normalize:
                member_name = os.path.normpath(member.name)
            else:
                member_name = member.name

            if name == member_name:
                return member

        if skipping:
            # Starting point was not found
            raise ValueError(tarinfo)

    def _load(self):
        """Read through the entire archive file and look for readable
           members.
        """
        while True:
            tarinfo = self.next()
            if tarinfo is None:
                break
        self._loaded = True

    def _check(self, mode=None):
        """Check if TarFile is still open, and if the operation's mode
           corresponds to TarFile's mode.
        """
        if self.closed:
            raise OSError("%s is closed" % self.__class__.__name__)
        if mode is not None and self.mode not in mode:
            raise OSError("bad operation for mode %r" % self.mode)

    def _find_link_target(self, tarinfo):
        """Find the target member of a symlink or hardlink member in the
           archive.
        """
        if tarinfo.issym():
            # Always search the entire archive.
            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name),
                                              tarinfo.linkname)))
            limit = None
        else:
            # Search the archive before the link, because a hard link is
            # just a reference to an already archived file.
            linkname = tarinfo.linkname
            limit = tarinfo

        member = self._getmember(linkname, tarinfo=limit, normalize=True)
        if member is None:
            raise KeyError("linkname %r not found" % linkname)
        return member

    def __iter__(self):
        """Provide an iterator object.
        """
        if self._loaded:
            yield from self.members
            return

        # Yield items using TarFile's next() method.
        # When all members have been read, set TarFile as _loaded.
        index = 0
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will have already exhausted the next() method.
        if self.firstmember is not None:
            tarinfo = self.next()
            index += 1
            yield tarinfo

        while True:
            if index < len(self.members):
                tarinfo = self.members[index]
            elif not self._loaded:
                tarinfo = self.next()
                if not tarinfo:
                    self._loaded = True
                    return
            else:
                return
            index += 1
            yield tarinfo

    def _dbg(self, level, msg):
        """Write debugging output to sys.stderr.
        """
        if level <= self.debug:
            print(msg, file=sys.stderr)

    def __enter__(self):
        self._check()
        return self

    def __exit__(self, type, value, traceback):
        if type is None:
            self.close()
        else:
            # An exception occurred. We must not call close() because
            # it would try to write end-of-archive blocks and padding.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True

#--------------------
# exported functions
#--------------------

def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       'name' should be a string, file, or file-like object.
    """
    try:
        if hasattr(name, "read"):
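            # For file-like objects, remember the current stream position and
            # restore it afterwards so probing does not disturb the caller.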
            pos = name.tell()
            t = open(fileobj=name)
            name.seek(pos)
        else:
            t = open(name)
        t.close()
        return True
    except TarError:
        return False

open = TarFile.open
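
# A minimal usage sketch (illustrative only; the archive name below is a
# placeholder):
#
#     import tarfile
#     with tarfile.open("example.tar.gz", "r:gz") as tf:
#         for member in tf:
#             print(member.name)
#
# The module-level open() defined above is an alias for TarFile.open(), so
# tarfile.open(...) and TarFile.open(...) are interchangeable.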


def main():
    import argparse

    description = 'A simple command-line interface for the tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    parser.add_argument('--filter', metavar='<filtername>',
                        choices=_NAMED_FILTERS,
                        help='Filter for extraction')

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<tarfile>',
                       help='Show listing of a tarfile')
    group.add_argument('-e', '--extract', nargs='+',
                       metavar=('<tarfile>', '<output_dir>'),
                       help='Extract tarfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create tarfile from sources')
    group.add_argument('-t', '--test', metavar='<tarfile>',
                       help='Test if a tarfile is valid')

    args = parser.parse_args()

    if args.filter and args.extract is None:
        parser.exit(1, '--filter is only valid for extraction\n')

    if args.test is not None:
        src = args.test
        if is_tarfile(src):
            with open(src, 'r') as tar:
                tar.getmembers()
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.list is not None:
        src = args.list
        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.extract is not None:
        if len(args.extract) == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif len(args.extract) == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir, filter=args.filter)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.create is not None:
        tar_name = args.create.pop(0)
        _, ext = os.path.splitext(tar_name)
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
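        # Extensions that are not listed above fall back to an uncompressed
        # archive ('w' mode).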
        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))

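# Command-line usage sketch, derived from the argument definitions above
# (archive, directory and file names are placeholders):
#
#     python -m tarfile -l archive.tar              # list the contents
#     python -m tarfile -e archive.tar out_dir      # extract into out_dir
#     python -m tarfile -c archive.tar.gz f1 f2     # create a gzip archive
#     python -m tarfile -t archive.tar              # test whether it is valid
#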
if __name__ == '__main__':
    main()