#!/usr/bin/env python3
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
# All rights reserved.
#
# Permission  is  hereby granted,  free  of charge,  to  any person
# obtaining a  copy of  this software  and associated documentation
# files  (the  "Software"),  to   deal  in  the  Software   without
# restriction,  including  without limitation  the  rights to  use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies  of  the  Software,  and to  permit  persons  to  whom the
# Software  is  furnished  to  do  so,  subject  to  the  following
# conditions:
#
# The above copyright  notice and this  permission notice shall  be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
"""Read from and write to tar format archives.
"""

version     = "0.9.0"
__author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."

#---------
# Imports
#---------
from builtins import open as bltn_open
import sys
import os
import io
import shutil
import stat
import time
import struct
import copy
import re

try:
    import pwd
except ImportError:
    pwd = None
try:
    import grp
except ImportError:
    grp = None

# os.symlink on Windows prior to 6.0 raises NotImplementedError
symlink_exception = (AttributeError, NotImplementedError)
try:
    # OSError (winerror=1314) will be raised if the caller does not hold the
    # SeCreateSymbolicLinkPrivilege privilege
    symlink_exception += (OSError,)
except NameError:
    pass

# from tarfile import *
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
           "CompressionError", "StreamError", "ExtractError", "HeaderError",
           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
           "DEFAULT_FORMAT", "open"]

#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = PAX_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}

#---------------------------------------------------------
# initialization
#---------------------------------------------------------
if os.name == "nt":
    ENCODING = "utf-8"
else:
    ENCODING = sys.getfilesystemencoding()

#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------

def stn(s, length, encoding, errors):
    """Convert a string to a null-terminated bytes object.
    """
    s = s.encode(encoding, errors)
    return s[:length] + (length - len(s)) * NUL

def nts(s, encoding, errors):
    """Convert a null-terminated bytes object to a string.
    """
    p = s.find(b"\0")
    if p != -1:
        s = s[:p]
    return s.decode(encoding, errors)

def nti(s):
    """Convert a number field to a python number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] in (0o200, 0o377):
        n = 0
        for i in range(len(s) - 1):
            n <<= 8
            n += s[i + 1]
        if s[0] == 0o377:
            n = -(256 ** (len(s) - 1) - n)
    else:
        try:
            s = nts(s, "ascii", "strict")
            n = int(s.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    return n

def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
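    # For example (illustrative), with the default digits=8, itn(511) yields
    # b"0000777\x00" (plain octal), while itn(-1, 8, GNU_FORMAT) yields eight
    # 0xff bytes: a 0o377 marker followed by the base-256 two's complement
    # representation of -1.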
    original_n = n
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            n = 256 ** digits + n

        for i in range(digits - 1):
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s

def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.
    """
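    # The chksum field is the 8 bytes at offset 148; the "8x" in the struct
    # format skips it, and the leading 256 accounts for it as if it held
    # eight spaces (8 * 0x20).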
    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum

def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    blocks, remainder = divmod(length, bufsize)
    for b in range(blocks):
        buf = src.read(bufsize)
        if len(buf) < bufsize:
            raise exception("unexpected end of data")
        dst.write(buf)

    if remainder != 0:
        buf = src.read(remainder)
        if len(buf) < remainder:
            raise exception("unexpected end of data")
        dst.write(buf)
    return

def _safe_print(s):
    encoding = getattr(sys.stdout, 'encoding', None)
    if encoding is not None:
        s = s.encode(encoding, 'backslashreplace').decode(encoding)
    print(s, end=' ')


class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Base exception for header errors."""
    pass
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
    pass
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
    pass
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
    pass
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
    pass
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
    pass

#---------------------------
# internal stream interface
#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)

class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = b""
        self.pos      = 0
        self.closed   = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available") from None
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self._init_read_gz()
                    self.exception = zlib.error
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available") from None
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available") from None
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # Honor "directory components removed" from RFC1952
        self.name = os.path.basename(self.name)
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.read(xlen)
        if flag & 8:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size):
        """Return the next size number of bytes from the stream."""
        assert size is not None
        buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            # Skip underlying buffer to avoid unaligned double buffering.
            if self.buf:
                buf = self.buf
                self.buf = b""
            else:
                buf = self.fileobj.read(self.bufsize)
                if not buf:
                    break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception as e:
                raise ReadError("invalid compressed data") from e
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream

class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        if self.buf.startswith(b"\x1f\x8b\x08"):
            return "gz"
        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
            return "bz2"
        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        else:
            return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy

#------------------------
# Extraction file object
#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            self.map.append((False, lastpos, self.size, None))
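        # (Illustrative example, not from the original source: for a sparse
        # member with blockinfo [(0, 3), (10, 5)] and size 20, the map becomes
        # [(True, 0, 3, self.offset), (False, 3, 10, None),
        #  (True, 10, 15, self.offset + 3), (False, 15, 20, None)];
        # data blocks carry a real position in the archive, zero blocks don't.)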

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        buf = b""
        while size > 0:
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                buf += b
            else:
                buf += NUL * length
            size -= length
            self.position += length
        return buf

    def readinto(self, b):
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        self.closed = True
#class _FileInFile

class ExFileObject(io.BufferedReader):

    def __init__(self, tarfile, tarinfo):
        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                tarinfo.size, tarinfo.sparse)
        super().__init__(fileobj)
#class ExFileObject

#------------------
# Exported Classes
#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    __slots__ = dict(
        name = 'Name of the archive member.',
        mode = 'Permission bits.',
        uid = 'User ID of the user who originally stored this member.',
        gid = 'Group ID of the user who originally stored this member.',
        size = 'Size in bytes.',
        mtime = 'Time of last modification.',
        chksum = 'Header checksum.',
        type = ('File type. type is usually one of these constants: '
                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
        linkname = ('Name of the target file name, which is only present '
                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
        uname = 'User name.',
        gname = 'Group name.',
        devmajor = 'Device major number.',
        devminor = 'Device minor number.',
        offset = 'The tar header starts here.',
        offset_data = "The file's data starts here.",
        pax_headers = ('A dictionary containing key-value pairs of an '
                       'associated pax extended header.'),
        sparse = 'Sparse member information.',
        tarfile = None,
        _sparse_structs = None,
        _link_target = None,
        )

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    @property
    def path(self):
        'In pax headers, "name" is called "path".'
        return self.name

    @path.setter
    def path(self, name):
        self.name = name

    @property
    def linkpath(self):
        'In pax headers, "linkname" is called "linkpath".'
        return self.linkname

    @linkpath.setter
    def linkpath(self, linkname):
        self.linkname = linkname

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a string of 512 byte blocks.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that need to be stored as floats.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

    def _posix_split_name(self, name, encoding, errors):
        """Split a name longer than 100 chars into a prefix
           and a name part.
        """
        components = name.split("/")
        for i in range(1, len(components)):
            prefix = "/".join(components[:i])
            name = "/".join(components[i:])
            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
                    len(name.encode(encoding, errors)) <= LENGTH_NAME:
                break
        else:
            raise ValueError("name is too long")

        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
        if has_device_fields:
            devmajor = itn(info.get("devmajor", 0), 8, format)
            devminor = itn(info.get("devminor", 0), 8, format)
        else:
            devmajor = stn("", 8, encoding, errors)
            devminor = stn("", 8, encoding, errors)

        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            b"        ", # checksum field
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            devmajor,
            devminor,
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
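        # The chksum field spans bytes [148:156] of the 512-byte block and was
        # packed above as eight spaces. The splice below (512 - 364 == 148,
        # 512 - 357 == 155) overwrites the first seven of those bytes with six
        # octal digits and a NUL, leaving the trailing space in place.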
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
           up to the next 512 byte border.
        """
        blocks, remainder = divmod(len(payload), BLOCKSIZE)
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for keyword, value in pax_headers.items():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
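            # (Illustrative example, not from the original source: for keyword
            # b"path" and value b"foo", l is 10 and the loop settles on p == 12,
            # producing the 12-byte record b"12 path=foo\n".)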
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError as e:
            raise SubsequentHeaderError(str(e)) from None

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize
        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2008.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded, but since POSIX.1-2008, tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like this:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
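        # (Illustrative example, not from the original source: the record
        # b"30 mtime=1350244992.023960108\n" has a total length of 30 bytes,
        # counting the "30 " prefix and the trailing newline.)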
1246        regex = re.compile(br"(\d+) ([^=]+)=")
1247        pos = 0
1248        while True:
1249            match = regex.match(buf, pos)
1250            if not match:
1251                break
1252
1253            length, keyword = match.groups()
1254            length = int(length)
1255            if length == 0:
1256                raise InvalidHeaderError("invalid header")
1257            value = buf[match.end(2) + 1:match.start(1) + length - 1]
1258
1259            # Normally, we could just use "utf-8" as the encoding and "strict"
1260            # as the error handler, but we better not take the risk. For
1261            # example, GNU tar <= 1.23 is known to store filenames it cannot
1262            # translate to UTF-8 as raw strings (unfortunately without a
1263            # hdrcharset=BINARY header).
1264            # We first try the strict standard encoding, and if that fails we
1265            # fall back on the user's encoding and error handler.
1266            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
1267                    tarfile.errors)
1268            if keyword in PAX_NAME_FIELDS:
1269                value = self._decode_pax_field(value, encoding, tarfile.encoding,
1270                        tarfile.errors)
1271            else:
1272                value = self._decode_pax_field(value, "utf-8", "utf-8",
1273                        tarfile.errors)
1274
1275            pax_headers[keyword] = value
1276            pos += length
1277
1278        # Fetch the next header.
1279        try:
1280            next = self.fromtarfile(tarfile)
1281        except HeaderError as e:
1282            raise SubsequentHeaderError(str(e)) from None
1283
1284        # Process GNU sparse information.
1285        if "GNU.sparse.map" in pax_headers:
1286            # GNU extended sparse format version 0.1.
1287            self._proc_gnusparse_01(next, pax_headers)
1288
1289        elif "GNU.sparse.size" in pax_headers:
1290            # GNU extended sparse format version 0.0.
1291            self._proc_gnusparse_00(next, pax_headers, buf)
1292
1293        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1294            # GNU extended sparse format version 1.0.
1295            self._proc_gnusparse_10(next, pax_headers, tarfile)
1296
1297        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1298            # Patch the TarInfo object with the extended header info.
1299            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1300            next.offset = self.offset
1301
1302            if "size" in pax_headers:
1303                # If the extended header replaces the size field,
1304                # we need to recalculate the offset where the next
1305                # header starts.
1306                offset = next.offset_data
1307                if next.isreg() or next.type not in SUPPORTED_TYPES:
1308                    offset += next._block(next.size)
1309                tarfile.offset = offset
1310
1311        return next
1312
1313    def _proc_gnusparse_00(self, next, pax_headers, buf):
1314        """Process a GNU tar extended sparse header, version 0.0.
1315        """
1316        offsets = []
1317        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1318            offsets.append(int(match.group(1)))
1319        numbytes = []
1320        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1321            numbytes.append(int(match.group(1)))
1322        next.sparse = list(zip(offsets, numbytes))
1323
1324    def _proc_gnusparse_01(self, next, pax_headers):
1325        """Process a GNU tar extended sparse header, version 0.1.
1326        """
1327        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1328        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1329
1330    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1331        """Process a GNU tar extended sparse header, version 1.0.
1332        """
1333        fields = None
1334        sparse = []
1335        buf = tarfile.fileobj.read(BLOCKSIZE)
1336        fields, buf = buf.split(b"\n", 1)
1337        fields = int(fields)
1338        while len(sparse) < fields * 2:
1339            if b"\n" not in buf:
1340                buf += tarfile.fileobj.read(BLOCKSIZE)
1341            number, buf = buf.split(b"\n", 1)
1342            sparse.append(int(number))
1343        next.offset_data = tarfile.fileobj.tell()
1344        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1345
1346    def _apply_pax_info(self, pax_headers, encoding, errors):
1347        """Replace fields with supplemental information from a previous
1348           pax extended or global header.
1349        """
1350        for keyword, value in pax_headers.items():
1351            if keyword == "GNU.sparse.name":
1352                setattr(self, "path", value)
1353            elif keyword == "GNU.sparse.size":
1354                setattr(self, "size", int(value))
1355            elif keyword == "GNU.sparse.realsize":
1356                setattr(self, "size", int(value))
1357            elif keyword in PAX_FIELDS:
1358                if keyword in PAX_NUMBER_FIELDS:
1359                    try:
1360                        value = PAX_NUMBER_FIELDS[keyword](value)
1361                    except ValueError:
1362                        value = 0
1363                if keyword == "path":
1364                    value = value.rstrip("/")
1365                setattr(self, keyword, value)
1366
1367        self.pax_headers = pax_headers.copy()
1368
1369    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1370        """Decode a single field from a pax record.
1371        """
1372        try:
1373            return value.decode(encoding, "strict")
1374        except UnicodeDecodeError:
1375            return value.decode(fallback_encoding, fallback_errors)
1376
1377    def _block(self, count):
1378        """Round up a byte count by BLOCKSIZE and return it,
1379           e.g. _block(834) => 1024.
1380        """
1381        blocks, remainder = divmod(count, BLOCKSIZE)
1382        if remainder:
1383            blocks += 1
1384        return blocks * BLOCKSIZE
1385
1386    def isreg(self):
1387        'Return True if the Tarinfo object is a regular file.'
1388        return self.type in REGULAR_TYPES
1389
1390    def isfile(self):
1391        'Return True if the Tarinfo object is a regular file.'
1392        return self.isreg()
1393
1394    def isdir(self):
1395        'Return True if it is a directory.'
1396        return self.type == DIRTYPE
1397
1398    def issym(self):
1399        'Return True if it is a symbolic link.'
1400        return self.type == SYMTYPE
1401
1402    def islnk(self):
1403        'Return True if it is a hard link.'
1404        return self.type == LNKTYPE
1405
1406    def ischr(self):
1407        'Return True if it is a character device.'
1408        return self.type == CHRTYPE
1409
1410    def isblk(self):
1411        'Return True if it is a block device.'
1412        return self.type == BLKTYPE
1413
1414    def isfifo(self):
1415        'Return True if it is a FIFO.'
1416        return self.type == FIFOTYPE
1417
1418    def issparse(self):
1419        return self.sparse is not None
1420
1421    def isdev(self):
1422        'Return True if it is one of character device, block device or FIFO.'
1423        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1424# class TarInfo
1425
1426class TarFile(object):
1427    """The TarFile Class provides an interface to tar archives.
1428    """
1429
1430    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
1431
1432    dereference = False         # If true, add content of linked file to the
1433                                # tar file, else the link.
1434
1435    ignore_zeros = False        # If true, skips empty or invalid blocks and
1436                                # continues processing.
1437
1438    errorlevel = 1              # If 0, fatal errors only appear in debug
1439                                # messages (if debug >= 0). If > 0, errors
1440                                # are passed to the caller as exceptions.
1441
1442    format = DEFAULT_FORMAT     # The format to use when creating an archive.
1443
1444    encoding = ENCODING         # Encoding for 8-bit character strings.
1445
1446    errors = None               # Error handler for unicode conversion.
1447
1448    tarinfo = TarInfo           # The default TarInfo class to use.
1449
1450    fileobject = ExFileObject   # The file-object for extractfile().
1451
1452    def __init__(self, name=None, mode="r", fileobj=None, format=None,
1453            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1454            errors="surrogateescape", pax_headers=None, debug=None,
1455            errorlevel=None, copybufsize=None):
1456        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1457           read from an existing archive, 'a' to append data to an existing
1458           file, 'w' to create a new file overwriting an existing one, or 'x'
           to create a new file only if it does not already exist. `mode'
1459           defaults to 'r'.
1460           If `fileobj' is given, it is used for reading or writing data. If it
1461           can be determined, `mode' is overridden by `fileobj's mode.
1462           `fileobj' is not closed when TarFile is closed.
1463        """
1464        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
1465        if mode not in modes:
1466            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1467        self.mode = mode
1468        self._mode = modes[mode]
1469
1470        if not fileobj:
1471            if self.mode == "a" and not os.path.exists(name):
1472                # Create nonexistent files in append mode.
1473                self.mode = "w"
1474                self._mode = "wb"
1475            fileobj = bltn_open(name, self._mode)
1476            self._extfileobj = False
1477        else:
1478            if (name is None and hasattr(fileobj, "name") and
1479                isinstance(fileobj.name, (str, bytes))):
1480                name = fileobj.name
1481            if hasattr(fileobj, "mode"):
1482                self._mode = fileobj.mode
1483            self._extfileobj = True
1484        self.name = os.path.abspath(name) if name else None
1485        self.fileobj = fileobj
1486
1487        # Init attributes.
1488        if format is not None:
1489            self.format = format
1490        if tarinfo is not None:
1491            self.tarinfo = tarinfo
1492        if dereference is not None:
1493            self.dereference = dereference
1494        if ignore_zeros is not None:
1495            self.ignore_zeros = ignore_zeros
1496        if encoding is not None:
1497            self.encoding = encoding
1498        self.errors = errors
1499
1500        if pax_headers is not None and self.format == PAX_FORMAT:
1501            self.pax_headers = pax_headers
1502        else:
1503            self.pax_headers = {}
1504
1505        if debug is not None:
1506            self.debug = debug
1507        if errorlevel is not None:
1508            self.errorlevel = errorlevel
1509
1510        # Init datastructures.
1511        self.copybufsize = copybufsize
1512        self.closed = False
1513        self.members = []       # list of members as TarInfo objects
1514        self._loaded = False    # flag if all members have been read
1515        self.offset = self.fileobj.tell()
1516                                # current position in the archive file
1517        self.inodes = {}        # dictionary caching the inodes of
1518                                # archive members already added
1519
1520        try:
1521            if self.mode == "r":
1522                self.firstmember = None
1523                self.firstmember = self.next()
1524
1525            if self.mode == "a":
1526                # Move to the end of the archive,
1527                # before the first empty block.
1528                while True:
1529                    self.fileobj.seek(self.offset)
1530                    try:
1531                        tarinfo = self.tarinfo.fromtarfile(self)
1532                        self.members.append(tarinfo)
1533                    except EOFHeaderError:
1534                        self.fileobj.seek(self.offset)
1535                        break
1536                    except HeaderError as e:
1537                        raise ReadError(str(e)) from None
1538
1539            if self.mode in ("a", "w", "x"):
1540                self._loaded = True
1541
1542                if self.pax_headers:
1543                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1544                    self.fileobj.write(buf)
1545                    self.offset += len(buf)
1546        except:
1547            if not self._extfileobj:
1548                self.fileobj.close()
1549            self.closed = True
1550            raise
1551
1552    #--------------------------------------------------------------------------
1553    # Below are the classmethods which act as alternate constructors to the
1554    # TarFile class. The open() method is the only one that is needed for
1555    # public use; it is the "super"-constructor and is able to select an
1556    # adequate "sub"-constructor for a particular compression using the mapping
1557    # from OPEN_METH.
1558    #
1559    # This concept allows one to subclass TarFile without losing the comfort of
1560    # the super-constructor. A sub-constructor is registered and made available
1561    # by adding it to the mapping in OPEN_METH.
1562
1563    @classmethod
1564    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1565        """Open a tar archive for reading, writing or appending. Return
1566           an appropriate TarFile object.
1567
1568           mode:
1569           'r' or 'r:*' open for reading with transparent compression
1570           'r:'         open for reading exclusively uncompressed
1571           'r:gz'       open for reading with gzip compression
1572           'r:bz2'      open for reading with bzip2 compression
1573           'r:xz'       open for reading with lzma compression
1574           'a' or 'a:'  open for appending, creating the file if necessary
1575           'w' or 'w:'  open for writing without compression
1576           'w:gz'       open for writing with gzip compression
1577           'w:bz2'      open for writing with bzip2 compression
1578           'w:xz'       open for writing with lzma compression
1579
1580           'x' or 'x:'  create a tarfile exclusively without compression, raise
1581                        an exception if the file already exists
1582           'x:gz'       create a gzip compressed tarfile, raise an exception
1583                        if the file already exists
1584           'x:bz2'      create a bzip2 compressed tarfile, raise an exception
1585                        if the file already exists
1586           'x:xz'       create an lzma compressed tarfile, raise an exception
1587                        if the file already exists
1588
1589           'r|*'        open a stream of tar blocks with transparent compression
1590           'r|'         open an uncompressed stream of tar blocks for reading
1591           'r|gz'       open a gzip compressed stream of tar blocks
1592           'r|bz2'      open a bzip2 compressed stream of tar blocks
1593           'r|xz'       open an lzma compressed stream of tar blocks
1594           'w|'         open an uncompressed stream for writing
1595           'w|gz'       open a gzip compressed stream for writing
1596           'w|bz2'      open a bzip2 compressed stream for writing
1597           'w|xz'       open an lzma compressed stream for writing
1598        """
1599
1600        if not name and not fileobj:
1601            raise ValueError("nothing to open")
1602
1603        if mode in ("r", "r:*"):
1604            # Find out which *open() is appropriate for opening the file.
1605            def not_compressed(comptype):
1606                return cls.OPEN_METH[comptype] == 'taropen'
1607            error_msgs = []
1608            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
1609                func = getattr(cls, cls.OPEN_METH[comptype])
1610                if fileobj is not None:
1611                    saved_pos = fileobj.tell()
1612                try:
1613                    return func(name, "r", fileobj, **kwargs)
1614                except (ReadError, CompressionError) as e:
1615                    error_msgs.append(f'- method {comptype}: {e!r}')
1616                    if fileobj is not None:
1617                        fileobj.seek(saved_pos)
1618                    continue
1619            error_msgs_summary = '\n'.join(error_msgs)
1620            raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}")
1621
1622        elif ":" in mode:
1623            filemode, comptype = mode.split(":", 1)
1624            filemode = filemode or "r"
1625            comptype = comptype or "tar"
1626
1627            # Select the *open() function according to
1628            # given compression.
1629            if comptype in cls.OPEN_METH:
1630                func = getattr(cls, cls.OPEN_METH[comptype])
1631            else:
1632                raise CompressionError("unknown compression type %r" % comptype)
1633            return func(name, filemode, fileobj, **kwargs)
1634
1635        elif "|" in mode:
1636            filemode, comptype = mode.split("|", 1)
1637            filemode = filemode or "r"
1638            comptype = comptype or "tar"
1639
1640            if filemode not in ("r", "w"):
1641                raise ValueError("mode must be 'r' or 'w'")
1642
1643            stream = _Stream(name, filemode, comptype, fileobj, bufsize)
1644            try:
1645                t = cls(name, filemode, stream, **kwargs)
1646            except:
1647                stream.close()
1648                raise
1649            t._extfileobj = False
1650            return t
1651
1652        elif mode in ("a", "w", "x"):
1653            return cls.taropen(name, mode, fileobj, **kwargs)
1654
1655        raise ValueError("undiscernible mode")
1656
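    # A minimal usage sketch of open(); "example.tar.gz", "backup.tar.gz",
    # "docs" and the file object `some_pipe' below are placeholders:
    #
    #   import tarfile
    #   # "r" (same as "r:*") detects the compression transparently.
    #   with tarfile.open("example.tar.gz") as tf:
    #       print(tf.getnames())
    #   # Write a new gzip-compressed archive.
    #   with tarfile.open("backup.tar.gz", "w:gz") as tf:
    #       tf.add("docs", arcname="docs")
    #   # For non-seekable sources (pipes, sockets), use the "|" stream modes.
    #   with tarfile.open(fileobj=some_pipe, mode="r|*") as tf:
    #       for member in tf:
    #           ...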
1657    @classmethod
1658    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1659        """Open uncompressed tar archive name for reading or writing.
1660        """
1661        if mode not in ("r", "a", "w", "x"):
1662            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1663        return cls(name, mode, fileobj, **kwargs)
1664
1665    @classmethod
1666    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1667        """Open gzip compressed tar archive name for reading or writing.
1668           Appending is not allowed.
1669        """
1670        if mode not in ("r", "w", "x"):
1671            raise ValueError("mode must be 'r', 'w' or 'x'")
1672
1673        try:
1674            from gzip import GzipFile
1675        except ImportError:
1676            raise CompressionError("gzip module is not available") from None
1677
1678        try:
1679            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
1680        except OSError as e:
1681            if fileobj is not None and mode == 'r':
1682                raise ReadError("not a gzip file") from e
1683            raise
1684
1685        try:
1686            t = cls.taropen(name, mode, fileobj, **kwargs)
1687        except OSError as e:
1688            fileobj.close()
1689            if mode == 'r':
1690                raise ReadError("not a gzip file") from e
1691            raise
1692        except:
1693            fileobj.close()
1694            raise
1695        t._extfileobj = False
1696        return t
1697
1698    @classmethod
1699    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1700        """Open bzip2 compressed tar archive name for reading or writing.
1701           Appending is not allowed.
1702        """
1703        if mode not in ("r", "w", "x"):
1704            raise ValueError("mode must be 'r', 'w' or 'x'")
1705
1706        try:
1707            from bz2 import BZ2File
1708        except ImportError:
1709            raise CompressionError("bz2 module is not available") from None
1710
1711        fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)
1712
1713        try:
1714            t = cls.taropen(name, mode, fileobj, **kwargs)
1715        except (OSError, EOFError) as e:
1716            fileobj.close()
1717            if mode == 'r':
1718                raise ReadError("not a bzip2 file") from e
1719            raise
1720        except:
1721            fileobj.close()
1722            raise
1723        t._extfileobj = False
1724        return t
1725
1726    @classmethod
1727    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
1728        """Open lzma compressed tar archive name for reading or writing.
1729           Appending is not allowed.
1730        """
1731        if mode not in ("r", "w", "x"):
1732            raise ValueError("mode must be 'r', 'w' or 'x'")
1733
1734        try:
1735            from lzma import LZMAFile, LZMAError
1736        except ImportError:
1737            raise CompressionError("lzma module is not available") from None
1738
1739        fileobj = LZMAFile(fileobj or name, mode, preset=preset)
1740
1741        try:
1742            t = cls.taropen(name, mode, fileobj, **kwargs)
1743        except (LZMAError, EOFError) as e:
1744            fileobj.close()
1745            if mode == 'r':
1746                raise ReadError("not an lzma file") from e
1747            raise
1748        except:
1749            fileobj.close()
1750            raise
1751        t._extfileobj = False
1752        return t
1753
1754    # All *open() methods are registered here.
1755    OPEN_METH = {
1756        "tar": "taropen",   # uncompressed tar
1757        "gz":  "gzopen",    # gzip compressed tar
1758        "bz2": "bz2open",   # bzip2 compressed tar
1759        "xz":  "xzopen"     # lzma compressed tar
1760    }
1761
1762    #--------------------------------------------------------------------------
1763    # The public methods which TarFile provides:
1764
1765    def close(self):
1766        """Close the TarFile. In write-mode, two finishing zero blocks are
1767           appended to the archive.
1768        """
1769        if self.closed:
1770            return
1771
1772        self.closed = True
1773        try:
1774            if self.mode in ("a", "w", "x"):
1775                self.fileobj.write(NUL * (BLOCKSIZE * 2))
1776                self.offset += (BLOCKSIZE * 2)
1777                # fill up the end with zero-blocks
1778                # (like option -b20 for tar does)
1779                blocks, remainder = divmod(self.offset, RECORDSIZE)
1780                if remainder > 0:
1781                    self.fileobj.write(NUL * (RECORDSIZE - remainder))
1782        finally:
1783            if not self._extfileobj:
1784                self.fileobj.close()
1785
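    # Sketch: using the TarFile as a context manager calls close() on a normal
    # exit, which writes the two terminating NUL blocks and pads the archive
    # up to RECORDSIZE; "out.tar" and "data.txt" are placeholder names.
    #
    #   import tarfile
    #   with tarfile.open("out.tar", "w") as tf:
    #       tf.add("data.txt")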
1786    def getmember(self, name):
1787        """Return a TarInfo object for member `name'. If `name' cannot be
1788           found in the archive, KeyError is raised. If a member occurs more
1789           than once in the archive, its last occurrence is assumed to be the
1790           most up-to-date version.
1791        """
1792        tarinfo = self._getmember(name)
1793        if tarinfo is None:
1794            raise KeyError("filename %r not found" % name)
1795        return tarinfo
1796
1797    def getmembers(self):
1798        """Return the members of the archive as a list of TarInfo objects. The
1799           list has the same order as the members in the archive.
1800        """
1801        self._check()
1802        if not self._loaded:    # if we want to obtain a list of
1803            self._load()        # all members, we first have to
1804                                # scan the whole archive.
1805        return self.members
1806
1807    def getnames(self):
1808        """Return the members of the archive as a list of their names. It has
1809           the same order as the list returned by getmembers().
1810        """
1811        return [tarinfo.name for tarinfo in self.getmembers()]
1812
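    # Sketch of the member lookup methods; "example.tar" and the member name
    # are placeholders:
    #
    #   with tarfile.open("example.tar") as tf:
    #       names = tf.getnames()                    # names in archive order
    #       info = tf.getmember("docs/readme.txt")   # KeyError if not present
    #       print(info.size, info.mtime, info.isdir())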
1813    def gettarinfo(self, name=None, arcname=None, fileobj=None):
1814        """Create a TarInfo object from the result of os.stat or equivalent
1815           on an existing file. The file is either named by `name', or
1816           specified as a file object `fileobj' with a file descriptor. If
1817           given, `arcname' specifies an alternative name for the file in the
1818           archive, otherwise, the name is taken from the 'name' attribute of
1819           'fileobj', or the 'name' argument. The name should be a text
1820           string.
1821        """
1822        self._check("awx")
1823
1824        # When fileobj is given, replace name by
1825        # fileobj's real name.
1826        if fileobj is not None:
1827            name = fileobj.name
1828
1829        # Build the name of the member in the archive:
1830        # backslashes are converted to forward slashes and
1831        # absolute paths are turned into relative paths.
1832        if arcname is None:
1833            arcname = name
1834        drv, arcname = os.path.splitdrive(arcname)
1835        arcname = arcname.replace(os.sep, "/")
1836        arcname = arcname.lstrip("/")
1837
1838        # Now, fill the TarInfo object with
1839        # information specific for the file.
1840        tarinfo = self.tarinfo()
1841        tarinfo.tarfile = self  # Not needed
1842
1843        # Use os.stat or os.lstat, depending on whether symlinks should be resolved.
1844        if fileobj is None:
1845            if not self.dereference:
1846                statres = os.lstat(name)
1847            else:
1848                statres = os.stat(name)
1849        else:
1850            statres = os.fstat(fileobj.fileno())
1851        linkname = ""
1852
1853        stmd = statres.st_mode
1854        if stat.S_ISREG(stmd):
1855            inode = (statres.st_ino, statres.st_dev)
1856            if not self.dereference and statres.st_nlink > 1 and \
1857                    inode in self.inodes and arcname != self.inodes[inode]:
1858                # Is it a hardlink to an already
1859                # archived file?
1860                type = LNKTYPE
1861                linkname = self.inodes[inode]
1862            else:
1863                # The inode is added only if it's valid.
1864                # For win32 it is always 0.
1865                type = REGTYPE
1866                if inode[0]:
1867                    self.inodes[inode] = arcname
1868        elif stat.S_ISDIR(stmd):
1869            type = DIRTYPE
1870        elif stat.S_ISFIFO(stmd):
1871            type = FIFOTYPE
1872        elif stat.S_ISLNK(stmd):
1873            type = SYMTYPE
1874            linkname = os.readlink(name)
1875        elif stat.S_ISCHR(stmd):
1876            type = CHRTYPE
1877        elif stat.S_ISBLK(stmd):
1878            type = BLKTYPE
1879        else:
1880            return None
1881
1882        # Fill the TarInfo object with all
1883        # information we can get.
1884        tarinfo.name = arcname
1885        tarinfo.mode = stmd
1886        tarinfo.uid = statres.st_uid
1887        tarinfo.gid = statres.st_gid
1888        if type == REGTYPE:
1889            tarinfo.size = statres.st_size
1890        else:
1891            tarinfo.size = 0
1892        tarinfo.mtime = statres.st_mtime
1893        tarinfo.type = type
1894        tarinfo.linkname = linkname
1895        if pwd:
1896            try:
1897                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1898            except KeyError:
1899                pass
1900        if grp:
1901            try:
1902                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1903            except KeyError:
1904                pass
1905
1906        if type in (CHRTYPE, BLKTYPE):
1907            if hasattr(os, "major") and hasattr(os, "minor"):
1908                tarinfo.devmajor = os.major(statres.st_rdev)
1909                tarinfo.devminor = os.minor(statres.st_rdev)
1910        return tarinfo
1911
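    # Sketch: gettarinfo() builds a TarInfo from the filesystem; the object
    # can be adjusted before it is written with addfile(). This is user-level
    # code (the built-in open() is meant below); "data.txt", "out.tar" and the
    # arcname are placeholders.
    #
    #   with tarfile.open("out.tar", "w") as tf:
    #       ti = tf.gettarinfo("data.txt", arcname="payload/data.txt")
    #       ti.uid = ti.gid = 0
    #       ti.uname = ti.gname = "root"
    #       with open("data.txt", "rb") as f:
    #           tf.addfile(ti, f)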
1912    def list(self, verbose=True, *, members=None):
1913        """Print a table of contents to sys.stdout. If `verbose' is False, only
1914           the names of the members are printed. If it is True, an `ls -l'-like
1915           output is produced. `members' is optional and must be a subset of the
1916           list returned by getmembers().
1917        """
1918        self._check()
1919
1920        if members is None:
1921            members = self
1922        for tarinfo in members:
1923            if verbose:
1924                _safe_print(stat.filemode(tarinfo.mode))
1925                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
1926                                       tarinfo.gname or tarinfo.gid))
1927                if tarinfo.ischr() or tarinfo.isblk():
1928                    _safe_print("%10s" %
1929                            ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
1930                else:
1931                    _safe_print("%10d" % tarinfo.size)
1932                _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
1933                            % time.localtime(tarinfo.mtime)[:6])
1934
1935            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
1936
1937            if verbose:
1938                if tarinfo.issym():
1939                    _safe_print("-> " + tarinfo.linkname)
1940                if tarinfo.islnk():
1941                    _safe_print("link to " + tarinfo.linkname)
1942            print()
1943
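    # Sketch of list(); "example.tar" is a placeholder:
    #
    #   with tarfile.open("example.tar") as tf:
    #       tf.list()                # `ls -l'-like listing on sys.stdout
    #       tf.list(verbose=False)   # names only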
1944    def add(self, name, arcname=None, recursive=True, *, filter=None):
1945        """Add the file `name' to the archive. `name' may be any type of file
1946           (directory, fifo, symbolic link, etc.). If given, `arcname'
1947           specifies an alternative name for the file in the archive.
1948           Directories are added recursively by default. This can be avoided by
1949           setting `recursive' to False. `filter' is a function
1950           that expects a TarInfo object argument and returns the changed
1951           TarInfo object; if it returns None, the TarInfo object will be
1952           excluded from the archive.
1953        """
1954        self._check("awx")
1955
1956        if arcname is None:
1957            arcname = name
1958
1959        # Skip if somebody tries to archive the archive...
1960        if self.name is not None and os.path.abspath(name) == self.name:
1961            self._dbg(2, "tarfile: Skipped %r" % name)
1962            return
1963
1964        self._dbg(1, name)
1965
1966        # Create a TarInfo object from the file.
1967        tarinfo = self.gettarinfo(name, arcname)
1968
1969        if tarinfo is None:
1970            self._dbg(1, "tarfile: Unsupported type %r" % name)
1971            return
1972
1973        # Change or exclude the TarInfo object.
1974        if filter is not None:
1975            tarinfo = filter(tarinfo)
1976            if tarinfo is None:
1977                self._dbg(2, "tarfile: Excluded %r" % name)
1978                return
1979
1980        # Append the tar header and data to the archive.
1981        if tarinfo.isreg():
1982            with bltn_open(name, "rb") as f:
1983                self.addfile(tarinfo, f)
1984
1985        elif tarinfo.isdir():
1986            self.addfile(tarinfo)
1987            if recursive:
1988                for f in sorted(os.listdir(name)):
1989                    self.add(os.path.join(name, f), os.path.join(arcname, f),
1990                            recursive, filter=filter)
1991
1992        else:
1993            self.addfile(tarinfo)
1994
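    # Sketch: a `filter' callable can rewrite or drop members while a tree is
    # added; the ".pyc" rule and the "src.tar.gz"/"src" names are placeholders.
    #
    #   def normalize(ti):
    #       if ti.name.endswith(".pyc"):
    #           return None                  # exclude from the archive
    #       ti.uid = ti.gid = 0
    #       ti.uname = ti.gname = "root"
    #       return ti
    #
    #   with tarfile.open("src.tar.gz", "w:gz") as tf:
    #       tf.add("src", filter=normalize)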
1995    def addfile(self, tarinfo, fileobj=None):
1996        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1997           given, it should be a binary file, and tarinfo.size bytes are read
1998           from it and added to the archive. You can create TarInfo objects
1999           directly, or by using gettarinfo().
2000        """
2001        self._check("awx")
2002
2003        tarinfo = copy.copy(tarinfo)
2004
2005        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2006        self.fileobj.write(buf)
2007        self.offset += len(buf)
2008        bufsize = self.copybufsize
2009        # If there's data to follow, append it.
2010        if fileobj is not None:
2011            copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
2012            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2013            if remainder > 0:
2014                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2015                blocks += 1
2016            self.offset += blocks * BLOCKSIZE
2017
2018        self.members.append(tarinfo)
2019
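    # Sketch: addfile() with a hand-made TarInfo lets in-memory data be
    # archived without touching the filesystem; the names and payload are
    # placeholders.
    #
    #   import io, time, tarfile
    #   payload = b"hello world\n"
    #   ti = tarfile.TarInfo(name="greeting.txt")
    #   ti.size = len(payload)
    #   ti.mtime = time.time()
    #   with tarfile.open("mem.tar", "w") as tf:
    #       tf.addfile(ti, io.BytesIO(payload))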
2020    def extractall(self, path=".", members=None, *, numeric_owner=False):
2021        """Extract all members from the archive to the current working
2022           directory and set owner, modification time and permissions on
2023           directories afterwards. `path' specifies a different directory
2024           to extract to. `members' is optional and must be a subset of the
2025           list returned by getmembers(). If `numeric_owner` is True, only
2026           the numbers for user/group names are used and not the names.
2027        """
2028        directories = []
2029
2030        if members is None:
2031            members = self
2032
2033        for tarinfo in members:
2034            if tarinfo.isdir():
2035                # Extract directories with a safe mode.
2036                directories.append(tarinfo)
2037                tarinfo = copy.copy(tarinfo)
2038                tarinfo.mode = 0o700
2039            # Do not set_attrs directories, as we will do that further down
2040            self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
2041                         numeric_owner=numeric_owner)
2042
2043        # Reverse sort directories.
2044        directories.sort(key=lambda a: a.name)
2045        directories.reverse()
2046
2047        # Set correct owner, mtime and filemode on directories.
2048        for tarinfo in directories:
2049            dirpath = os.path.join(path, tarinfo.name)
2050            try:
2051                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
2052                self.utime(tarinfo, dirpath)
2053                self.chmod(tarinfo, dirpath)
2054            except ExtractError as e:
2055                if self.errorlevel > 1:
2056                    raise
2057                else:
2058                    self._dbg(1, "tarfile: %s" % e)
2059
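    # Sketch: extracting only a vetted subset of members. The path check below
    # is a simplistic illustration, not a complete defence against malicious
    # archives; "example.tar.gz" and "/tmp/unpacked" are placeholders.
    #
    #   with tarfile.open("example.tar.gz") as tf:
    #       safe = [m for m in tf.getmembers()
    #               if not (m.name.startswith("/") or ".." in m.name.split("/"))]
    #       tf.extractall(path="/tmp/unpacked", members=safe)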
2060    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
2061        """Extract a member from the archive to the current working directory,
2062           using its full name. Its file information is extracted as accurately
2063           as possible. `member' may be a filename or a TarInfo object. You can
2064           specify a different directory using `path'. File attributes (owner,
2065           mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
2066           is True, only the numbers for user/group names are used and not
2067           the names.
2068        """
2069        self._check("r")
2070
2071        if isinstance(member, str):
2072            tarinfo = self.getmember(member)
2073        else:
2074            tarinfo = member
2075
2076        # Prepare the link target for makelink().
2077        if tarinfo.islnk():
2078            tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2079
2080        try:
2081            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
2082                                 set_attrs=set_attrs,
2083                                 numeric_owner=numeric_owner)
2084        except OSError as e:
2085            if self.errorlevel > 0:
2086                raise
2087            else:
2088                if e.filename is None:
2089                    self._dbg(1, "tarfile: %s" % e.strerror)
2090                else:
2091                    self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2092        except ExtractError as e:
2093            if self.errorlevel > 1:
2094                raise
2095            else:
2096                self._dbg(1, "tarfile: %s" % e)
2097
2098    def extractfile(self, member):
2099        """Extract a member from the archive as a file object. `member' may be
2100           a filename or a TarInfo object. If `member' is a regular file or
2101           a link, an io.BufferedReader object is returned. For all other
2102           existing members, None is returned. If `member' does not appear
2103           in the archive, KeyError is raised.
2104        """
2105        self._check("r")
2106
2107        if isinstance(member, str):
2108            tarinfo = self.getmember(member)
2109        else:
2110            tarinfo = member
2111
2112        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
2113            # Members with unknown types are treated as regular files.
2114            return self.fileobject(self, tarinfo)
2115
2116        elif tarinfo.islnk() or tarinfo.issym():
2117            if isinstance(self.fileobj, _Stream):
2118                # A small but ugly workaround for the case that someone tries
2119                # to extract a (sym)link as a file-object from a non-seekable
2120                # stream of tar blocks.
2121                raise StreamError("cannot extract (sym)link as file object")
2122            else:
2123                # A (sym)link's file object is its target's file object.
2124                return self.extractfile(self._find_link_target(tarinfo))
2125        else:
2126            # If there's no data associated with the member (directory, chrdev,
2127            # blkdev, etc.), return None instead of a file object.
2128            return None
2129
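    # Sketch: reading a member's data through extractfile(); the archive and
    # member names are placeholders.
    #
    #   with tarfile.open("example.tar") as tf:
    #       f = tf.extractfile("docs/readme.txt")   # None for dirs, devices, ...
    #       if f is not None:
    #           data = f.read()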
2130    def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2131                        numeric_owner=False):
2132        """Extract the TarInfo object tarinfo to a physical
2133           file called targetpath.
2134        """
2135        # Build the destination pathname, replacing forward
2136        # slashes with platform-specific separators.
2138        targetpath = targetpath.rstrip("/")
2139        targetpath = targetpath.replace("/", os.sep)
2140
2141        # Create all upper directories.
2142        upperdirs = os.path.dirname(targetpath)
2143        if upperdirs and not os.path.exists(upperdirs):
2144            # Create directories that are not part of the archive with
2145            # default permissions.
2146            os.makedirs(upperdirs)
2147
2148        if tarinfo.islnk() or tarinfo.issym():
2149            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2150        else:
2151            self._dbg(1, tarinfo.name)
2152
2153        if tarinfo.isreg():
2154            self.makefile(tarinfo, targetpath)
2155        elif tarinfo.isdir():
2156            self.makedir(tarinfo, targetpath)
2157        elif tarinfo.isfifo():
2158            self.makefifo(tarinfo, targetpath)
2159        elif tarinfo.ischr() or tarinfo.isblk():
2160            self.makedev(tarinfo, targetpath)
2161        elif tarinfo.islnk() or tarinfo.issym():
2162            self.makelink(tarinfo, targetpath)
2163        elif tarinfo.type not in SUPPORTED_TYPES:
2164            self.makeunknown(tarinfo, targetpath)
2165        else:
2166            self.makefile(tarinfo, targetpath)
2167
2168        if set_attrs:
2169            self.chown(tarinfo, targetpath, numeric_owner)
2170            if not tarinfo.issym():
2171                self.chmod(tarinfo, targetpath)
2172                self.utime(tarinfo, targetpath)
2173
2174    #--------------------------------------------------------------------------
2175    # Below are the different file methods. They are called via
2176    # _extract_member() when extract() is called. They can be replaced in a
2177    # subclass to implement other functionality.
2178
2179    def makedir(self, tarinfo, targetpath):
2180        """Make a directory called targetpath.
2181        """
2182        try:
2183            # Use a safe mode for the directory, the real mode is set
2184            # later in _extract_member().
2185            os.mkdir(targetpath, 0o700)
2186        except FileExistsError:
2187            pass
2188
2189    def makefile(self, tarinfo, targetpath):
2190        """Make a file called targetpath.
2191        """
2192        source = self.fileobj
2193        source.seek(tarinfo.offset_data)
2194        bufsize = self.copybufsize
2195        with bltn_open(targetpath, "wb") as target:
2196            if tarinfo.sparse is not None:
2197                for offset, size in tarinfo.sparse:
2198                    target.seek(offset)
2199                    copyfileobj(source, target, size, ReadError, bufsize)
2200                target.seek(tarinfo.size)
2201                target.truncate()
2202            else:
2203                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
2204
2205    def makeunknown(self, tarinfo, targetpath):
2206        """Make a file from a TarInfo object with an unknown type
2207           at targetpath.
2208        """
2209        self.makefile(tarinfo, targetpath)
2210        self._dbg(1, "tarfile: Unknown file type %r, " \
2211                     "extracted as regular file." % tarinfo.type)
2212
2213    def makefifo(self, tarinfo, targetpath):
2214        """Make a fifo called targetpath.
2215        """
2216        if hasattr(os, "mkfifo"):
2217            os.mkfifo(targetpath)
2218        else:
2219            raise ExtractError("fifo not supported by system")
2220
2221    def makedev(self, tarinfo, targetpath):
2222        """Make a character or block device called targetpath.
2223        """
2224        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2225            raise ExtractError("special devices not supported by system")
2226
2227        mode = tarinfo.mode
2228        if tarinfo.isblk():
2229            mode |= stat.S_IFBLK
2230        else:
2231            mode |= stat.S_IFCHR
2232
2233        os.mknod(targetpath, mode,
2234                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2235
2236    def makelink(self, tarinfo, targetpath):
2237        """Make a (symbolic) link called targetpath. If it cannot be created
2238           (platform limitation), we try to make a copy of the referenced file
2239           instead of a link.
2240        """
2241        try:
2242            # For systems that support symbolic and hard links.
2243            if tarinfo.issym():
2244                if os.path.lexists(targetpath):
2245                    # Avoid FileExistsError on following os.symlink.
2246                    os.unlink(targetpath)
2247                os.symlink(tarinfo.linkname, targetpath)
2248            else:
2249                # See extract().
2250                if os.path.exists(tarinfo._link_target):
2251                    os.link(tarinfo._link_target, targetpath)
2252                else:
2253                    self._extract_member(self._find_link_target(tarinfo),
2254                                         targetpath)
2255        except symlink_exception:
2256            try:
2257                self._extract_member(self._find_link_target(tarinfo),
2258                                     targetpath)
2259            except KeyError:
2260                raise ExtractError("unable to resolve link inside archive") from None
2261
2262    def chown(self, tarinfo, targetpath, numeric_owner):
2263        """Set owner of targetpath according to tarinfo. If numeric_owner
2264           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
2265           is False, fall back to .gid/.uid when the search based on name
2266           fails.
2267        """
2268        if hasattr(os, "geteuid") and os.geteuid() == 0:
2269            # We have to be root to do so.
2270            g = tarinfo.gid
2271            u = tarinfo.uid
2272            if not numeric_owner:
2273                try:
2274                    if grp:
2275                        g = grp.getgrnam(tarinfo.gname)[2]
2276                except KeyError:
2277                    pass
2278                try:
2279                    if pwd:
2280                        u = pwd.getpwnam(tarinfo.uname)[2]
2281                except KeyError:
2282                    pass
2283            try:
2284                if tarinfo.issym() and hasattr(os, "lchown"):
2285                    os.lchown(targetpath, u, g)
2286                else:
2287                    os.chown(targetpath, u, g)
2288            except OSError as e:
2289                raise ExtractError("could not change owner") from e
2290
2291    def chmod(self, tarinfo, targetpath):
2292        """Set file permissions of targetpath according to tarinfo.
2293        """
2294        try:
2295            os.chmod(targetpath, tarinfo.mode)
2296        except OSError as e:
2297            raise ExtractError("could not change mode") from e
2298
2299    def utime(self, tarinfo, targetpath):
2300        """Set modification time of targetpath according to tarinfo.
2301        """
2302        if not hasattr(os, 'utime'):
2303            return
2304        try:
2305            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2306        except OSError as e:
2307            raise ExtractError("could not change modification time") from e
2308
2309    #--------------------------------------------------------------------------
2310    def next(self):
2311        """Return the next member of the archive as a TarInfo object, when
2312           TarFile is opened for reading. Return None if there are no more
2313           members available.
2314        """
2315        self._check("ra")
2316        if self.firstmember is not None:
2317            m = self.firstmember
2318            self.firstmember = None
2319            return m
2320
2321        # Advance the file pointer.
2322        if self.offset != self.fileobj.tell():
2323            self.fileobj.seek(self.offset - 1)
2324            if not self.fileobj.read(1):
2325                raise ReadError("unexpected end of data")
2326
2327        # Read the next block.
2328        tarinfo = None
2329        while True:
2330            try:
2331                tarinfo = self.tarinfo.fromtarfile(self)
2332            except EOFHeaderError as e:
2333                if self.ignore_zeros:
2334                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2335                    self.offset += BLOCKSIZE
2336                    continue
2337            except InvalidHeaderError as e:
2338                if self.ignore_zeros:
2339                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2340                    self.offset += BLOCKSIZE
2341                    continue
2342                elif self.offset == 0:
2343                    raise ReadError(str(e)) from None
2344            except EmptyHeaderError:
2345                if self.offset == 0:
2346                    raise ReadError("empty file") from None
2347            except TruncatedHeaderError as e:
2348                if self.offset == 0:
2349                    raise ReadError(str(e)) from None
2350            except SubsequentHeaderError as e:
2351                raise ReadError(str(e)) from None
2352            except Exception as e:
2353                try:
2354                    import zlib
2355                    if isinstance(e, zlib.error):
2356                        raise ReadError(f'zlib error: {e}') from None
2357                    else:
2358                        raise e
2359                except ImportError:
2360                    raise e
2361            break
2362
2363        if tarinfo is not None:
2364            self.members.append(tarinfo)
2365        else:
2366            self._loaded = True
2367
2368        return tarinfo
2369
2370    #--------------------------------------------------------------------------
2371    # Little helper methods:
2372
2373    def _getmember(self, name, tarinfo=None, normalize=False):
2374        """Find an archive member by name from bottom to top.
2375           If tarinfo is given, it is used as the starting point.
2376        """
2377        # Ensure that all members have been loaded.
2378        members = self.getmembers()
2379
2380        # Limit the member search list up to tarinfo.
2381        if tarinfo is not None:
2382            members = members[:members.index(tarinfo)]
2383
2384        if normalize:
2385            name = os.path.normpath(name)
2386
2387        for member in reversed(members):
2388            if normalize:
2389                member_name = os.path.normpath(member.name)
2390            else:
2391                member_name = member.name
2392
2393            if name == member_name:
2394                return member
2395
2396    def _load(self):
2397        """Read through the entire archive file and look for readable
2398           members.
2399        """
2400        while True:
2401            tarinfo = self.next()
2402            if tarinfo is None:
2403                break
2404        self._loaded = True
2405
2406    def _check(self, mode=None):
2407        """Check if TarFile is still open, and if the operation's mode
2408           corresponds to TarFile's mode.
2409        """
2410        if self.closed:
2411            raise OSError("%s is closed" % self.__class__.__name__)
2412        if mode is not None and self.mode not in mode:
2413            raise OSError("bad operation for mode %r" % self.mode)
2414
2415    def _find_link_target(self, tarinfo):
2416        """Find the target member of a symlink or hardlink member in the
2417           archive.
2418        """
2419        if tarinfo.issym():
2420            # Always search the entire archive.
2421            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
2422            limit = None
2423        else:
2424            # Search the archive before the link, because a hard link is
2425            # just a reference to an already archived file.
2426            linkname = tarinfo.linkname
2427            limit = tarinfo
2428
2429        member = self._getmember(linkname, tarinfo=limit, normalize=True)
2430        if member is None:
2431            raise KeyError("linkname %r not found" % linkname)
2432        return member
2433
2434    def __iter__(self):
2435        """Provide an iterator object.
2436        """
2437        if self._loaded:
2438            yield from self.members
2439            return
2440
2441        # Yield items using TarFile's next() method.
2442        # When all members have been read, set TarFile as _loaded.
2443        index = 0
2444        # Fix for SF #1100429: Under rare circumstances it can
2445        # happen that getmembers() is called during iteration,
2446        # which will have already exhausted the next() method.
2447        if self.firstmember is not None:
2448            tarinfo = self.next()
2449            index += 1
2450            yield tarinfo
2451
2452        while True:
2453            if index < len(self.members):
2454                tarinfo = self.members[index]
2455            elif not self._loaded:
2456                tarinfo = self.next()
2457                if not tarinfo:
2458                    self._loaded = True
2459                    return
2460            else:
2461                return
2462            index += 1
2463            yield tarinfo
2464
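    # Sketch: iteration reads member headers lazily as the archive is
    # traversed, which suits stream mode ("r|*") where the source cannot seek
    # backwards; `pipe' is a placeholder for a readable binary file object.
    #
    #   with tarfile.open(fileobj=pipe, mode="r|*") as tf:
    #       for member in tf:
    #           if member.isreg():
    #               print(member.name, member.size)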
2465    def _dbg(self, level, msg):
2466        """Write debugging output to sys.stderr.
2467        """
2468        if level <= self.debug:
2469            print(msg, file=sys.stderr)
2470
2471    def __enter__(self):
2472        self._check()
2473        return self
2474
2475    def __exit__(self, type, value, traceback):
2476        if type is None:
2477            self.close()
2478        else:
2479            # An exception occurred. We must not call close() because
2480            # it would try to write end-of-archive blocks and padding.
2481            if not self._extfileobj:
2482                self.fileobj.close()
2483            self.closed = True
2484
2485#--------------------
2486# exported functions
2487#--------------------
2488def is_tarfile(name):
2489    """Return True if name points to a tar archive that we
2490       are able to handle, else return False.
2491
2492       'name' should be a string, file, or file-like object.
2493    """
2494    try:
2495        if hasattr(name, "read"):
2496            t = open(fileobj=name)
2497        else:
2498            t = open(name)
2499        t.close()
2500        return True
2501    except TarError:
2502        return False
2503
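# Sketch of is_tarfile(); "example.tar" is a placeholder. A readable binary
# file object may be passed instead of a name (user-level code, so `open'
# below is the built-in open()).
#
#   import tarfile
#   if tarfile.is_tarfile("example.tar"):
#       ...
#   with open("example.tar", "rb") as f:
#       tarfile.is_tarfile(f)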
2504open = TarFile.open
2505
2506
2507def main():
2508    import argparse
2509
2510    description = 'A simple command-line interface for tarfile module.'
2511    parser = argparse.ArgumentParser(description=description)
2512    parser.add_argument('-v', '--verbose', action='store_true', default=False,
2513                        help='Verbose output')
2514    group = parser.add_mutually_exclusive_group(required=True)
2515    group.add_argument('-l', '--list', metavar='<tarfile>',
2516                       help='Show listing of a tarfile')
2517    group.add_argument('-e', '--extract', nargs='+',
2518                       metavar=('<tarfile>', '<output_dir>'),
2519                       help='Extract tarfile into target dir')
2520    group.add_argument('-c', '--create', nargs='+',
2521                       metavar=('<name>', '<file>'),
2522                       help='Create tarfile from sources')
2523    group.add_argument('-t', '--test', metavar='<tarfile>',
2524                       help='Test if a tarfile is valid')
2525    args = parser.parse_args()
2526
2527    if args.test is not None:
2528        src = args.test
2529        if is_tarfile(src):
2530            with open(src, 'r') as tar:
2532                print(tar.getmembers(), file=sys.stderr)
2533            if args.verbose:
2534                print('{!r} is a tar archive.'.format(src))
2535        else:
2536            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2537
2538    elif args.list is not None:
2539        src = args.list
2540        if is_tarfile(src):
2541            with TarFile.open(src, 'r:*') as tf:
2542                tf.list(verbose=args.verbose)
2543        else:
2544            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2545
2546    elif args.extract is not None:
2547        if len(args.extract) == 1:
2548            src = args.extract[0]
2549            curdir = os.curdir
2550        elif len(args.extract) == 2:
2551            src, curdir = args.extract
2552        else:
2553            parser.exit(1, parser.format_help())
2554
2555        if is_tarfile(src):
2556            with TarFile.open(src, 'r:*') as tf:
2557                tf.extractall(path=curdir)
2558            if args.verbose:
2559                if curdir == '.':
2560                    msg = '{!r} file is extracted.'.format(src)
2561                else:
2562                    msg = ('{!r} file is extracted '
2563                           'into {!r} directory.').format(src, curdir)
2564                print(msg)
2565        else:
2566            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2567
2568    elif args.create is not None:
2569        tar_name = args.create.pop(0)
2570        _, ext = os.path.splitext(tar_name)
2571        compressions = {
2572            # gz
2573            '.gz': 'gz',
2574            '.tgz': 'gz',
2575            # xz
2576            '.xz': 'xz',
2577            '.txz': 'xz',
2578            # bz2
2579            '.bz2': 'bz2',
2580            '.tbz': 'bz2',
2581            '.tbz2': 'bz2',
2582            '.tb2': 'bz2',
2583        }
2584        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
2585        tar_files = args.create
2586
2587        with TarFile.open(tar_name, tar_mode) as tf:
2588            for file_name in tar_files:
2589                tf.add(file_name)
2590
2591        if args.verbose:
2592            print('{!r} file created.'.format(tar_name))
2593
2594if __name__ == '__main__':
2595    main()
2596
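# Command-line usage sketch (the module can be run with `python -m tarfile';
# the archive and file names are placeholders):
#
#   python -m tarfile -l archive.tar                    # list contents
#   python -m tarfile -e archive.tar.gz outdir          # extract into outdir
#   python -m tarfile -c backup.tar.xz src notes.txt    # create; type by suffix
#   python -m tarfile -t archive.tar                    # test validity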