1#!/usr/bin/env python3
2#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
5# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
6# All rights reserved.
7#
8# Permission  is  hereby granted,  free  of charge,  to  any person
9# obtaining a  copy of  this software  and associated documentation
10# files  (the  "Software"),  to   deal  in  the  Software   without
11# restriction,  including  without limitation  the  rights to  use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies  of  the  Software,  and to  permit  persons  to  whom the
14# Software  is  furnished  to  do  so,  subject  to  the  following
15# conditions:
16#
17# The above copyright  notice and this  permission notice shall  be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
21# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
22# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
23# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
24# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
25# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
32version     = "0.9.0"
33__author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
34__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
35
36#---------
37# Imports
38#---------
39from builtins import open as bltn_open
40import sys
41import os
42import io
43import shutil
44import stat
45import time
46import struct
47import copy
48import re
49
50try:
51    import pwd
52except ImportError:
53    pwd = None
54try:
55    import grp
56except ImportError:
57    grp = None
58
59# os.symlink on Windows prior to 6.0 raises NotImplementedError
60symlink_exception = (AttributeError, NotImplementedError)
61try:
62    # OSError (winerror=1314) will be raised if the caller does not hold the
63    # SeCreateSymbolicLinkPrivilege privilege
64    symlink_exception += (OSError,)
65except NameError:
66    pass
67
68# from tarfile import *
69__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
70           "CompressionError", "StreamError", "ExtractError", "HeaderError",
71           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
72           "DEFAULT_FORMAT", "open"]
73
74#---------------------------------------------------------
75# tar constants
76#---------------------------------------------------------
77NUL = b"\0"                     # the null character
78BLOCKSIZE = 512                 # length of processing blocks
79RECORDSIZE = BLOCKSIZE * 20     # length of records
80GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
81POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string
82
83LENGTH_NAME = 100               # maximum length of a filename
84LENGTH_LINK = 100               # maximum length of a linkname
85LENGTH_PREFIX = 155             # maximum length of the prefix field
86
87REGTYPE = b"0"                  # regular file
88AREGTYPE = b"\0"                # regular file
89LNKTYPE = b"1"                  # link (inside tarfile)
90SYMTYPE = b"2"                  # symbolic link
91CHRTYPE = b"3"                  # character special device
92BLKTYPE = b"4"                  # block special device
93DIRTYPE = b"5"                  # directory
94FIFOTYPE = b"6"                 # fifo special device
95CONTTYPE = b"7"                 # contiguous file
96
97GNUTYPE_LONGNAME = b"L"         # GNU tar longname
98GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
99GNUTYPE_SPARSE = b"S"           # GNU tar sparse file
100
101XHDTYPE = b"x"                  # POSIX.1-2001 extended header
102XGLTYPE = b"g"                  # POSIX.1-2001 global header
103SOLARIS_XHDTYPE = b"X"          # Solaris extended header
104
105USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
106GNU_FORMAT = 1                  # GNU tar format
107PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
108DEFAULT_FORMAT = PAX_FORMAT
109
110#---------------------------------------------------------
111# tarfile constants
112#---------------------------------------------------------
113# File types that tarfile supports:
114SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
115                   SYMTYPE, DIRTYPE, FIFOTYPE,
116                   CONTTYPE, CHRTYPE, BLKTYPE,
117                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
118                   GNUTYPE_SPARSE)
119
120# File types that will be treated as a regular file.
121REGULAR_TYPES = (REGTYPE, AREGTYPE,
122                 CONTTYPE, GNUTYPE_SPARSE)
123
124# File types that are part of the GNU tar format.
125GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
126             GNUTYPE_SPARSE)
127
128# Fields from a pax header that override a TarInfo attribute.
129PAX_FIELDS = ("path", "linkpath", "size", "mtime",
130              "uid", "gid", "uname", "gname")
131
132# Fields from a pax header that are affected by hdrcharset.
133PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
134
135# Fields in a pax header that are numbers, all other fields
136# are treated as strings.
137PAX_NUMBER_FIELDS = {
138    "atime": float,
139    "ctime": float,
140    "mtime": float,
141    "uid": int,
142    "gid": int,
143    "size": int
144}
145
146#---------------------------------------------------------
147# initialization
148#---------------------------------------------------------
149if os.name == "nt":
150    ENCODING = "utf-8"
151else:
152    ENCODING = sys.getfilesystemencoding()
153
154#---------------------------------------------------------
155# Some useful functions
156#---------------------------------------------------------
157
158def stn(s, length, encoding, errors):
159    """Convert a string to a null-terminated bytes object.
160    """
161    s = s.encode(encoding, errors)
162    return s[:length] + (length - len(s)) * NUL
163
164def nts(s, encoding, errors):
165    """Convert a null-terminated bytes object to a string.
166    """
167    p = s.find(b"\0")
168    if p != -1:
169        s = s[:p]
170    return s.decode(encoding, errors)
171
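# A minimal illustrative sketch of the two helpers above; this _demo_* function
# (like the others below) is hypothetical and is never called by the module:
# stn() pads or truncates to the field length, nts() recovers the text up to
# the first NUL.
def _demo_stn_nts():
    field = stn("usr/bin/python3", 100, "utf-8", "strict")
    assert len(field) == 100
    assert nts(field, "utf-8", "strict") == "usr/bin/python3"
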
172def nti(s):
173    """Convert a number field to a python number.
174    """
175    # There are two possible encodings for a number field, see
176    # itn() below.
177    if s[0] in (0o200, 0o377):
178        n = 0
179        for i in range(len(s) - 1):
180            n <<= 8
181            n += s[i + 1]
182        if s[0] == 0o377:
183            n = -(256 ** (len(s) - 1) - n)
184    else:
185        try:
186            s = nts(s, "ascii", "strict")
187            n = int(s.strip() or "0", 8)
188        except ValueError:
189            raise InvalidHeaderError("invalid header")
190    return n
191
192def itn(n, digits=8, format=DEFAULT_FORMAT):
193    """Convert a python number to a number field.
194    """
195    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
196    # octal digits followed by a null-byte, this allows values up to
197    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
198# that if necessary. A leading 0o200 or 0o377 byte indicates this
199# particular encoding; the following digits-1 bytes are a big-endian
200    # base-256 representation. This allows values up to (256**(digits-1))-1.
201    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
202    # number.
203    original_n = n
204    n = int(n)
205    if 0 <= n < 8 ** (digits - 1):
206        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
207    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
208        if n >= 0:
209            s = bytearray([0o200])
210        else:
211            s = bytearray([0o377])
212            n = 256 ** digits + n
213
214        for i in range(digits - 1):
215            s.insert(1, n & 0o377)
216            n >>= 8
217    else:
218        raise ValueError("overflow in number field")
219
220    return s
221
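# Hypothetical sketch of the two number-field encodings described in itn():
# small non-negative values use NUL-terminated octal, larger or negative
# values use the GNU base-256 form with a leading 0o200 or 0o377 byte.
def _demo_number_field_roundtrip():
    assert nti(itn(0o755)) == 0o755                     # plain octal field
    assert itn(2 ** 33, 12, GNU_FORMAT)[0] == 0o200     # base-256, positive
    assert nti(itn(2 ** 33, 12, GNU_FORMAT)) == 2 ** 33
    assert nti(itn(-1, 12, GNU_FORMAT)) == -1           # base-256, negative
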
222def calc_chksums(buf):
223    """Calculate the checksum for a member's header by summing up all
224       characters except for the chksum field which is treated as if
225       it was filled with spaces. According to the GNU tar sources,
226       some tars (Sun and NeXT) calculate chksum with signed char,
227       which will be different if there are chars in the buffer with
228       the high bit set. So we calculate two checksums, unsigned and
229       signed.
230    """
231    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
232    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
233    return unsigned_chksum, signed_chksum
234
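# Hypothetical sketch: for an all-zero block both checksums reduce to the 256
# contributed by the eight chksum bytes that are counted as spaces (8 * 0x20).
def _demo_calc_chksums():
    assert calc_chksums(bytes(BLOCKSIZE)) == (256, 256)
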
235def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
236    """Copy length bytes from fileobj src to fileobj dst.
237       If length is None, copy the entire content.
238    """
239    bufsize = bufsize or 16 * 1024
240    if length == 0:
241        return
242    if length is None:
243        shutil.copyfileobj(src, dst, bufsize)
244        return
245
246    blocks, remainder = divmod(length, bufsize)
247    for b in range(blocks):
248        buf = src.read(bufsize)
249        if len(buf) < bufsize:
250            raise exception("unexpected end of data")
251        dst.write(buf)
252
253    if remainder != 0:
254        buf = src.read(remainder)
255        if len(buf) < remainder:
256            raise exception("unexpected end of data")
257        dst.write(buf)
258    return
259
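# Hypothetical sketch: copyfileobj() copies exactly `length` bytes and raises
# the given exception if the source is exhausted early.
def _demo_copyfileobj():
    src, dst = io.BytesIO(b"x" * 1024), io.BytesIO()
    copyfileobj(src, dst, length=600)
    assert dst.getvalue() == b"x" * 600
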
260def _safe_print(s):
261    encoding = getattr(sys.stdout, 'encoding', None)
262    if encoding is not None:
263        s = s.encode(encoding, 'backslashreplace').decode(encoding)
264    print(s, end=' ')
265
266
267class TarError(Exception):
268    """Base exception."""
269    pass
270class ExtractError(TarError):
271    """General exception for extract errors."""
272    pass
273class ReadError(TarError):
274    """Exception for unreadable tar archives."""
275    pass
276class CompressionError(TarError):
277    """Exception for unavailable compression methods."""
278    pass
279class StreamError(TarError):
280    """Exception for unsupported operations on stream-like TarFiles."""
281    pass
282class HeaderError(TarError):
283    """Base exception for header errors."""
284    pass
285class EmptyHeaderError(HeaderError):
286    """Exception for empty headers."""
287    pass
288class TruncatedHeaderError(HeaderError):
289    """Exception for truncated headers."""
290    pass
291class EOFHeaderError(HeaderError):
292    """Exception for end of file headers."""
293    pass
294class InvalidHeaderError(HeaderError):
295    """Exception for invalid headers."""
296    pass
297class SubsequentHeaderError(HeaderError):
298    """Exception for missing and invalid extended headers."""
299    pass
300
301#---------------------------
302# internal stream interface
303#---------------------------
304class _LowLevelFile:
305    """Low-level file object. Supports reading and writing.
306       It is used instead of a regular file object for streaming
307       access.
308    """
309
310    def __init__(self, name, mode):
311        mode = {
312            "r": os.O_RDONLY,
313            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
314        }[mode]
315        if hasattr(os, "O_BINARY"):
316            mode |= os.O_BINARY
317        self.fd = os.open(name, mode, 0o666)
318
319    def close(self):
320        os.close(self.fd)
321
322    def read(self, size):
323        return os.read(self.fd, size)
324
325    def write(self, s):
326        os.write(self.fd, s)
327
328class _Stream:
329    """Class that serves as an adapter between TarFile and
330       a stream-like object.  The stream-like object only
331       needs to have a read() or write() method and is accessed
332       blockwise.  Use of gzip or bzip2 compression is possible.
333       A stream-like object could be for example: sys.stdin,
334       sys.stdout, a socket, a tape device etc.
335
336       _Stream is intended to be used only internally.
337    """
338
339    def __init__(self, name, mode, comptype, fileobj, bufsize):
340        """Construct a _Stream object.
341        """
342        self._extfileobj = True
343        if fileobj is None:
344            fileobj = _LowLevelFile(name, mode)
345            self._extfileobj = False
346
347        if comptype == '*':
348            # Enable transparent compression detection for the
349            # stream interface
350            fileobj = _StreamProxy(fileobj)
351            comptype = fileobj.getcomptype()
352
353        self.name     = name or ""
354        self.mode     = mode
355        self.comptype = comptype
356        self.fileobj  = fileobj
357        self.bufsize  = bufsize
358        self.buf      = b""
359        self.pos      = 0
360        self.closed   = False
361
362        try:
363            if comptype == "gz":
364                try:
365                    import zlib
366                except ImportError:
367                    raise CompressionError("zlib module is not available") from None
368                self.zlib = zlib
369                self.crc = zlib.crc32(b"")
370                if mode == "r":
371                    self._init_read_gz()
372                    self.exception = zlib.error
373                else:
374                    self._init_write_gz()
375
376            elif comptype == "bz2":
377                try:
378                    import bz2
379                except ImportError:
380                    raise CompressionError("bz2 module is not available") from None
381                if mode == "r":
382                    self.dbuf = b""
383                    self.cmp = bz2.BZ2Decompressor()
384                    self.exception = OSError
385                else:
386                    self.cmp = bz2.BZ2Compressor()
387
388            elif comptype == "xz":
389                try:
390                    import lzma
391                except ImportError:
392                    raise CompressionError("lzma module is not available") from None
393                if mode == "r":
394                    self.dbuf = b""
395                    self.cmp = lzma.LZMADecompressor()
396                    self.exception = lzma.LZMAError
397                else:
398                    self.cmp = lzma.LZMACompressor()
399
400            elif comptype != "tar":
401                raise CompressionError("unknown compression type %r" % comptype)
402
403        except:
404            if not self._extfileobj:
405                self.fileobj.close()
406            self.closed = True
407            raise
408
409    def __del__(self):
410        if hasattr(self, "closed") and not self.closed:
411            self.close()
412
413    def _init_write_gz(self):
414        """Initialize for writing with gzip compression.
415        """
416        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
417                                            -self.zlib.MAX_WBITS,
418                                            self.zlib.DEF_MEM_LEVEL,
419                                            0)
420        timestamp = struct.pack("<L", int(time.time()))
421        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
422        if self.name.endswith(".gz"):
423            self.name = self.name[:-3]
424        # Honor "directory components removed" from RFC1952
425        self.name = os.path.basename(self.name)
426        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
427        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
428
429    def write(self, s):
430        """Write string s to the stream.
431        """
432        if self.comptype == "gz":
433            self.crc = self.zlib.crc32(s, self.crc)
434        self.pos += len(s)
435        if self.comptype != "tar":
436            s = self.cmp.compress(s)
437        self.__write(s)
438
439    def __write(self, s):
440        """Write string s to the stream if a whole new block
441           is ready to be written.
442        """
443        self.buf += s
444        while len(self.buf) > self.bufsize:
445            self.fileobj.write(self.buf[:self.bufsize])
446            self.buf = self.buf[self.bufsize:]
447
448    def close(self):
449        """Close the _Stream object. No operation should be
450           done on it afterwards.
451        """
452        if self.closed:
453            return
454
455        self.closed = True
456        try:
457            if self.mode == "w" and self.comptype != "tar":
458                self.buf += self.cmp.flush()
459
460            if self.mode == "w" and self.buf:
461                self.fileobj.write(self.buf)
462                self.buf = b""
463                if self.comptype == "gz":
464                    self.fileobj.write(struct.pack("<L", self.crc))
465                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
466        finally:
467            if not self._extfileobj:
468                self.fileobj.close()
469
470    def _init_read_gz(self):
471        """Initialize for reading a gzip compressed fileobj.
472        """
473        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
474        self.dbuf = b""
475
476        # taken from gzip.GzipFile with some alterations
477        if self.__read(2) != b"\037\213":
478            raise ReadError("not a gzip file")
479        if self.__read(1) != b"\010":
480            raise CompressionError("unsupported compression method")
481
482        flag = ord(self.__read(1))
483        self.__read(6)
484
485        if flag & 4:
486            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
487            self.read(xlen)
488        if flag & 8:
489            while True:
490                s = self.__read(1)
491                if not s or s == NUL:
492                    break
493        if flag & 16:
494            while True:
495                s = self.__read(1)
496                if not s or s == NUL:
497                    break
498        if flag & 2:
499            self.__read(2)
500
501    def tell(self):
502        """Return the stream's file pointer position.
503        """
504        return self.pos
505
506    def seek(self, pos=0):
507        """Set the stream's file pointer to pos. Negative seeking
508           is forbidden.
509        """
510        if pos - self.pos >= 0:
511            blocks, remainder = divmod(pos - self.pos, self.bufsize)
512            for i in range(blocks):
513                self.read(self.bufsize)
514            self.read(remainder)
515        else:
516            raise StreamError("seeking backwards is not allowed")
517        return self.pos
518
519    def read(self, size):
520        """Return the next size number of bytes from the stream."""
521        assert size is not None
522        buf = self._read(size)
523        self.pos += len(buf)
524        return buf
525
526    def _read(self, size):
527        """Return size bytes from the stream.
528        """
529        if self.comptype == "tar":
530            return self.__read(size)
531
532        c = len(self.dbuf)
533        t = [self.dbuf]
534        while c < size:
535            # Skip underlying buffer to avoid unaligned double buffering.
536            if self.buf:
537                buf = self.buf
538                self.buf = b""
539            else:
540                buf = self.fileobj.read(self.bufsize)
541                if not buf:
542                    break
543            try:
544                buf = self.cmp.decompress(buf)
545            except self.exception as e:
546                raise ReadError("invalid compressed data") from e
547            t.append(buf)
548            c += len(buf)
549        t = b"".join(t)
550        self.dbuf = t[size:]
551        return t[:size]
552
553    def __read(self, size):
554        """Return size bytes from stream. If internal buffer is empty,
555           read another block from the stream.
556        """
557        c = len(self.buf)
558        t = [self.buf]
559        while c < size:
560            buf = self.fileobj.read(self.bufsize)
561            if not buf:
562                break
563            t.append(buf)
564            c += len(buf)
565        t = b"".join(t)
566        self.buf = t[size:]
567        return t[:size]
568# class _Stream
569
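# Hypothetical sketch: with an external file object and comptype "tar",
# _Stream only buffers writes and flushes the remainder on close(), leaving
# the external file object open.
def _demo_stream_write():
    raw = io.BytesIO()
    stream = _Stream("demo.tar", "w", "tar", raw, RECORDSIZE)
    stream.write(NUL * BLOCKSIZE)       # one empty 512-byte block
    stream.close()
    assert len(raw.getvalue()) == BLOCKSIZE
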
570class _StreamProxy(object):
571    """Small proxy class that enables transparent compression
572       detection for the Stream interface (mode 'r|*').
573    """
574
575    def __init__(self, fileobj):
576        self.fileobj = fileobj
577        self.buf = self.fileobj.read(BLOCKSIZE)
578
579    def read(self, size):
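        # Hand back the block buffered in __init__ exactly once (the size
        # argument of this first call is ignored), then rebind read() so all
        # further reads go straight to the underlying file object.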
580        self.read = self.fileobj.read
581        return self.buf
582
583    def getcomptype(self):
584        if self.buf.startswith(b"\x1f\x8b\x08"):
585            return "gz"
586        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
587            return "bz2"
588        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
589            return "xz"
590        else:
591            return "tar"
592
593    def close(self):
594        self.fileobj.close()
595# class StreamProxy
596
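# Hypothetical sketch: the proxy sniffs the magic bytes of the buffered first
# block to decide which compression type to use.
def _demo_comptype_detection():
    gz_block = b"\x1f\x8b\x08" + bytes(BLOCKSIZE - 3)
    assert _StreamProxy(io.BytesIO(gz_block)).getcomptype() == "gz"
    assert _StreamProxy(io.BytesIO(bytes(BLOCKSIZE))).getcomptype() == "tar"
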
597#------------------------
598# Extraction file object
599#------------------------
600class _FileInFile(object):
601    """A thin wrapper around an existing file object that
602       provides a part of its data as an individual file
603       object.
604    """
605
606    def __init__(self, fileobj, offset, size, blockinfo=None):
607        self.fileobj = fileobj
608        self.offset = offset
609        self.size = size
610        self.position = 0
611        self.name = getattr(fileobj, "name", None)
612        self.closed = False
613
614        if blockinfo is None:
615            blockinfo = [(0, size)]
616
617        # Construct a map with data and zero blocks.
618        self.map_index = 0
619        self.map = []
620        lastpos = 0
621        realpos = self.offset
622        for offset, size in blockinfo:
623            if offset > lastpos:
624                self.map.append((False, lastpos, offset, None))
625            self.map.append((True, offset, offset + size, realpos))
626            realpos += size
627            lastpos = offset + size
628        if lastpos < self.size:
629            self.map.append((False, lastpos, self.size, None))
630
631    def flush(self):
632        pass
633
634    def readable(self):
635        return True
636
637    def writable(self):
638        return False
639
640    def seekable(self):
641        return self.fileobj.seekable()
642
643    def tell(self):
644        """Return the current file position.
645        """
646        return self.position
647
648    def seek(self, position, whence=io.SEEK_SET):
649        """Seek to a position in the file.
650        """
651        if whence == io.SEEK_SET:
652            self.position = min(max(position, 0), self.size)
653        elif whence == io.SEEK_CUR:
654            if position < 0:
655                self.position = max(self.position + position, 0)
656            else:
657                self.position = min(self.position + position, self.size)
658        elif whence == io.SEEK_END:
659            self.position = max(min(self.size + position, self.size), 0)
660        else:
661            raise ValueError("Invalid argument")
662        return self.position
663
664    def read(self, size=None):
665        """Read data from the file.
666        """
667        if size is None:
668            size = self.size - self.position
669        else:
670            size = min(size, self.size - self.position)
671
672        buf = b""
673        while size > 0:
674            while True:
675                data, start, stop, offset = self.map[self.map_index]
676                if start <= self.position < stop:
677                    break
678                else:
679                    self.map_index += 1
680                    if self.map_index == len(self.map):
681                        self.map_index = 0
682            length = min(size, stop - self.position)
683            if data:
684                self.fileobj.seek(offset + (self.position - start))
685                b = self.fileobj.read(length)
686                if len(b) != length:
687                    raise ReadError("unexpected end of data")
688                buf += b
689            else:
690                buf += NUL * length
691            size -= length
692            self.position += length
693        return buf
694
695    def readinto(self, b):
696        buf = self.read(len(b))
697        b[:len(buf)] = buf
698        return len(buf)
699
700    def close(self):
701        self.closed = True
702#class _FileInFile
703
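# Hypothetical sketch: _FileInFile presents a slice of an underlying file
# object as a small read-only file of its own, which is how member data is
# served to ExFileObject below.
def _demo_file_in_file():
    base = io.BytesIO(b"##PAYLOAD##")
    member = _FileInFile(base, offset=2, size=7)
    assert member.read() == b"PAYLOAD"
    member.seek(0)
    assert member.read(3) == b"PAY"
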
704class ExFileObject(io.BufferedReader):
705
706    def __init__(self, tarfile, tarinfo):
707        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
708                tarinfo.size, tarinfo.sparse)
709        super().__init__(fileobj)
710#class ExFileObject
711
712#------------------
713# Exported Classes
714#------------------
715class TarInfo(object):
716    """Informational class which holds the details about an
717       archive member given by a tar header block.
718       TarInfo objects are returned by TarFile.getmember(),
719       TarFile.getmembers() and TarFile.gettarinfo() and are
720       usually created internally.
721    """
722
723    __slots__ = dict(
724        name = 'Name of the archive member.',
725        mode = 'Permission bits.',
726        uid = 'User ID of the user who originally stored this member.',
727        gid = 'Group ID of the user who originally stored this member.',
728        size = 'Size in bytes.',
729        mtime = 'Time of last modification.',
730        chksum = 'Header checksum.',
731        type = ('File type. type is usually one of these constants: '
732                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
733                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
734        linkname = ('Name of the target file name, which is only present '
735                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
736        uname = 'User name.',
737        gname = 'Group name.',
738        devmajor = 'Device major number.',
739        devminor = 'Device minor number.',
740        offset = 'The tar header starts here.',
741        offset_data = "The file's data starts here.",
742        pax_headers = ('A dictionary containing key-value pairs of an '
743                       'associated pax extended header.'),
744        sparse = 'Sparse member information.',
745        tarfile = None,
746        _sparse_structs = None,
747        _link_target = None,
748        )
749
750    def __init__(self, name=""):
751        """Construct a TarInfo object. name is the optional name
752           of the member.
753        """
754        self.name = name        # member name
755        self.mode = 0o644       # file permissions
756        self.uid = 0            # user id
757        self.gid = 0            # group id
758        self.size = 0           # file size
759        self.mtime = 0          # modification time
760        self.chksum = 0         # header checksum
761        self.type = REGTYPE     # member type
762        self.linkname = ""      # link name
763        self.uname = ""         # user name
764        self.gname = ""         # group name
765        self.devmajor = 0       # device major number
766        self.devminor = 0       # device minor number
767
768        self.offset = 0         # the tar header starts here
769        self.offset_data = 0    # the file's data starts here
770
771        self.sparse = None      # sparse member information
772        self.pax_headers = {}   # pax header information
773
774    @property
775    def path(self):
776        'In pax headers, "name" is called "path".'
777        return self.name
778
779    @path.setter
780    def path(self, name):
781        self.name = name
782
783    @property
784    def linkpath(self):
785        'In pax headers, "linkname" is called "linkpath".'
786        return self.linkname
787
788    @linkpath.setter
789    def linkpath(self, linkname):
790        self.linkname = linkname
791
792    def __repr__(self):
793        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
794
795    def get_info(self):
796        """Return the TarInfo's attributes as a dictionary.
797        """
798        info = {
799            "name":     self.name,
800            "mode":     self.mode & 0o7777,
801            "uid":      self.uid,
802            "gid":      self.gid,
803            "size":     self.size,
804            "mtime":    self.mtime,
805            "chksum":   self.chksum,
806            "type":     self.type,
807            "linkname": self.linkname,
808            "uname":    self.uname,
809            "gname":    self.gname,
810            "devmajor": self.devmajor,
811            "devminor": self.devminor
812        }
813
814        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
815            info["name"] += "/"
816
817        return info
818
819    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
820        """Return a tar header as a string of 512 byte blocks.
821        """
822        info = self.get_info()
823
824        if format == USTAR_FORMAT:
825            return self.create_ustar_header(info, encoding, errors)
826        elif format == GNU_FORMAT:
827            return self.create_gnu_header(info, encoding, errors)
828        elif format == PAX_FORMAT:
829            return self.create_pax_header(info, encoding)
830        else:
831            raise ValueError("invalid format")
832
833    def create_ustar_header(self, info, encoding, errors):
834        """Return the object as a ustar header block.
835        """
836        info["magic"] = POSIX_MAGIC
837
838        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
839            raise ValueError("linkname is too long")
840
841        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
842            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
843
844        return self._create_header(info, USTAR_FORMAT, encoding, errors)
845
846    def create_gnu_header(self, info, encoding, errors):
847        """Return the object as a GNU header block sequence.
848        """
849        info["magic"] = GNU_MAGIC
850
851        buf = b""
852        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
853            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
854
855        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
856            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
857
858        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
859
860    def create_pax_header(self, info, encoding):
861        """Return the object as a ustar header block. If it cannot be
862           represented this way, prepend a pax extended header sequence
863           with supplement information.
864        """
865        info["magic"] = POSIX_MAGIC
866        pax_headers = self.pax_headers.copy()
867
868        # Test string fields for values that exceed the field length or cannot
869        # be represented in ASCII encoding.
870        for name, hname, length in (
871                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
872                ("uname", "uname", 32), ("gname", "gname", 32)):
873
874            if hname in pax_headers:
875                # The pax header has priority.
876                continue
877
878            # Try to encode the string as ASCII.
879            try:
880                info[name].encode("ascii", "strict")
881            except UnicodeEncodeError:
882                pax_headers[hname] = info[name]
883                continue
884
885            if len(info[name]) > length:
886                pax_headers[hname] = info[name]
887
888        # Test number fields for values that exceed the field limit or values
889        # that need to be stored as floats.
890        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
891            needs_pax = False
892
893            val = info[name]
894            val_is_float = isinstance(val, float)
895            val_int = round(val) if val_is_float else val
896            if not 0 <= val_int < 8 ** (digits - 1):
897                # Avoid overflow.
898                info[name] = 0
899                needs_pax = True
900            elif val_is_float:
901                # Put rounded value in ustar header, and full
902                # precision value in pax header.
903                info[name] = val_int
904                needs_pax = True
905
906            # The existing pax header has priority.
907            if needs_pax and name not in pax_headers:
908                pax_headers[name] = str(val)
909
910        # Create a pax extended header if necessary.
911        if pax_headers:
912            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
913        else:
914            buf = b""
915
916        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
917
918    @classmethod
919    def create_pax_global_header(cls, pax_headers):
920        """Return the object as a pax global header block sequence.
921        """
922        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
923
924    def _posix_split_name(self, name, encoding, errors):
925        """Split a name longer than 100 chars into a prefix
926           and a name part.
927        """
928        components = name.split("/")
929        for i in range(1, len(components)):
930            prefix = "/".join(components[:i])
931            name = "/".join(components[i:])
932            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
933                    len(name.encode(encoding, errors)) <= LENGTH_NAME:
934                break
935        else:
936            raise ValueError("name is too long")
937
938        return prefix, name
939
940    @staticmethod
941    def _create_header(info, format, encoding, errors):
942        """Return a header block. info is a dictionary with file
943           information, format must be one of the *_FORMAT constants.
944        """
945        has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
946        if has_device_fields:
947            devmajor = itn(info.get("devmajor", 0), 8, format)
948            devminor = itn(info.get("devminor", 0), 8, format)
949        else:
950            devmajor = stn("", 8, encoding, errors)
951            devminor = stn("", 8, encoding, errors)
952
953        parts = [
954            stn(info.get("name", ""), 100, encoding, errors),
955            itn(info.get("mode", 0) & 0o7777, 8, format),
956            itn(info.get("uid", 0), 8, format),
957            itn(info.get("gid", 0), 8, format),
958            itn(info.get("size", 0), 12, format),
959            itn(info.get("mtime", 0), 12, format),
960            b"        ", # checksum field
961            info.get("type", REGTYPE),
962            stn(info.get("linkname", ""), 100, encoding, errors),
963            info.get("magic", POSIX_MAGIC),
964            stn(info.get("uname", ""), 32, encoding, errors),
965            stn(info.get("gname", ""), 32, encoding, errors),
966            devmajor,
967            devminor,
968            stn(info.get("prefix", ""), 155, encoding, errors)
969        ]
970
971        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
972        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
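        # Patch the checksum into the chksum field, which starts at offset 148
        # (512 - 364) and is stored as six octal digits, a NUL and a space.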
973        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
974        return buf
975
976    @staticmethod
977    def _create_payload(payload):
978        """Return the string payload filled with zero bytes
979           up to the next 512 byte border.
980        """
981        blocks, remainder = divmod(len(payload), BLOCKSIZE)
982        if remainder > 0:
983            payload += (BLOCKSIZE - remainder) * NUL
984        return payload
985
986    @classmethod
987    def _create_gnu_long_header(cls, name, type, encoding, errors):
988        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
989           for name.
990        """
991        name = name.encode(encoding, errors) + NUL
992
993        info = {}
994        info["name"] = "././@LongLink"
995        info["type"] = type
996        info["size"] = len(name)
997        info["magic"] = GNU_MAGIC
998
999        # create extended header + name blocks.
1000        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
1001                cls._create_payload(name)
1002
1003    @classmethod
1004    def _create_pax_generic_header(cls, pax_headers, type, encoding):
1005        """Return a POSIX.1-2008 extended or global header sequence
1006           that contains a list of keyword, value pairs. The values
1007           must be strings.
1008        """
1009        # Check if one of the fields contains surrogate characters and thereby
1010        # forces hdrcharset=BINARY, see _proc_pax() for more information.
1011        binary = False
1012        for keyword, value in pax_headers.items():
1013            try:
1014                value.encode("utf-8", "strict")
1015            except UnicodeEncodeError:
1016                binary = True
1017                break
1018
1019        records = b""
1020        if binary:
1021            # Put the hdrcharset field at the beginning of the header.
1022            records += b"21 hdrcharset=BINARY\n"
1023
1024        for keyword, value in pax_headers.items():
1025            keyword = keyword.encode("utf-8")
1026            if binary:
1027                # Try to restore the original byte representation of `value'.
1028                # Needless to say, the encoding must match the string.
1029                value = value.encode(encoding, "surrogateescape")
1030            else:
1031                value = value.encode("utf-8")
1032
1033            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
1034            n = p = 0
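            # Each record is "<length> <keyword>=<value>\n" and <length> counts
            # its own digits, so iterate until the total size stops changing.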
1035            while True:
1036                n = l + len(str(p))
1037                if n == p:
1038                    break
1039                p = n
1040            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
1041
1042        # We use a hardcoded "././@PaxHeader" name like star does
1043        # instead of the one that POSIX recommends.
1044        info = {}
1045        info["name"] = "././@PaxHeader"
1046        info["type"] = type
1047        info["size"] = len(records)
1048        info["magic"] = POSIX_MAGIC
1049
1050        # Create pax header + record blocks.
1051        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
1052                cls._create_payload(records)
1053
1054    @classmethod
1055    def frombuf(cls, buf, encoding, errors):
1056        """Construct a TarInfo object from a 512 byte bytes object.
1057        """
1058        if len(buf) == 0:
1059            raise EmptyHeaderError("empty header")
1060        if len(buf) != BLOCKSIZE:
1061            raise TruncatedHeaderError("truncated header")
1062        if buf.count(NUL) == BLOCKSIZE:
1063            raise EOFHeaderError("end of file header")
1064
1065        chksum = nti(buf[148:156])
1066        if chksum not in calc_chksums(buf):
1067            raise InvalidHeaderError("bad checksum")
1068
1069        obj = cls()
1070        obj.name = nts(buf[0:100], encoding, errors)
1071        obj.mode = nti(buf[100:108])
1072        obj.uid = nti(buf[108:116])
1073        obj.gid = nti(buf[116:124])
1074        obj.size = nti(buf[124:136])
1075        obj.mtime = nti(buf[136:148])
1076        obj.chksum = chksum
1077        obj.type = buf[156:157]
1078        obj.linkname = nts(buf[157:257], encoding, errors)
1079        obj.uname = nts(buf[265:297], encoding, errors)
1080        obj.gname = nts(buf[297:329], encoding, errors)
1081        obj.devmajor = nti(buf[329:337])
1082        obj.devminor = nti(buf[337:345])
1083        prefix = nts(buf[345:500], encoding, errors)
1084
1085        # Old V7 tar format represents a directory as a regular
1086        # file with a trailing slash.
1087        if obj.type == AREGTYPE and obj.name.endswith("/"):
1088            obj.type = DIRTYPE
1089
1090        # The old GNU sparse format occupies some of the unused
1091        # space in the buffer for up to 4 sparse structures.
1092        # Save them for later processing in _proc_sparse().
1093        if obj.type == GNUTYPE_SPARSE:
1094            pos = 386
1095            structs = []
1096            for i in range(4):
1097                try:
1098                    offset = nti(buf[pos:pos + 12])
1099                    numbytes = nti(buf[pos + 12:pos + 24])
1100                except ValueError:
1101                    break
1102                structs.append((offset, numbytes))
1103                pos += 24
1104            isextended = bool(buf[482])
1105            origsize = nti(buf[483:495])
1106            obj._sparse_structs = (structs, isextended, origsize)
1107
1108        # Remove redundant slashes from directories.
1109        if obj.isdir():
1110            obj.name = obj.name.rstrip("/")
1111
1112        # Reconstruct a ustar longname.
1113        if prefix and obj.type not in GNU_TYPES:
1114            obj.name = prefix + "/" + obj.name
1115        return obj
1116
1117    @classmethod
1118    def fromtarfile(cls, tarfile):
1119        """Return the next TarInfo object from TarFile object
1120           tarfile.
1121        """
1122        buf = tarfile.fileobj.read(BLOCKSIZE)
1123        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1124        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1125        return obj._proc_member(tarfile)
1126
1127    #--------------------------------------------------------------------------
1128    # The following are methods that are called depending on the type of a
1129    # member. The entry point is _proc_member() which can be overridden in a
1130    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1131    # implement the following
1132    # operations:
1133    # 1. Set self.offset_data to the position where the data blocks begin,
1134    #    if there is data that follows.
1135    # 2. Set tarfile.offset to the position where the next member's header will
1136    #    begin.
1137    # 3. Return self or another valid TarInfo object.
1138    def _proc_member(self, tarfile):
1139        """Choose the right processing method depending on
1140           the type and call it.
1141        """
1142        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1143            return self._proc_gnulong(tarfile)
1144        elif self.type == GNUTYPE_SPARSE:
1145            return self._proc_sparse(tarfile)
1146        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1147            return self._proc_pax(tarfile)
1148        else:
1149            return self._proc_builtin(tarfile)
1150
1151    def _proc_builtin(self, tarfile):
1152        """Process a builtin type or an unknown type which
1153           will be treated as a regular file.
1154        """
1155        self.offset_data = tarfile.fileobj.tell()
1156        offset = self.offset_data
1157        if self.isreg() or self.type not in SUPPORTED_TYPES:
1158            # Skip the following data blocks.
1159            offset += self._block(self.size)
1160        tarfile.offset = offset
1161
1162        # Patch the TarInfo object with saved global
1163        # header information.
1164        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1165
1166        return self
1167
1168    def _proc_gnulong(self, tarfile):
1169        """Process the blocks that hold a GNU longname
1170           or longlink member.
1171        """
1172        buf = tarfile.fileobj.read(self._block(self.size))
1173
1174        # Fetch the next header and process it.
1175        try:
1176            next = self.fromtarfile(tarfile)
1177        except HeaderError as e:
1178            raise SubsequentHeaderError(str(e)) from None
1179
1180        # Patch the TarInfo object from the next header with
1181        # the longname information.
1182        next.offset = self.offset
1183        if self.type == GNUTYPE_LONGNAME:
1184            next.name = nts(buf, tarfile.encoding, tarfile.errors)
1185        elif self.type == GNUTYPE_LONGLINK:
1186            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
1187
1188        return next
1189
1190    def _proc_sparse(self, tarfile):
1191        """Process a GNU sparse header plus extra headers.
1192        """
1193        # We already collected some sparse structures in frombuf().
1194        structs, isextended, origsize = self._sparse_structs
1195        del self._sparse_structs
1196
1197        # Collect sparse structures from extended header blocks.
1198        while isextended:
1199            buf = tarfile.fileobj.read(BLOCKSIZE)
1200            pos = 0
1201            for i in range(21):
1202                try:
1203                    offset = nti(buf[pos:pos + 12])
1204                    numbytes = nti(buf[pos + 12:pos + 24])
1205                except ValueError:
1206                    break
1207                if offset and numbytes:
1208                    structs.append((offset, numbytes))
1209                pos += 24
1210            isextended = bool(buf[504])
1211        self.sparse = structs
1212
1213        self.offset_data = tarfile.fileobj.tell()
1214        tarfile.offset = self.offset_data + self._block(self.size)
1215        self.size = origsize
1216        return self
1217
1218    def _proc_pax(self, tarfile):
1219        """Process an extended or global header as described in
1220           POSIX.1-2008.
1221        """
1222        # Read the header information.
1223        buf = tarfile.fileobj.read(self._block(self.size))
1224
1225        # A pax header stores supplemental information for either
1226        # the following file (extended) or all following files
1227        # (global).
1228        if self.type == XGLTYPE:
1229            pax_headers = tarfile.pax_headers
1230        else:
1231            pax_headers = tarfile.pax_headers.copy()
1232
1233        # Check if the pax header contains a hdrcharset field. This tells us
1234        # the encoding of the path, linkpath, uname and gname fields. Normally,
1235        # these fields are UTF-8 encoded, but POSIX.1-2008 allows tar
1236        # implementations to store them as raw binary strings if the
1237        # translation to UTF-8 fails.
1238        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
1239        if match is not None:
1240            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
1241
1242        # For the time being, we don't care about anything other than "BINARY".
1243        # The only other value that is currently allowed by the standard is
1244        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
1245        hdrcharset = pax_headers.get("hdrcharset")
1246        if hdrcharset == "BINARY":
1247            encoding = tarfile.encoding
1248        else:
1249            encoding = "utf-8"
1250
1251        # Parse pax header information. A record looks like this:
1252        # "%d %s=%s\n" % (length, keyword, value). length is the size
1253        # of the complete record including the length field itself and
1254        # the newline. keyword and value are both UTF-8 encoded strings.
1255        regex = re.compile(br"(\d+) ([^=]+)=")
1256        pos = 0
1257        while True:
1258            match = regex.match(buf, pos)
1259            if not match:
1260                break
1261
1262            length, keyword = match.groups()
1263            length = int(length)
1264            if length == 0:
1265                raise InvalidHeaderError("invalid header")
1266            value = buf[match.end(2) + 1:match.start(1) + length - 1]
1267
1268            # Normally, we could just use "utf-8" as the encoding and "strict"
1269            # as the error handler, but we better not take the risk. For
1270            # example, GNU tar <= 1.23 is known to store filenames it cannot
1271            # translate to UTF-8 as raw strings (unfortunately without a
1272            # hdrcharset=BINARY header).
1273            # We first try the strict standard encoding, and if that fails we
1274            # fall back on the user's encoding and error handler.
1275            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
1276                    tarfile.errors)
1277            if keyword in PAX_NAME_FIELDS:
1278                value = self._decode_pax_field(value, encoding, tarfile.encoding,
1279                        tarfile.errors)
1280            else:
1281                value = self._decode_pax_field(value, "utf-8", "utf-8",
1282                        tarfile.errors)
1283
1284            pax_headers[keyword] = value
1285            pos += length
1286
1287        # Fetch the next header.
1288        try:
1289            next = self.fromtarfile(tarfile)
1290        except HeaderError as e:
1291            raise SubsequentHeaderError(str(e)) from None
1292
1293        # Process GNU sparse information.
1294        if "GNU.sparse.map" in pax_headers:
1295            # GNU extended sparse format version 0.1.
1296            self._proc_gnusparse_01(next, pax_headers)
1297
1298        elif "GNU.sparse.size" in pax_headers:
1299            # GNU extended sparse format version 0.0.
1300            self._proc_gnusparse_00(next, pax_headers, buf)
1301
1302        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1303            # GNU extended sparse format version 1.0.
1304            self._proc_gnusparse_10(next, pax_headers, tarfile)
1305
1306        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1307            # Patch the TarInfo object with the extended header info.
1308            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1309            next.offset = self.offset
1310
1311            if "size" in pax_headers:
1312                # If the extended header replaces the size field,
1313                # we need to recalculate the offset where the next
1314                # header starts.
1315                offset = next.offset_data
1316                if next.isreg() or next.type not in SUPPORTED_TYPES:
1317                    offset += next._block(next.size)
1318                tarfile.offset = offset
1319
1320        return next
1321
1322    def _proc_gnusparse_00(self, next, pax_headers, buf):
1323        """Process a GNU tar extended sparse header, version 0.0.
1324        """
1325        offsets = []
1326        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1327            offsets.append(int(match.group(1)))
1328        numbytes = []
1329        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1330            numbytes.append(int(match.group(1)))
1331        next.sparse = list(zip(offsets, numbytes))
1332
1333    def _proc_gnusparse_01(self, next, pax_headers):
1334        """Process a GNU tar extended sparse header, version 0.1.
1335        """
1336        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1337        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1338
1339    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1340        """Process a GNU tar extended sparse header, version 1.0.
1341        """
1342        fields = None
1343        sparse = []
1344        buf = tarfile.fileobj.read(BLOCKSIZE)
1345        fields, buf = buf.split(b"\n", 1)
1346        fields = int(fields)
1347        while len(sparse) < fields * 2:
1348            if b"\n" not in buf:
1349                buf += tarfile.fileobj.read(BLOCKSIZE)
1350            number, buf = buf.split(b"\n", 1)
1351            sparse.append(int(number))
1352        next.offset_data = tarfile.fileobj.tell()
1353        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1354
1355    def _apply_pax_info(self, pax_headers, encoding, errors):
1356        """Replace fields with supplemental information from a previous
1357           pax extended or global header.
1358        """
1359        for keyword, value in pax_headers.items():
1360            if keyword == "GNU.sparse.name":
1361                setattr(self, "path", value)
1362            elif keyword == "GNU.sparse.size":
1363                setattr(self, "size", int(value))
1364            elif keyword == "GNU.sparse.realsize":
1365                setattr(self, "size", int(value))
1366            elif keyword in PAX_FIELDS:
1367                if keyword in PAX_NUMBER_FIELDS:
1368                    try:
1369                        value = PAX_NUMBER_FIELDS[keyword](value)
1370                    except ValueError:
1371                        value = 0
1372                if keyword == "path":
1373                    value = value.rstrip("/")
1374                setattr(self, keyword, value)
1375
1376        self.pax_headers = pax_headers.copy()
1377
1378    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1379        """Decode a single field from a pax record.
1380        """
1381        try:
1382            return value.decode(encoding, "strict")
1383        except UnicodeDecodeError:
1384            return value.decode(fallback_encoding, fallback_errors)
1385
1386    def _block(self, count):
1387        """Round up a byte count by BLOCKSIZE and return it,
1388           e.g. _block(834) => 1024.
1389        """
1390        blocks, remainder = divmod(count, BLOCKSIZE)
1391        if remainder:
1392            blocks += 1
1393        return blocks * BLOCKSIZE
1394
1395    def isreg(self):
1396        'Return True if the Tarinfo object is a regular file.'
1397        return self.type in REGULAR_TYPES
1398
1399    def isfile(self):
1400        'Return True if the Tarinfo object is a regular file.'
1401        return self.isreg()
1402
1403    def isdir(self):
1404        'Return True if it is a directory.'
1405        return self.type == DIRTYPE
1406
1407    def issym(self):
1408        'Return True if it is a symbolic link.'
1409        return self.type == SYMTYPE
1410
1411    def islnk(self):
1412        'Return True if it is a hard link.'
1413        return self.type == LNKTYPE
1414
1415    def ischr(self):
1416        'Return True if it is a character device.'
1417        return self.type == CHRTYPE
1418
1419    def isblk(self):
1420        'Return True if it is a block device.'
1421        return self.type == BLKTYPE
1422
1423    def isfifo(self):
1424        'Return True if it is a FIFO.'
1425        return self.type == FIFOTYPE
1426
1427    def issparse(self):
1428        return self.sparse is not None
1429
1430    def isdev(self):
1431        'Return True if it is one of character device, block device or FIFO.'
1432        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1433# class TarInfo
1434
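# Hypothetical sketch: a TarInfo survives a round trip through tobuf() and
# frombuf() in the plain ustar format.
def _demo_tarinfo_roundtrip():
    member = TarInfo("example.txt")
    member.size = 7
    block = member.tobuf(USTAR_FORMAT)
    parsed = TarInfo.frombuf(block[:BLOCKSIZE], ENCODING, "surrogateescape")
    assert (parsed.name, parsed.size, parsed.isreg()) == ("example.txt", 7, True)
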
1435class TarFile(object):
1436    """The TarFile Class provides an interface to tar archives.
1437    """
1438
1439    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
1440
1441    dereference = False         # If true, add content of linked file to the
1442                                # tar file, else the link.
1443
1444    ignore_zeros = False        # If true, skips empty or invalid blocks and
1445                                # continues processing.
1446
1447    errorlevel = 1              # If 0, fatal errors only appear in debug
1448                                # messages (if debug >= 0). If > 0, errors
1449                                # are passed to the caller as exceptions.
1450
1451    format = DEFAULT_FORMAT     # The format to use when creating an archive.
1452
1453    encoding = ENCODING         # Encoding for 8-bit character strings.
1454
1455    errors = None               # Error handler for unicode conversion.
1456
1457    tarinfo = TarInfo           # The default TarInfo class to use.
1458
1459    fileobject = ExFileObject   # The file-object for extractfile().
1460
1461    def __init__(self, name=None, mode="r", fileobj=None, format=None,
1462            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1463            errors="surrogateescape", pax_headers=None, debug=None,
1464            errorlevel=None, copybufsize=None):
1465        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1466           read from an existing archive, 'a' to append data to an existing
1467           file or 'w' to create a new file overwriting an existing one. `mode'
1468           defaults to 'r'.
1469           If `fileobj' is given, it is used for reading or writing data. If it
1470           can be determined, `mode' is overridden by `fileobj's mode.
1471           `fileobj' is not closed when TarFile is closed.
1472        """
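        # Construction sketch (illustrative comment only; "backup.tar" is a
        # hypothetical path). The plain constructor handles uncompressed
        # archives only; most callers should prefer the open() classmethod:
        #
        #   import tarfile
        #   with open("backup.tar", "rb") as f:
        #       with tarfile.TarFile(fileobj=f) as tar:
        #           print(tar.getnames())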
1473        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
1474        if mode not in modes:
1475            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1476        self.mode = mode
1477        self._mode = modes[mode]
1478
1479        if not fileobj:
1480            if self.mode == "a" and not os.path.exists(name):
1481                # Create nonexistent files in append mode.
1482                self.mode = "w"
1483                self._mode = "wb"
1484            fileobj = bltn_open(name, self._mode)
1485            self._extfileobj = False
1486        else:
1487            if (name is None and hasattr(fileobj, "name") and
1488                isinstance(fileobj.name, (str, bytes))):
1489                name = fileobj.name
1490            if hasattr(fileobj, "mode"):
1491                self._mode = fileobj.mode
1492            self._extfileobj = True
1493        self.name = os.path.abspath(name) if name else None
1494        self.fileobj = fileobj
1495
1496        # Init attributes.
1497        if format is not None:
1498            self.format = format
1499        if tarinfo is not None:
1500            self.tarinfo = tarinfo
1501        if dereference is not None:
1502            self.dereference = dereference
1503        if ignore_zeros is not None:
1504            self.ignore_zeros = ignore_zeros
1505        if encoding is not None:
1506            self.encoding = encoding
1507        self.errors = errors
1508
1509        if pax_headers is not None and self.format == PAX_FORMAT:
1510            self.pax_headers = pax_headers
1511        else:
1512            self.pax_headers = {}
1513
1514        if debug is not None:
1515            self.debug = debug
1516        if errorlevel is not None:
1517            self.errorlevel = errorlevel
1518
1519        # Init datastructures.
1520        self.copybufsize = copybufsize
1521        self.closed = False
1522        self.members = []       # list of members as TarInfo objects
1523        self._loaded = False    # flag if all members have been read
1524        self.offset = self.fileobj.tell()
1525                                # current position in the archive file
1526        self.inodes = {}        # dictionary caching the inodes of
1527                                # archive members already added
1528
1529        try:
1530            if self.mode == "r":
1531                self.firstmember = None
1532                self.firstmember = self.next()
1533
1534            if self.mode == "a":
1535                # Move to the end of the archive,
1536                # before the first empty block.
1537                while True:
1538                    self.fileobj.seek(self.offset)
1539                    try:
1540                        tarinfo = self.tarinfo.fromtarfile(self)
1541                        self.members.append(tarinfo)
1542                    except EOFHeaderError:
1543                        self.fileobj.seek(self.offset)
1544                        break
1545                    except HeaderError as e:
1546                        raise ReadError(str(e)) from None
1547
1548            if self.mode in ("a", "w", "x"):
1549                self._loaded = True
1550
1551                if self.pax_headers:
1552                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1553                    self.fileobj.write(buf)
1554                    self.offset += len(buf)
1555        except:
1556            if not self._extfileobj:
1557                self.fileobj.close()
1558            self.closed = True
1559            raise
1560
1561    #--------------------------------------------------------------------------
1562    # Below are the classmethods which act as alternate constructors to the
1563    # TarFile class. The open() method is the only one that is needed for
1564    # public use; it is the "super"-constructor and is able to select an
1565    # adequate "sub"-constructor for a particular compression using the mapping
1566    # from OPEN_METH.
1567    #
1568    # This concept allows one to subclass TarFile without losing the comfort of
1569    # the super-constructor. A sub-constructor is registered and made available
1570    # by adding it to the mapping in OPEN_METH.
1571
1572    @classmethod
1573    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1574        """Open a tar archive for reading, writing or appending. Return
1575           an appropriate TarFile class.
1576
1577           mode:
1578           'r' or 'r:*' open for reading with transparent compression
1579           'r:'         open for reading exclusively uncompressed
1580           'r:gz'       open for reading with gzip compression
1581           'r:bz2'      open for reading with bzip2 compression
1582           'r:xz'       open for reading with lzma compression
1583           'a' or 'a:'  open for appending, creating the file if necessary
1584           'w' or 'w:'  open for writing without compression
1585           'w:gz'       open for writing with gzip compression
1586           'w:bz2'      open for writing with bzip2 compression
1587           'w:xz'       open for writing with lzma compression
1588
1589           'x' or 'x:'  create a tarfile exclusively without compression, raise
1590                        an exception if the file already exists
1591           'x:gz'       create a gzip compressed tarfile, raise an exception
1592                        if the file already exists
1593           'x:bz2'      create a bzip2 compressed tarfile, raise an exception
1594                        if the file already exists
1595           'x:xz'       create an lzma compressed tarfile, raise an exception
1596                        if the file already exists
1597
1598           'r|*'        open a stream of tar blocks with transparent compression
1599           'r|'         open an uncompressed stream of tar blocks for reading
1600           'r|gz'       open a gzip compressed stream of tar blocks
1601           'r|bz2'      open a bzip2 compressed stream of tar blocks
1602           'r|xz'       open an lzma compressed stream of tar blocks
1603           'w|'         open an uncompressed stream for writing
1604           'w|gz'       open a gzip compressed stream for writing
1605           'w|bz2'      open a bzip2 compressed stream for writing
1606           'w|xz'       open an lzma compressed stream for writing
1607        """
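        # Usage sketch for the mode strings above (illustrative comment only;
        # the archive and file names are hypothetical):
        #
        #   import tarfile
        #   with tarfile.open("data.tar.gz") as tar:          # 'r' / 'r:*'
        #       print(tar.getnames())
        #   with tarfile.open("out.tar.xz", "w:xz") as tar:   # write, lzma
        #       tar.add("somefile.txt")
        #   with open("blocks.tar.bz2", "rb") as f:           # stream mode
        #       with tarfile.open(fileobj=f, mode="r|bz2") as tar:
        #           for member in tar:
        #               print(member.name)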
1608
1609        if not name and not fileobj:
1610            raise ValueError("nothing to open")
1611
1612        if mode in ("r", "r:*"):
1613            # Find out which *open() is appropriate for opening the file.
1614            def not_compressed(comptype):
1615                return cls.OPEN_METH[comptype] == 'taropen'
1616            error_msgs = []
1617            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
1618                func = getattr(cls, cls.OPEN_METH[comptype])
1619                if fileobj is not None:
1620                    saved_pos = fileobj.tell()
1621                try:
1622                    return func(name, "r", fileobj, **kwargs)
1623                except (ReadError, CompressionError) as e:
1624                    error_msgs.append(f'- method {comptype}: {e!r}')
1625                    if fileobj is not None:
1626                        fileobj.seek(saved_pos)
1627                    continue
1628            error_msgs_summary = '\n'.join(error_msgs)
1629            raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}")
1630
1631        elif ":" in mode:
1632            filemode, comptype = mode.split(":", 1)
1633            filemode = filemode or "r"
1634            comptype = comptype or "tar"
1635
1636            # Select the *open() function according to
1637            # given compression.
1638            if comptype in cls.OPEN_METH:
1639                func = getattr(cls, cls.OPEN_METH[comptype])
1640            else:
1641                raise CompressionError("unknown compression type %r" % comptype)
1642            return func(name, filemode, fileobj, **kwargs)
1643
1644        elif "|" in mode:
1645            filemode, comptype = mode.split("|", 1)
1646            filemode = filemode or "r"
1647            comptype = comptype or "tar"
1648
1649            if filemode not in ("r", "w"):
1650                raise ValueError("mode must be 'r' or 'w'")
1651
1652            stream = _Stream(name, filemode, comptype, fileobj, bufsize)
1653            try:
1654                t = cls(name, filemode, stream, **kwargs)
1655            except:
1656                stream.close()
1657                raise
1658            t._extfileobj = False
1659            return t
1660
1661        elif mode in ("a", "w", "x"):
1662            return cls.taropen(name, mode, fileobj, **kwargs)
1663
1664        raise ValueError("undiscernible mode")
1665
1666    @classmethod
1667    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1668        """Open uncompressed tar archive name for reading or writing.
1669        """
1670        if mode not in ("r", "a", "w", "x"):
1671            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1672        return cls(name, mode, fileobj, **kwargs)
1673
1674    @classmethod
1675    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1676        """Open gzip compressed tar archive name for reading or writing.
1677           Appending is not allowed.
1678        """
1679        if mode not in ("r", "w", "x"):
1680            raise ValueError("mode must be 'r', 'w' or 'x'")
1681
1682        try:
1683            from gzip import GzipFile
1684        except ImportError:
1685            raise CompressionError("gzip module is not available") from None
1686
1687        try:
1688            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
1689        except OSError as e:
1690            if fileobj is not None and mode == 'r':
1691                raise ReadError("not a gzip file") from e
1692            raise
1693
1694        try:
1695            t = cls.taropen(name, mode, fileobj, **kwargs)
1696        except OSError as e:
1697            fileobj.close()
1698            if mode == 'r':
1699                raise ReadError("not a gzip file") from e
1700            raise
1701        except:
1702            fileobj.close()
1703            raise
1704        t._extfileobj = False
1705        return t
1706
1707    @classmethod
1708    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1709        """Open bzip2 compressed tar archive name for reading or writing.
1710           Appending is not allowed.
1711        """
1712        if mode not in ("r", "w", "x"):
1713            raise ValueError("mode must be 'r', 'w' or 'x'")
1714
1715        try:
1716            from bz2 import BZ2File
1717        except ImportError:
1718            raise CompressionError("bz2 module is not available") from None
1719
1720        fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)
1721
1722        try:
1723            t = cls.taropen(name, mode, fileobj, **kwargs)
1724        except (OSError, EOFError) as e:
1725            fileobj.close()
1726            if mode == 'r':
1727                raise ReadError("not a bzip2 file") from e
1728            raise
1729        except:
1730            fileobj.close()
1731            raise
1732        t._extfileobj = False
1733        return t
1734
1735    @classmethod
1736    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
1737        """Open lzma compressed tar archive name for reading or writing.
1738           Appending is not allowed.
1739        """
1740        if mode not in ("r", "w", "x"):
1741            raise ValueError("mode must be 'r', 'w' or 'x'")
1742
1743        try:
1744            from lzma import LZMAFile, LZMAError
1745        except ImportError:
1746            raise CompressionError("lzma module is not available") from None
1747
1748        fileobj = LZMAFile(fileobj or name, mode, preset=preset)
1749
1750        try:
1751            t = cls.taropen(name, mode, fileobj, **kwargs)
1752        except (LZMAError, EOFError) as e:
1753            fileobj.close()
1754            if mode == 'r':
1755                raise ReadError("not an lzma file") from e
1756            raise
1757        except:
1758            fileobj.close()
1759            raise
1760        t._extfileobj = False
1761        return t
1762
1763    # All *open() methods are registered here.
1764    OPEN_METH = {
1765        "tar": "taropen",   # uncompressed tar
1766        "gz":  "gzopen",    # gzip compressed tar
1767        "bz2": "bz2open",   # bzip2 compressed tar
1768        "xz":  "xzopen"     # lzma compressed tar
1769    }
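    # Sketch of how a subclass might register an additional compression, as the
    # comment block above describes (an assumption for illustration; "zst",
    # ZstdTarFile and zstopen are not part of this module):
    #
    #   class ZstdTarFile(TarFile):
    #       @classmethod
    #       def zstopen(cls, name, mode="r", fileobj=None, **kwargs):
    #           ...  # wrap fileobj/name in a zstandard file object, then
    #           ...  # return cls.taropen(name, mode, fileobj, **kwargs)
    #       OPEN_METH = {**TarFile.OPEN_METH, "zst": "zstopen"}
    #
    #   # ZstdTarFile.open(name, "r:zst") would then dispatch to zstopen().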
1770
1771    #--------------------------------------------------------------------------
1772    # The public methods which TarFile provides:
1773
1774    def close(self):
1775        """Close the TarFile. In write-mode, two finishing zero blocks are
1776           appended to the archive.
1777        """
1778        if self.closed:
1779            return
1780
1781        self.closed = True
1782        try:
1783            if self.mode in ("a", "w", "x"):
1784                self.fileobj.write(NUL * (BLOCKSIZE * 2))
1785                self.offset += (BLOCKSIZE * 2)
1786                # fill up the end with zero-blocks
1787                # (like option -b20 for tar does)
1788                blocks, remainder = divmod(self.offset, RECORDSIZE)
1789                if remainder > 0:
1790                    self.fileobj.write(NUL * (RECORDSIZE - remainder))
1791        finally:
1792            if not self._extfileobj:
1793                self.fileobj.close()
1794
1795    def getmember(self, name):
1796        """Return a TarInfo object for member `name'. If `name' cannot be
1797           found in the archive, KeyError is raised. If a member occurs more
1798           than once in the archive, its last occurrence is assumed to be the
1799           most up-to-date version.
1800        """
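        # Lookup sketch (illustrative comment only; "etc/passwd" is a
        # hypothetical member name):
        #
        #   try:
        #       info = tar.getmember("etc/passwd")
        #   except KeyError:
        #       info = None   # member not present in the archive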
1801        tarinfo = self._getmember(name.rstrip('/'))
1802        if tarinfo is None:
1803            raise KeyError("filename %r not found" % name)
1804        return tarinfo
1805
1806    def getmembers(self):
1807        """Return the members of the archive as a list of TarInfo objects. The
1808           list has the same order as the members in the archive.
1809        """
1810        self._check()
1811        if not self._loaded:    # if we want to obtain a list of
1812            self._load()        # all members, we first have to
1813                                # scan the whole archive.
1814        return self.members
1815
1816    def getnames(self):
1817        """Return the members of the archive as a list of their names. It has
1818           the same order as the list returned by getmembers().
1819        """
1820        return [tarinfo.name for tarinfo in self.getmembers()]
1821
1822    def gettarinfo(self, name=None, arcname=None, fileobj=None):
1823        """Create a TarInfo object from the result of os.stat or equivalent
1824           on an existing file. The file is either named by `name', or
1825           specified as a file object `fileobj' with a file descriptor. If
1826           given, `arcname' specifies an alternative name for the file in the
1827           archive; otherwise, the name is taken from the 'name' attribute of
1828           'fileobj', or the 'name' argument. The name should be a text
1829           string.
1830        """
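        # Sketch pairing gettarinfo() with addfile(), assuming `tar' was opened
        # for writing (illustrative comment only; "notes.txt" is a hypothetical
        # file on disk):
        #
        #   with open("notes.txt", "rb") as f:
        #       info = tar.gettarinfo(fileobj=f, arcname="docs/notes.txt")
        #       tar.addfile(info, f)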
1831        self._check("awx")
1832
1833        # When fileobj is given, replace name by
1834        # fileobj's real name.
1835        if fileobj is not None:
1836            name = fileobj.name
1837
1838        # Build the name of the member in the archive.
1839        # Backslashes are converted to forward slashes, and
1840        # absolute paths are turned into relative paths.
1841        if arcname is None:
1842            arcname = name
1843        drv, arcname = os.path.splitdrive(arcname)
1844        arcname = arcname.replace(os.sep, "/")
1845        arcname = arcname.lstrip("/")
1846
1847        # Now, fill the TarInfo object with
1848        # information specific for the file.
1849        tarinfo = self.tarinfo()
1850        tarinfo.tarfile = self  # Not needed
1851
1852        # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
1853        if fileobj is None:
1854            if not self.dereference:
1855                statres = os.lstat(name)
1856            else:
1857                statres = os.stat(name)
1858        else:
1859            statres = os.fstat(fileobj.fileno())
1860        linkname = ""
1861
1862        stmd = statres.st_mode
1863        if stat.S_ISREG(stmd):
1864            inode = (statres.st_ino, statres.st_dev)
1865            if not self.dereference and statres.st_nlink > 1 and \
1866                    inode in self.inodes and arcname != self.inodes[inode]:
1867                # Is it a hardlink to an already
1868                # archived file?
1869                type = LNKTYPE
1870                linkname = self.inodes[inode]
1871            else:
1872                # The inode is added only if it is valid.
1873                # For win32 it is always 0.
1874                type = REGTYPE
1875                if inode[0]:
1876                    self.inodes[inode] = arcname
1877        elif stat.S_ISDIR(stmd):
1878            type = DIRTYPE
1879        elif stat.S_ISFIFO(stmd):
1880            type = FIFOTYPE
1881        elif stat.S_ISLNK(stmd):
1882            type = SYMTYPE
1883            linkname = os.readlink(name)
1884        elif stat.S_ISCHR(stmd):
1885            type = CHRTYPE
1886        elif stat.S_ISBLK(stmd):
1887            type = BLKTYPE
1888        else:
1889            return None
1890
1891        # Fill the TarInfo object with all
1892        # information we can get.
1893        tarinfo.name = arcname
1894        tarinfo.mode = stmd
1895        tarinfo.uid = statres.st_uid
1896        tarinfo.gid = statres.st_gid
1897        if type == REGTYPE:
1898            tarinfo.size = statres.st_size
1899        else:
1900            tarinfo.size = 0
1901        tarinfo.mtime = statres.st_mtime
1902        tarinfo.type = type
1903        tarinfo.linkname = linkname
1904        if pwd:
1905            try:
1906                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1907            except KeyError:
1908                pass
1909        if grp:
1910            try:
1911                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1912            except KeyError:
1913                pass
1914
1915        if type in (CHRTYPE, BLKTYPE):
1916            if hasattr(os, "major") and hasattr(os, "minor"):
1917                tarinfo.devmajor = os.major(statres.st_rdev)
1918                tarinfo.devminor = os.minor(statres.st_rdev)
1919        return tarinfo
1920
1921    def list(self, verbose=True, *, members=None):
1922        """Print a table of contents to sys.stdout. If `verbose' is False, only
1923           the names of the members are printed. If it is True, an `ls -l'-like
1924           output is produced. `members' is optional and must be a subset of the
1925           list returned by getmembers().
1926        """
1927        self._check()
1928
1929        if members is None:
1930            members = self
1931        for tarinfo in members:
1932            if verbose:
1933                _safe_print(stat.filemode(tarinfo.mode))
1934                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
1935                                       tarinfo.gname or tarinfo.gid))
1936                if tarinfo.ischr() or tarinfo.isblk():
1937                    _safe_print("%10s" %
1938                            ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
1939                else:
1940                    _safe_print("%10d" % tarinfo.size)
1941                _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
1942                            % time.localtime(tarinfo.mtime)[:6])
1943
1944            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
1945
1946            if verbose:
1947                if tarinfo.issym():
1948                    _safe_print("-> " + tarinfo.linkname)
1949                if tarinfo.islnk():
1950                    _safe_print("link to " + tarinfo.linkname)
1951            print()
1952
1953    def add(self, name, arcname=None, recursive=True, *, filter=None):
1954        """Add the file `name' to the archive. `name' may be any type of file
1955           (directory, fifo, symbolic link, etc.). If given, `arcname'
1956           specifies an alternative name for the file in the archive.
1957           Directories are added recursively by default. This can be avoided by
1958           setting `recursive' to False. `filter' is a function
1959           that expects a TarInfo object argument and returns the changed
1960           TarInfo object; if it returns None, the TarInfo object will be
1961           excluded from the archive.
1962        """
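        # Filter sketch, assuming `tar' was opened for writing (illustrative
        # comment only; the exclusion rule and paths are made up):
        #
        #   def exclude_pyc(tarinfo):
        #       return None if tarinfo.name.endswith(".pyc") else tarinfo
        #
        #   tar.add("project", arcname="project", filter=exclude_pyc)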
1963        self._check("awx")
1964
1965        if arcname is None:
1966            arcname = name
1967
1968        # Skip if somebody tries to archive the archive...
1969        if self.name is not None and os.path.abspath(name) == self.name:
1970            self._dbg(2, "tarfile: Skipped %r" % name)
1971            return
1972
1973        self._dbg(1, name)
1974
1975        # Create a TarInfo object from the file.
1976        tarinfo = self.gettarinfo(name, arcname)
1977
1978        if tarinfo is None:
1979            self._dbg(1, "tarfile: Unsupported type %r" % name)
1980            return
1981
1982        # Change or exclude the TarInfo object.
1983        if filter is not None:
1984            tarinfo = filter(tarinfo)
1985            if tarinfo is None:
1986                self._dbg(2, "tarfile: Excluded %r" % name)
1987                return
1988
1989        # Append the tar header and data to the archive.
1990        if tarinfo.isreg():
1991            with bltn_open(name, "rb") as f:
1992                self.addfile(tarinfo, f)
1993
1994        elif tarinfo.isdir():
1995            self.addfile(tarinfo)
1996            if recursive:
1997                for f in sorted(os.listdir(name)):
1998                    self.add(os.path.join(name, f), os.path.join(arcname, f),
1999                            recursive, filter=filter)
2000
2001        else:
2002            self.addfile(tarinfo)
2003
2004    def addfile(self, tarinfo, fileobj=None):
2005        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2006           given, it should be a binary file, and tarinfo.size bytes are read
2007           from it and added to the archive. You can create TarInfo objects
2008           directly, or by using gettarinfo().
2009        """
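        # Sketch of adding in-memory data, assuming `tar' was opened for
        # writing (illustrative comment only; the member name and payload are
        # made up):
        #
        #   import io, tarfile
        #   payload = b"hello world\n"
        #   info = tarfile.TarInfo(name="greeting.txt")
        #   info.size = len(payload)
        #   tar.addfile(info, io.BytesIO(payload))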
2010        self._check("awx")
2011
2012        tarinfo = copy.copy(tarinfo)
2013
2014        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2015        self.fileobj.write(buf)
2016        self.offset += len(buf)
2017        bufsize = self.copybufsize
2018        # If there's data to follow, append it.
2019        if fileobj is not None:
2020            copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
2021            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2022            if remainder > 0:
2023                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2024                blocks += 1
2025            self.offset += blocks * BLOCKSIZE
2026
2027        self.members.append(tarinfo)
2028
2029    def extractall(self, path=".", members=None, *, numeric_owner=False):
2030        """Extract all members from the archive to the current working
2031           directory and set owner, modification time and permissions on
2032           directories afterwards. `path' specifies a different directory
2033           to extract to. `members' is optional and must be a subset of the
2034           list returned by getmembers(). If `numeric_owner` is True, only
2035           the numbers for user/group names are used and not the names.
2036        """
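        # Extraction sketch (illustrative comment only; "backup.tar.gz" and
        # "restore" are hypothetical paths; extracting archives from untrusted
        # sources is generally unsafe):
        #
        #   import tarfile
        #   with tarfile.open("backup.tar.gz") as tar:
        #       tar.extractall(path="restore")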
2037        directories = []
2038
2039        if members is None:
2040            members = self
2041
2042        for tarinfo in members:
2043            if tarinfo.isdir():
2044                # Extract directories with a safe mode.
2045                directories.append(tarinfo)
2046                tarinfo = copy.copy(tarinfo)
2047                tarinfo.mode = 0o700
2048            # Extract directories with set_attrs=False; attributes are set further down.
2049            self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
2050                         numeric_owner=numeric_owner)
2051
2052        # Reverse sort directories.
2053        directories.sort(key=lambda a: a.name)
2054        directories.reverse()
2055
2056        # Set correct owner, mtime and filemode on directories.
2057        for tarinfo in directories:
2058            dirpath = os.path.join(path, tarinfo.name)
2059            try:
2060                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
2061                self.utime(tarinfo, dirpath)
2062                self.chmod(tarinfo, dirpath)
2063            except ExtractError as e:
2064                if self.errorlevel > 1:
2065                    raise
2066                else:
2067                    self._dbg(1, "tarfile: %s" % e)
2068
2069    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
2070        """Extract a member from the archive to the current working directory,
2071           using its full name. Its file information is extracted as accurately
2072           as possible. `member' may be a filename or a TarInfo object. You can
2073           specify a different directory using `path'. File attributes (owner,
2074           mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
2075           is True, only the numbers for user/group names are used and not
2076           the names.
2077        """
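        # Single-member sketch (illustrative comment only; the member name and
        # target directory are hypothetical):
        #
        #   tar.extract("docs/readme.txt", path="out")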
2078        self._check("r")
2079
2080        if isinstance(member, str):
2081            tarinfo = self.getmember(member)
2082        else:
2083            tarinfo = member
2084
2085        # Prepare the link target for makelink().
2086        if tarinfo.islnk():
2087            tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2088
2089        try:
2090            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
2091                                 set_attrs=set_attrs,
2092                                 numeric_owner=numeric_owner)
2093        except OSError as e:
2094            if self.errorlevel > 0:
2095                raise
2096            else:
2097                if e.filename is None:
2098                    self._dbg(1, "tarfile: %s" % e.strerror)
2099                else:
2100                    self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2101        except ExtractError as e:
2102            if self.errorlevel > 1:
2103                raise
2104            else:
2105                self._dbg(1, "tarfile: %s" % e)
2106
2107    def extractfile(self, member):
2108        """Extract a member from the archive as a file object. `member' may be
2109           a filename or a TarInfo object. If `member' is a regular file or
2110           a link, an io.BufferedReader object is returned. For all other
2111           existing members, None is returned. If `member' does not appear
2112           in the archive, KeyError is raised.
2113        """
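        # Read-a-member sketch (illustrative comment only; "data/config.ini"
        # is a hypothetical member name):
        #
        #   f = tar.extractfile("data/config.ini")
        #   if f is not None:          # None for directories, devices, etc.
        #       content = f.read()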
2114        self._check("r")
2115
2116        if isinstance(member, str):
2117            tarinfo = self.getmember(member)
2118        else:
2119            tarinfo = member
2120
2121        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
2122            # Members with unknown types are treated as regular files.
2123            return self.fileobject(self, tarinfo)
2124
2125        elif tarinfo.islnk() or tarinfo.issym():
2126            if isinstance(self.fileobj, _Stream):
2127                # A small but ugly workaround for the case that someone tries
2128                # to extract a (sym)link as a file-object from a non-seekable
2129                # stream of tar blocks.
2130                raise StreamError("cannot extract (sym)link as file object")
2131            else:
2132                # A (sym)link's file object is its target's file object.
2133                return self.extractfile(self._find_link_target(tarinfo))
2134        else:
2135            # If there's no data associated with the member (directory, chrdev,
2136            # blkdev, etc.), return None instead of a file object.
2137            return None
2138
2139    def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2140                        numeric_owner=False):
2141        """Extract the TarInfo object tarinfo to a physical
2142           file called targetpath.
2143        """
2144        # Fetch the TarInfo object for the given name
2145        # and build the destination pathname, replacing
2146        # forward slashes with platform-specific separators.
2147        targetpath = targetpath.rstrip("/")
2148        targetpath = targetpath.replace("/", os.sep)
2149
2150        # Create all upper directories.
2151        upperdirs = os.path.dirname(targetpath)
2152        if upperdirs and not os.path.exists(upperdirs):
2153            # Create directories that are not part of the archive with
2154            # default permissions.
2155            os.makedirs(upperdirs)
2156
2157        if tarinfo.islnk() or tarinfo.issym():
2158            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2159        else:
2160            self._dbg(1, tarinfo.name)
2161
2162        if tarinfo.isreg():
2163            self.makefile(tarinfo, targetpath)
2164        elif tarinfo.isdir():
2165            self.makedir(tarinfo, targetpath)
2166        elif tarinfo.isfifo():
2167            self.makefifo(tarinfo, targetpath)
2168        elif tarinfo.ischr() or tarinfo.isblk():
2169            self.makedev(tarinfo, targetpath)
2170        elif tarinfo.islnk() or tarinfo.issym():
2171            self.makelink(tarinfo, targetpath)
2172        elif tarinfo.type not in SUPPORTED_TYPES:
2173            self.makeunknown(tarinfo, targetpath)
2174        else:
2175            self.makefile(tarinfo, targetpath)
2176
2177        if set_attrs:
2178            self.chown(tarinfo, targetpath, numeric_owner)
2179            if not tarinfo.issym():
2180                self.chmod(tarinfo, targetpath)
2181                self.utime(tarinfo, targetpath)
2182
2183    #--------------------------------------------------------------------------
2184    # Below are the different file methods. They are called via
2185    # _extract_member() when extract() is called. They can be replaced in a
2186    # subclass to implement other functionality.
2187
2188    def makedir(self, tarinfo, targetpath):
2189        """Make a directory called targetpath.
2190        """
2191        try:
2192            # Use a safe mode for the directory, the real mode is set
2193            # later in _extract_member().
2194            os.mkdir(targetpath, 0o700)
2195        except FileExistsError:
2196            pass
2197
2198    def makefile(self, tarinfo, targetpath):
2199        """Make a file called targetpath.
2200        """
2201        source = self.fileobj
2202        source.seek(tarinfo.offset_data)
2203        bufsize = self.copybufsize
2204        with bltn_open(targetpath, "wb") as target:
2205            if tarinfo.sparse is not None:
2206                for offset, size in tarinfo.sparse:
2207                    target.seek(offset)
2208                    copyfileobj(source, target, size, ReadError, bufsize)
2209                target.seek(tarinfo.size)
2210                target.truncate()
2211            else:
2212                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
2213
2214    def makeunknown(self, tarinfo, targetpath):
2215        """Make a file from a TarInfo object with an unknown type
2216           at targetpath.
2217        """
2218        self.makefile(tarinfo, targetpath)
2219        self._dbg(1, "tarfile: Unknown file type %r, " \
2220                     "extracted as regular file." % tarinfo.type)
2221
2222    def makefifo(self, tarinfo, targetpath):
2223        """Make a fifo called targetpath.
2224        """
2225        if hasattr(os, "mkfifo"):
2226            os.mkfifo(targetpath)
2227        else:
2228            raise ExtractError("fifo not supported by system")
2229
2230    def makedev(self, tarinfo, targetpath):
2231        """Make a character or block device called targetpath.
2232        """
2233        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2234            raise ExtractError("special devices not supported by system")
2235
2236        mode = tarinfo.mode
2237        if tarinfo.isblk():
2238            mode |= stat.S_IFBLK
2239        else:
2240            mode |= stat.S_IFCHR
2241
2242        os.mknod(targetpath, mode,
2243                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2244
2245    def makelink(self, tarinfo, targetpath):
2246        """Make a (symbolic) link called targetpath. If it cannot be created
2247           (platform limitation), we try to make a copy of the referenced file
2248           instead of a link.
2249        """
2250        try:
2251            # For systems that support symbolic and hard links.
2252            if tarinfo.issym():
2253                if os.path.lexists(targetpath):
2254                    # Avoid FileExistsError on following os.symlink.
2255                    os.unlink(targetpath)
2256                os.symlink(tarinfo.linkname, targetpath)
2257            else:
2258                # See extract().
2259                if os.path.exists(tarinfo._link_target):
2260                    os.link(tarinfo._link_target, targetpath)
2261                else:
2262                    self._extract_member(self._find_link_target(tarinfo),
2263                                         targetpath)
2264        except symlink_exception:
2265            try:
2266                self._extract_member(self._find_link_target(tarinfo),
2267                                     targetpath)
2268            except KeyError:
2269                raise ExtractError("unable to resolve link inside archive") from None
2270
2271    def chown(self, tarinfo, targetpath, numeric_owner):
2272        """Set owner of targetpath according to tarinfo. If numeric_owner
2273           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
2274           is False, fall back to .gid/.uid when the search based on name
2275           fails.
2276        """
2277        if hasattr(os, "geteuid") and os.geteuid() == 0:
2278            # We have to be root to do so.
2279            g = tarinfo.gid
2280            u = tarinfo.uid
2281            if not numeric_owner:
2282                try:
2283                    if grp:
2284                        g = grp.getgrnam(tarinfo.gname)[2]
2285                except KeyError:
2286                    pass
2287                try:
2288                    if pwd:
2289                        u = pwd.getpwnam(tarinfo.uname)[2]
2290                except KeyError:
2291                    pass
2292            try:
2293                if tarinfo.issym() and hasattr(os, "lchown"):
2294                    os.lchown(targetpath, u, g)
2295                else:
2296                    os.chown(targetpath, u, g)
2297            except OSError as e:
2298                raise ExtractError("could not change owner") from e
2299
2300    def chmod(self, tarinfo, targetpath):
2301        """Set file permissions of targetpath according to tarinfo.
2302        """
2303        try:
2304            os.chmod(targetpath, tarinfo.mode)
2305        except OSError as e:
2306            raise ExtractError("could not change mode") from e
2307
2308    def utime(self, tarinfo, targetpath):
2309        """Set modification time of targetpath according to tarinfo.
2310        """
2311        if not hasattr(os, 'utime'):
2312            return
2313        try:
2314            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2315        except OSError as e:
2316            raise ExtractError("could not change modification time") from e
2317
2318    #--------------------------------------------------------------------------
2319    def next(self):
2320        """Return the next member of the archive as a TarInfo object, when
2321           TarFile is opened for reading. Return None if there are no more
2322           members available.
2323        """
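        # Iteration sketch using next() directly (illustrative comment only;
        # most code simply iterates over the TarFile object instead):
        #
        #   member = tar.next()
        #   while member is not None:
        #       print(member.name)
        #       member = tar.next()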
2324        self._check("ra")
2325        if self.firstmember is not None:
2326            m = self.firstmember
2327            self.firstmember = None
2328            return m
2329
2330        # Advance the file pointer.
2331        if self.offset != self.fileobj.tell():
2332            self.fileobj.seek(self.offset - 1)
2333            if not self.fileobj.read(1):
2334                raise ReadError("unexpected end of data")
2335
2336        # Read the next block.
2337        tarinfo = None
2338        while True:
2339            try:
2340                tarinfo = self.tarinfo.fromtarfile(self)
2341            except EOFHeaderError as e:
2342                if self.ignore_zeros:
2343                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2344                    self.offset += BLOCKSIZE
2345                    continue
2346            except InvalidHeaderError as e:
2347                if self.ignore_zeros:
2348                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2349                    self.offset += BLOCKSIZE
2350                    continue
2351                elif self.offset == 0:
2352                    raise ReadError(str(e)) from None
2353            except EmptyHeaderError:
2354                if self.offset == 0:
2355                    raise ReadError("empty file") from None
2356            except TruncatedHeaderError as e:
2357                if self.offset == 0:
2358                    raise ReadError(str(e)) from None
2359            except SubsequentHeaderError as e:
2360                raise ReadError(str(e)) from None
2361            except Exception as e:
2362                try:
2363                    import zlib
2364                    if isinstance(e, zlib.error):
2365                        raise ReadError(f'zlib error: {e}') from None
2366                    else:
2367                        raise e
2368                except ImportError:
2369                    raise e
2370            break
2371
2372        if tarinfo is not None:
2373            self.members.append(tarinfo)
2374        else:
2375            self._loaded = True
2376
2377        return tarinfo
2378
2379    #--------------------------------------------------------------------------
2380    # Little helper methods:
2381
2382    def _getmember(self, name, tarinfo=None, normalize=False):
2383        """Find an archive member by name from bottom to top.
2384           If tarinfo is given, it is used as the starting point.
2385        """
2386        # Ensure that all members have been loaded.
2387        members = self.getmembers()
2388
2389        # Limit the member search list up to tarinfo.
2390        if tarinfo is not None:
2391            members = members[:members.index(tarinfo)]
2392
2393        if normalize:
2394            name = os.path.normpath(name)
2395
2396        for member in reversed(members):
2397            if normalize:
2398                member_name = os.path.normpath(member.name)
2399            else:
2400                member_name = member.name
2401
2402            if name == member_name:
2403                return member
2404
2405    def _load(self):
2406        """Read through the entire archive file and look for readable
2407           members.
2408        """
2409        while True:
2410            tarinfo = self.next()
2411            if tarinfo is None:
2412                break
2413        self._loaded = True
2414
2415    def _check(self, mode=None):
2416        """Check if TarFile is still open, and if the operation's mode
2417           corresponds to TarFile's mode.
2418        """
2419        if self.closed:
2420            raise OSError("%s is closed" % self.__class__.__name__)
2421        if mode is not None and self.mode not in mode:
2422            raise OSError("bad operation for mode %r" % self.mode)
2423
2424    def _find_link_target(self, tarinfo):
2425        """Find the target member of a symlink or hardlink member in the
2426           archive.
2427        """
2428        if tarinfo.issym():
2429            # Always search the entire archive.
2430            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
2431            limit = None
2432        else:
2433            # Search the archive before the link, because a hard link is
2434            # just a reference to an already archived file.
2435            linkname = tarinfo.linkname
2436            limit = tarinfo
2437
2438        member = self._getmember(linkname, tarinfo=limit, normalize=True)
2439        if member is None:
2440            raise KeyError("linkname %r not found" % linkname)
2441        return member
2442
2443    def __iter__(self):
2444        """Provide an iterator object.
2445        """
2446        if self._loaded:
2447            yield from self.members
2448            return
2449
2450        # Yield items using TarFile's next() method.
2451        # When all members have been read, set TarFile as _loaded.
2452        index = 0
2453        # Fix for SF #1100429: Under rare circumstances it can
2454        # happen that getmembers() is called during iteration,
2455        # which will have already exhausted the next() method.
2456        if self.firstmember is not None:
2457            tarinfo = self.next()
2458            index += 1
2459            yield tarinfo
2460
2461        while True:
2462            if index < len(self.members):
2463                tarinfo = self.members[index]
2464            elif not self._loaded:
2465                tarinfo = self.next()
2466                if not tarinfo:
2467                    self._loaded = True
2468                    return
2469            else:
2470                return
2471            index += 1
2472            yield tarinfo
2473
2474    def _dbg(self, level, msg):
2475        """Write debugging output to sys.stderr.
2476        """
2477        if level <= self.debug:
2478            print(msg, file=sys.stderr)
2479
2480    def __enter__(self):
2481        self._check()
2482        return self
2483
2484    def __exit__(self, type, value, traceback):
2485        if type is None:
2486            self.close()
2487        else:
2488            # An exception occurred. We must not call close() because
2489            # it would try to write end-of-archive blocks and padding.
2490            if not self._extfileobj:
2491                self.fileobj.close()
2492            self.closed = True
2493
2494#--------------------
2495# exported functions
2496#--------------------
2497def is_tarfile(name):
2498    """Return True if name points to a tar archive that we
2499       are able to handle, else return False.
2500
2501       'name' should be a string, file, or file-like object.
2502    """
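    # Check sketch (illustrative comment only; "download.bin" is a
    # hypothetical path):
    #
    #   import tarfile
    #   if tarfile.is_tarfile("download.bin"):
    #       with tarfile.open("download.bin") as tar:
    #           tar.list()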
2503    try:
2504        if hasattr(name, "read"):
2505            t = open(fileobj=name)
2506        else:
2507            t = open(name)
2508        t.close()
2509        return True
2510    except TarError:
2511        return False
2512
2513open = TarFile.open
2514
2515
2516def main():
2517    import argparse
2518
2519    description = 'A simple command-line interface for tarfile module.'
2520    parser = argparse.ArgumentParser(description=description)
2521    parser.add_argument('-v', '--verbose', action='store_true', default=False,
2522                        help='Verbose output')
2523    group = parser.add_mutually_exclusive_group(required=True)
2524    group.add_argument('-l', '--list', metavar='<tarfile>',
2525                       help='Show listing of a tarfile')
2526    group.add_argument('-e', '--extract', nargs='+',
2527                       metavar=('<tarfile>', '<output_dir>'),
2528                       help='Extract tarfile into target dir')
2529    group.add_argument('-c', '--create', nargs='+',
2530                       metavar=('<name>', '<file>'),
2531                       help='Create tarfile from sources')
2532    group.add_argument('-t', '--test', metavar='<tarfile>',
2533                       help='Test if a tarfile is valid')
2534    args = parser.parse_args()
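    # Example invocations of this command-line interface (illustrative comment
    # only; the archive and file names are hypothetical):
    #
    #   python -m tarfile -l archive.tar           # list contents
    #   python -m tarfile -e archive.tar out_dir   # extract into out_dir
    #   python -m tarfile -c new.tar.gz src1 src2  # create (gzip by extension)
    #   python -m tarfile -t archive.tar           # test validity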
2535
2536    if args.test is not None:
2537        src = args.test
2538        if is_tarfile(src):
2539            with open(src, 'r') as tar:
2540                tar.getmembers()
2541                print(tar.getmembers(), file=sys.stderr)
2542            if args.verbose:
2543                print('{!r} is a tar archive.'.format(src))
2544        else:
2545            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2546
2547    elif args.list is not None:
2548        src = args.list
2549        if is_tarfile(src):
2550            with TarFile.open(src, 'r:*') as tf:
2551                tf.list(verbose=args.verbose)
2552        else:
2553            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2554
2555    elif args.extract is not None:
2556        if len(args.extract) == 1:
2557            src = args.extract[0]
2558            curdir = os.curdir
2559        elif len(args.extract) == 2:
2560            src, curdir = args.extract
2561        else:
2562            parser.exit(1, parser.format_help())
2563
2564        if is_tarfile(src):
2565            with TarFile.open(src, 'r:*') as tf:
2566                tf.extractall(path=curdir)
2567            if args.verbose:
2568                if curdir == '.':
2569                    msg = '{!r} file is extracted.'.format(src)
2570                else:
2571                    msg = ('{!r} file is extracted '
2572                           'into {!r} directory.').format(src, curdir)
2573                print(msg)
2574        else:
2575            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2576
2577    elif args.create is not None:
2578        tar_name = args.create.pop(0)
2579        _, ext = os.path.splitext(tar_name)
2580        compressions = {
2581            # gz
2582            '.gz': 'gz',
2583            '.tgz': 'gz',
2584            # xz
2585            '.xz': 'xz',
2586            '.txz': 'xz',
2587            # bz2
2588            '.bz2': 'bz2',
2589            '.tbz': 'bz2',
2590            '.tbz2': 'bz2',
2591            '.tb2': 'bz2',
2592        }
2593        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
2594        tar_files = args.create
2595
2596        with TarFile.open(tar_name, tar_mode) as tf:
2597            for file_name in tar_files:
2598                tf.add(file_name)
2599
2600        if args.verbose:
2601            print('{!r} file created.'.format(tar_name))
2602
2603if __name__ == '__main__':
2604    main()
2605