• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Interface to the liblzma compression library.
2
3This module provides a class for reading and writing compressed files,
4classes for incremental (de)compression, and convenience functions for
5one-shot (de)compression.
6
7These classes and functions support both the XZ and legacy LZMA
8container formats, as well as raw compressed data streams.
9"""
10
# Names exported by ``from lzma import *``.  Only LZMAFile, open, compress
# and decompress are defined in this file; the constants, LZMACompressor,
# LZMADecompressor, LZMAError and is_check_supported are expected to come
# from the ``from _lzma import *`` below.
__all__ = [
    # Integrity-check identifiers.
    "CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256",
    "CHECK_ID_MAX", "CHECK_UNKNOWN",
    # Filter identifiers.
    "FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64",
    "FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC",
    # Container formats.
    "FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW",
    # Match finders, compression modes and presets.
    "MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4",
    "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME",

    # Classes, the exception type and module-level functions.
    "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError",
    "open", "compress", "decompress", "is_check_supported",
]
23
24import builtins
25import io
26import os
27from _lzma import *
28from _lzma import _encode_filter_properties, _decode_filter_properties
29import _compression
30
31
# Internal mode codes stored in LZMAFile._mode.
# Value 0 no longer used
_MODE_READ     = 1
# Value 2 no longer used
_MODE_WRITE    = 3


class LZMAFile(_compression.BaseStream):

    """A file object providing transparent LZMA (de)compression.

    An LZMAFile can act as a wrapper for an existing file object, or
    refer directly to a named file on disk.

    Note that LZMAFile provides a *binary* file interface - data read
    is returned as bytes, and data to be written must be given as bytes.
    """

    def __init__(self, filename=None, mode="r", *,
                 format=None, check=-1, preset=None, filters=None):
        """Open an LZMA-compressed file in binary mode.

        filename can be either an actual file name (given as a str,
        bytes, or PathLike object), in which case the named file is
        opened, or it can be an existing file object to read from or
        write to.

        mode can be "r" for reading (default), "w" for (over)writing,
        "x" for creating exclusively, or "a" for appending. These can
        equivalently be given as "rb", "wb", "xb" and "ab" respectively.

        format specifies the container format to use for the file.
        If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the
        default is FORMAT_XZ.

        check specifies the integrity check to use. This argument can
        only be used when opening a file for writing. For FORMAT_XZ,
        the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not
        support integrity checks - for these formats, check must be
        omitted, or be CHECK_NONE.

        When opening a file for reading, the *preset* argument is not
        meaningful, and should be omitted. The *filters* argument should
        also be omitted, except when format is FORMAT_RAW (in which case
        it is required).

        When opening a file for writing, the settings used by the
        compressor can be specified either as a preset compression
        level (with the *preset* argument), or in detail as a custom
        filter chain (with the *filters* argument). For FORMAT_XZ and
        FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
        level. For FORMAT_RAW, the caller must always specify a filter
        chain; the raw compressor does not support preset compression
        levels.

        preset (if provided) should be an integer in the range 0-9,
        optionally OR-ed with the constant PRESET_EXTREME.

        filters (if provided) should be a sequence of dicts. Each dict
        should have an entry for "id" indicating ID of the filter, plus
        additional entries for options to the filter.

        Raises ValueError for an unrecognized mode, or when check or
        preset is given together with a read mode, and TypeError when
        filename is neither a path nor an object with a read or write
        attribute. If setting up the read pipeline fails, a file that
        was opened here is closed again and the object is left in the
        closed state before the exception propagates.
        """
        self._fp = None
        self._closefp = False
        self._mode = None

        if mode in ("r", "rb"):
            if check != -1:
                raise ValueError("Cannot specify an integrity check "
                                 "when opening a file for reading")
            if preset is not None:
                raise ValueError("Cannot specify a preset compression "
                                 "level when opening a file for reading")
            if format is None:
                format = FORMAT_AUTO
            mode_code = _MODE_READ
        elif mode in ("w", "wb", "a", "ab", "x", "xb"):
            if format is None:
                format = FORMAT_XZ
            mode_code = _MODE_WRITE
            # Built before any file is opened, so a bad compressor
            # configuration cannot leave an open file behind.
            self._compressor = LZMACompressor(format=format, check=check,
                                              preset=preset, filters=filters)
            self._pos = 0
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        if isinstance(filename, (str, bytes, os.PathLike)):
            if "b" not in mode:
                mode += "b"
            self._fp = builtins.open(filename, mode)
            # We opened it, so close() is responsible for closing it.
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str, bytes, file or PathLike object")

        if self._mode == _MODE_READ:
            try:
                raw = _compression.DecompressReader(self._fp, LZMADecompressor,
                    trailing_error=LZMAError, format=format, filters=filters)
                self._buffer = io.BufferedReader(raw)
            except BaseException:
                # The read pipeline could not be built (for example
                # FORMAT_RAW without filters).  Release the file if we
                # own it and mark the object closed, so nothing stays
                # open and a later close() is a harmless no-op.
                try:
                    if self._closefp:
                        self._fp.close()
                finally:
                    self._fp = None
                    self._closefp = False
                raise

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        if self.closed:
            return
        try:
            if self._mode == _MODE_READ:
                self._buffer.close()
                self._buffer = None
            elif self._mode == _MODE_WRITE:
                # Emit whatever the compressor still holds back.
                self._fp.write(self._compressor.flush())
                self._compressor = None
        finally:
            try:
                # Only close the underlying file if this object opened it.
                if self._closefp:
                    self._fp.close()
            finally:
                self._fp = None
                self._closefp = False

    @property
    def closed(self):
        """True if this file is closed."""
        return self._fp is None

    @property
    def name(self):
        """The name attribute of the underlying file object."""
        self._check_not_closed()
        return self._fp.name

    @property
    def mode(self):
        """'wb' if the file was opened in a write mode, otherwise 'rb'."""
        return 'wb' if self._mode == _MODE_WRITE else 'rb'

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._buffer.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode == _MODE_READ

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    def peek(self, size=-1):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        self._check_can_read()
        # Relies on the undocumented fact that BufferedReader.peek() always
        # returns at least one byte (except at EOF)
        return self._buffer.peek(size)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b"" if the file is already at EOF.
        """
        self._check_can_read()
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b"" if the file is at EOF.
        """
        self._check_can_read()
        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        self._check_can_read()
        return self._buffer.readline(size)

    def write(self, data):
        """Write a bytes object to the file.

        Returns the number of uncompressed bytes written, which is
        always the length of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until close()
        is called.
        """
        self._check_can_write()
        if isinstance(data, (bytes, bytearray)):
            length = len(data)
        else:
            # accept any data that supports the buffer protocol
            data = memoryview(data)
            length = data.nbytes

        compressed = self._compressor.compress(data)
        self._fp.write(compressed)
        # Position is tracked in uncompressed bytes.
        self._pos += length
        return length

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Possible values for whence are:

            0: start of stream (default): offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        self._check_can_seek()
        return self._buffer.seek(offset, whence)

    def tell(self):
        """Return the current file position."""
        self._check_not_closed()
        if self._mode == _MODE_READ:
            return self._buffer.tell()
        return self._pos
277
278
279def open(filename, mode="rb", *,
280         format=None, check=-1, preset=None, filters=None,
281         encoding=None, errors=None, newline=None):
282    """Open an LZMA-compressed file in binary or text mode.
283
284    filename can be either an actual file name (given as a str, bytes,
285    or PathLike object), in which case the named file is opened, or it
286    can be an existing file object to read from or write to.
287
288    The mode argument can be "r", "rb" (default), "w", "wb", "x", "xb",
289    "a", or "ab" for binary mode, or "rt", "wt", "xt", or "at" for text
290    mode.
291
292    The format, check, preset and filters arguments specify the
293    compression settings, as for LZMACompressor, LZMADecompressor and
294    LZMAFile.
295
296    For binary mode, this function is equivalent to the LZMAFile
297    constructor: LZMAFile(filename, mode, ...). In this case, the
298    encoding, errors and newline arguments must not be provided.
299
300    For text mode, an LZMAFile object is created, and wrapped in an
301    io.TextIOWrapper instance with the specified encoding, error
302    handling behavior, and line ending(s).
303
304    """
305    if "t" in mode:
306        if "b" in mode:
307            raise ValueError("Invalid mode: %r" % (mode,))
308    else:
309        if encoding is not None:
310            raise ValueError("Argument 'encoding' not supported in binary mode")
311        if errors is not None:
312            raise ValueError("Argument 'errors' not supported in binary mode")
313        if newline is not None:
314            raise ValueError("Argument 'newline' not supported in binary mode")
315
316    lz_mode = mode.replace("t", "")
317    binary_file = LZMAFile(filename, lz_mode, format=format, check=check,
318                           preset=preset, filters=filters)
319
320    if "t" in mode:
321        encoding = io.text_encoding(encoding)
322        return io.TextIOWrapper(binary_file, encoding, errors, newline)
323    else:
324        return binary_file
325
326
def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):
    """Compress *data* in one shot and return the compressed bytes.

    The optional arguments *format*, *check*, *preset* and *filters*
    are passed straight to LZMACompressor; see its docstring for their
    meaning.

    Use an LZMACompressor directly when the input arrives in pieces.
    """
    compressor = LZMACompressor(format, check, preset, filters)
    body = compressor.compress(data)
    # flush() finishes the stream and returns whatever is still buffered.
    tail = compressor.flush()
    return body + tail
337
338
def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None):
    """Decompress a block of data.

    Refer to LZMADecompressor's docstring for a description of the
    optional arguments *format*, *memlimit* and *filters*.

    *data* may hold several compressed streams back to back; each one
    is decompressed in turn and the results are concatenated. Once at
    least one stream has been decoded, trailing bytes that do not form
    a valid stream are silently ignored.

    Raises LZMAError if the first stream cannot be decoded, or if a
    stream ends before its end-of-stream marker is reached.

    For incremental decompression, use an LZMADecompressor instead.
    """
    results = []
    while True:
        # A decompressor handles exactly one stream, so use a fresh one
        # for every stream found in the input.
        decomp = LZMADecompressor(format, memlimit, filters)
        try:
            res = decomp.decompress(data)
        except LZMAError:
            if results:
                break  # Leftover data is not a valid LZMA/XZ stream; ignore it.
            else:
                raise  # Error on the first iteration; bail out.
        results.append(res)
        if not decomp.eof:
            raise LZMAError("Compressed data ended before the "
                            "end-of-stream marker was reached")
        # Whatever follows the finished stream is the next candidate.
        data = decomp.unused_data
        if not data:
            break
    return b"".join(results)
365