• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
"""Interface to the libbzip2 compression library.

This module provides a file interface, classes for incremental
(de)compression, and functions for one-shot (de)compression.
"""
6
# Public names exported by ``from bz2 import *``.
__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",
           "open", "compress", "decompress"]

__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"
11
from builtins import open as _builtin_open
import io
import os
import _compression

from _bz2 import BZ2Compressor, BZ2Decompressor
18
19
# Value 0 no longer used
_MODE_READ     = 1
# Value 2 no longer used
_MODE_WRITE    = 3


class BZ2File(_compression.BaseStream):

    """A file object providing transparent bzip2 (de)compression.

    A BZ2File can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that BZ2File provides a *binary* file interface - data read is
    returned as bytes, and data to be written should be given as bytes.
    """

    def __init__(self, filename, mode="r", *, compresslevel=9):
        """Open a bzip2-compressed file.

        If filename is a str, bytes, or PathLike object, it gives the
        name of the file to be opened. Otherwise, it should be a file
        object, which will be used to read or write the compressed data.

        mode can be 'r' for reading (default), 'w' for (over)writing,
        'x' for creating exclusively, or 'a' for appending. These can
        equivalently be given as 'rb', 'wb', 'xb', and 'ab'.

        If mode is 'w', 'x' or 'a', compresslevel can be a number between 1
        and 9 specifying the level of compression: 1 produces the least
        compression, and 9 (default) produces the most compression.

        If mode is 'r', the input file may be the concatenation of
        multiple compressed streams.

        Raises ValueError for an out-of-range compresslevel or an
        unrecognised mode, and TypeError if filename is neither a path
        nor an object with a read() or write() method.
        """
        # Set these first so that `closed` and close() work even when
        # construction fails part-way through.
        self._fp = None
        self._closefp = False
        self._mode = None

        if not (1 <= compresslevel <= 9):
            raise ValueError("compresslevel must be between 1 and 9")

        if mode in ("", "r", "rb"):
            mode = "rb"
            mode_code = _MODE_READ
        elif mode in ("w", "wb", "x", "xb", "a", "ab"):
            # Normalise to the explicit binary spelling ("wb"/"xb"/"ab")
            # that is handed to the builtin open() below.
            mode = mode[0] + "b"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        else:
            raise ValueError("Invalid mode: %r" % (mode,))

        if isinstance(filename, (str, bytes, os.PathLike)):
            self._fp = _builtin_open(filename, mode)
            # We opened the file ourselves, so close() must close it too.
            self._closefp = True
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            # Caller-supplied file object: never closed by this class.
            self._fp = filename
        else:
            raise TypeError("filename must be a str, bytes, file or PathLike object")
        self._mode = mode_code

        if self._mode == _MODE_READ:
            raw = _compression.DecompressReader(self._fp,
                BZ2Decompressor, trailing_error=OSError)
            self._buffer = io.BufferedReader(raw)
        else:
            # Count of uncompressed bytes written so far; reported by tell().
            self._pos = 0

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        if self.closed:
            return
        try:
            if self._mode == _MODE_READ:
                self._buffer.close()
            elif self._mode == _MODE_WRITE:
                # Emit whatever the compressor is still holding back.
                self._fp.write(self._compressor.flush())
                self._compressor = None
        finally:
            try:
                if self._closefp:
                    self._fp.close()
            finally:
                # Always drop the references, even if flushing or
                # closing the underlying file raised.
                self._fp = None
                self._closefp = False
                self._buffer = None

    @property
    def closed(self):
        """True if this file is closed."""
        return self._fp is None

    @property
    def name(self):
        """The `name` attribute of the underlying file object."""
        self._check_not_closed()
        return self._fp.name

    @property
    def mode(self):
        """'wb' if the file was opened for writing, otherwise 'rb'."""
        return 'wb' if self._mode == _MODE_WRITE else 'rb'

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._buffer.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode == _MODE_READ

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    def peek(self, n=0):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        self._check_can_read()
        # Relies on the undocumented fact that BufferedReader.peek()
        # always returns at least one byte (except at EOF), independent
        # of the value of n
        return self._buffer.peek(n)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b'' if the file is already at EOF.
        """
        self._check_can_read()
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b'' if the file is at EOF.
        """
        self._check_can_read()
        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def readinto(self, b):
        """Read bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        self._check_can_read()
        return self._buffer.readinto(b)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        # Accept any object implementing __index__, not just int.
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        self._check_can_read()
        return self._buffer.readline(size)

    def readlines(self, size=-1):
        """Read a list of lines of uncompressed bytes from the file.

        size can be specified to control the number of lines read: no
        further lines will be read once the total size of the lines read
        so far equals or exceeds size.
        """
        # Accept any object implementing __index__, not just int.
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        self._check_can_read()
        return self._buffer.readlines(size)

    def write(self, data):
        """Write a byte string to the file.

        Returns the number of uncompressed bytes written, which is
        always the length of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until close()
        is called.
        """
        self._check_can_write()
        if isinstance(data, (bytes, bytearray)):
            length = len(data)
        else:
            # accept any data that supports the buffer protocol
            data = memoryview(data)
            length = data.nbytes

        compressed = self._compressor.compress(data)
        self._fp.write(compressed)
        self._pos += length
        return length

    def writelines(self, seq):
        """Write a sequence of byte strings to the file.

        Returns the number of uncompressed bytes written.
        seq can be any iterable yielding byte strings.

        Line separators are not added between the written byte strings.
        """
        return _compression.BaseStream.writelines(self, seq)

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Values for whence are:

            0: start of stream (default); offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        self._check_can_seek()
        return self._buffer.seek(offset, whence)

    def tell(self):
        """Return the current file position."""
        self._check_not_closed()
        if self._mode == _MODE_READ:
            return self._buffer.tell()
        return self._pos
277
278
279def open(filename, mode="rb", compresslevel=9,
280         encoding=None, errors=None, newline=None):
281    """Open a bzip2-compressed file in binary or text mode.
282
283    The filename argument can be an actual filename (a str, bytes, or
284    PathLike object), or an existing file object to read from or write
285    to.
286
287    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or
288    "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode.
289    The default mode is "rb", and the default compresslevel is 9.
290
291    For binary mode, this function is equivalent to the BZ2File
292    constructor: BZ2File(filename, mode, compresslevel). In this case,
293    the encoding, errors and newline arguments must not be provided.
294
295    For text mode, a BZ2File object is created, and wrapped in an
296    io.TextIOWrapper instance with the specified encoding, error
297    handling behavior, and line ending(s).
298
299    """
300    if "t" in mode:
301        if "b" in mode:
302            raise ValueError("Invalid mode: %r" % (mode,))
303    else:
304        if encoding is not None:
305            raise ValueError("Argument 'encoding' not supported in binary mode")
306        if errors is not None:
307            raise ValueError("Argument 'errors' not supported in binary mode")
308        if newline is not None:
309            raise ValueError("Argument 'newline' not supported in binary mode")
310
311    bz_mode = mode.replace("t", "")
312    binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel)
313
314    if "t" in mode:
315        encoding = io.text_encoding(encoding)
316        return io.TextIOWrapper(binary_file, encoding, errors, newline)
317    else:
318        return binary_file
319
320
def compress(data, compresslevel=9):
    """Compress a block of data.

    compresslevel, if given, must be a number between 1 and 9.

    Returns the complete compressed stream as a single bytes object.

    For incremental compression, use a BZ2Compressor object instead.
    """
    comp = BZ2Compressor(compresslevel)
    # flush() appends whatever the compressor buffered plus the
    # end-of-stream data, so the result is a complete bzip2 stream.
    return comp.compress(data) + comp.flush()
330
331
def decompress(data):
    """Decompress a block of data.

    data may hold several compressed streams back to back; their
    decompressed contents are concatenated. Trailing bytes that do not
    form a valid stream are ignored once at least one stream has been
    decoded.

    Raises OSError if the first stream is invalid, and ValueError if a
    stream ends before its end-of-stream marker.

    For incremental decompression, use a BZ2Decompressor object instead.
    """
    results = []
    while data:
        # A decompressor handles exactly one stream, so use a fresh one
        # for every stream in the input.
        decomp = BZ2Decompressor()
        try:
            res = decomp.decompress(data)
        except OSError:
            if results:
                break  # Leftover data is not a valid bzip2 stream; ignore it.
            else:
                raise  # Error on the first iteration; bail out.
        results.append(res)
        if not decomp.eof:
            raise ValueError("Compressed data ended before the "
                             "end-of-stream marker was reached")
        # Whatever follows the stream just decoded is the next candidate.
        data = decomp.unused_data
    return b"".join(results)
353