"""Interface to the libbzip2 compression library.

This module provides a file interface, classes for incremental
(de)compression, and functions for one-shot (de)compression.
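
A minimal one-shot round trip, as a sketch (the data shown is only
illustrative)::

    >>> import bz2
    >>> data = b"Hello, bzip2!"
    >>> bz2.decompress(bz2.compress(data)) == data
    True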
"""

__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",
           "open", "compress", "decompress"]

__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"

from builtins import open as _builtin_open
import io
import os
import warnings
import _compression
from threading import RLock

from _bz2 import BZ2Compressor, BZ2Decompressor


_MODE_CLOSED   = 0
_MODE_READ     = 1
# Value 2 no longer used
_MODE_WRITE    = 3


class BZ2File(_compression.BaseStream):

    """A file object providing transparent bzip2 (de)compression.

    A BZ2File can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that BZ2File provides a *binary* file interface - data read is
    returned as bytes, and data to be written should be given as bytes.
    """

    def __init__(self, filename, mode="r", buffering=None, compresslevel=9):
        """Open a bzip2-compressed file.

        If filename is a str, bytes, or PathLike object, it gives the
        name of the file to be opened. Otherwise, it should be a file
        object, which will be used to read or write the compressed data.

        mode can be 'r' for reading (default), 'w' for (over)writing,
        'x' for creating exclusively, or 'a' for appending. These can
        equivalently be given as 'rb', 'wb', 'xb', and 'ab'.

        buffering is ignored. Its use is deprecated.

        If mode is 'w', 'x' or 'a', compresslevel can be a number between 1
        and 9 specifying the level of compression: 1 produces the least
        compression, and 9 (default) produces the most compression.

        If mode is 'r', the input file may be the concatenation of
        multiple compressed streams.
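
        A minimal sketch of both call styles ("example.bz2" is a
        hypothetical path)::

            with BZ2File("example.bz2", "wb") as f:   # named file on disk
                f.write(b"payload")

            buf = io.BytesIO()
            with BZ2File(buf, "wb") as f:             # wrap a file object
                f.write(b"payload")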
        """
        # This lock must be recursive, so that BufferedIOBase's
        # writelines() does not deadlock.
        self._lock = RLock()
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED

        if buffering is not None:
            warnings.warn("Use of 'buffering' argument is deprecated",
                          DeprecationWarning)

        if not (1 <= compresslevel <= 9):
            raise ValueError("compresslevel must be between 1 and 9")

        if mode in ("", "r", "rb"):
            mode = "rb"
            mode_code = _MODE_READ
        elif mode in ("w", "wb"):
            mode = "wb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("x", "xb"):
            mode = "xb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("a", "ab"):
            mode = "ab"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        else:
            raise ValueError("Invalid mode: %r" % (mode,))

        if isinstance(filename, (str, bytes, os.PathLike)):
            self._fp = _builtin_open(filename, mode)
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str, bytes, file or PathLike object")

        if self._mode == _MODE_READ:
            raw = _compression.DecompressReader(self._fp,
                BZ2Decompressor, trailing_error=OSError)
            self._buffer = io.BufferedReader(raw)
        else:
            self._pos = 0

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        with self._lock:
            if self._mode == _MODE_CLOSED:
                return
            try:
                if self._mode == _MODE_READ:
                    self._buffer.close()
                elif self._mode == _MODE_WRITE:
                    self._fp.write(self._compressor.flush())
                    self._compressor = None
            finally:
                try:
                    if self._closefp:
                        self._fp.close()
                finally:
                    self._fp = None
                    self._closefp = False
                    self._mode = _MODE_CLOSED
                    self._buffer = None

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._buffer.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode == _MODE_READ

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    def peek(self, n=0):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        with self._lock:
            self._check_can_read()
            # Relies on the undocumented fact that BufferedReader.peek()
            # always returns at least one byte (except at EOF), independent
            # of the value of n
            return self._buffer.peek(n)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b'' if the file is already at EOF.
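
        For example (a sketch; 'handle' is a hypothetical consumer),
        reading an open BZ2File f in fixed-size chunks::

            while True:
                chunk = f.read(64 * 1024)
                if not chunk:          # b'' signals EOF
                    break
                handle(chunk)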
        """
        with self._lock:
            self._check_can_read()
            return self._buffer.read(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b'' if the file is at EOF.
        """
        with self._lock:
            self._check_can_read()
            if size < 0:
                size = io.DEFAULT_BUFFER_SIZE
            return self._buffer.read1(size)

    def readinto(self, b):
        """Read bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        with self._lock:
            self._check_can_read()
            return self._buffer.readinto(b)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            self._check_can_read()
            return self._buffer.readline(size)

    def readlines(self, size=-1):
        """Read a list of lines of uncompressed bytes from the file.

        size can be specified to control the number of lines read: no
        further lines will be read once the total size of the lines read
        so far equals or exceeds size.
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            self._check_can_read()
            return self._buffer.readlines(size)

    def write(self, data):
        """Write a byte string to the file.

        Returns the number of uncompressed bytes written, which is
        always len(data). Note that due to buffering, the file on disk
        may not reflect the data written until close() is called.
        """
        with self._lock:
            self._check_can_write()
            compressed = self._compressor.compress(data)
            self._fp.write(compressed)
            self._pos += len(data)
            return len(data)

    def writelines(self, seq):
        """Write a sequence of byte strings to the file.

        Returns the number of uncompressed bytes written.
        seq can be any iterable yielding byte strings.

        Line separators are not added between the written byte strings.
        """
        with self._lock:
            return _compression.BaseStream.writelines(self, seq)

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Values for whence are:

            0: start of stream (default); offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
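
        For example (a sketch), the uncompressed size of an open BZ2File f
        can be obtained by seeking to the end of the stream, at the cost of
        decompressing all remaining data::

            size = f.seek(0, io.SEEK_END)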
        """
        with self._lock:
            self._check_can_seek()
            return self._buffer.seek(offset, whence)

    def tell(self):
        """Return the current file position."""
        with self._lock:
            self._check_not_closed()
            if self._mode == _MODE_READ:
                return self._buffer.tell()
            return self._pos


def open(filename, mode="rb", compresslevel=9,
         encoding=None, errors=None, newline=None):
    """Open a bzip2-compressed file in binary or text mode.

    The filename argument can be an actual filename (a str, bytes, or
    PathLike object), or an existing file object to read from or write
    to.

    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or
    "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode.
    The default mode is "rb", and the default compresslevel is 9.

    For binary mode, this function is equivalent to the BZ2File
    constructor: BZ2File(filename, mode, compresslevel). In this case,
    the encoding, errors and newline arguments must not be provided.

    For text mode, a BZ2File object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error
    handling behavior, and line ending(s).
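
    For example, a sketch of reading a compressed text file line by line
    (the path is illustrative)::

        with open("example.bz2", "rt", encoding="utf-8") as f:
            for line in f:
                print(line, end="")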
    """
    if "t" in mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    bz_mode = mode.replace("t", "")
    binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel)

    if "t" in mode:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    else:
        return binary_file


def compress(data, compresslevel=9):
    """Compress a block of data.

    compresslevel, if given, must be a number between 1 and 9.

    For incremental compression, use a BZ2Compressor object instead.
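
    A minimal incremental sketch (the chunks are illustrative)::

        comp = BZ2Compressor(9)
        parts = [comp.compress(chunk) for chunk in (b"spam", b"eggs")]
        parts.append(comp.flush())       # emit the end-of-stream marker
        compressed = b"".join(parts)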
    """
    comp = BZ2Compressor(compresslevel)
    return comp.compress(data) + comp.flush()


def decompress(data):
    """Decompress a block of data.

    For incremental decompression, use a BZ2Decompressor object instead.
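
    A minimal incremental sketch ('compressed' is assumed to hold a complete
    bzip2 stream, e.g. the output of compress())::

        decomp = BZ2Decompressor()
        out = decomp.decompress(compressed)
        assert decomp.eof                # the end-of-stream marker was seen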
    """
    results = []
    while data:
        decomp = BZ2Decompressor()
        try:
            res = decomp.decompress(data)
        except OSError:
            if results:
                break  # Leftover data is not a valid bzip2 stream; ignore it.
            else:
                raise  # Error on the first iteration; bail out.
        results.append(res)
        if not decomp.eof:
            raise ValueError("Compressed data ended before the "
                             "end-of-stream marker was reached")
        data = decomp.unused_data
    return b"".join(results)