1"""Interface to the libbzip2 compression library.
2
3This module provides a file interface, classes for incremental
4(de)compression, and functions for one-shot (de)compression.
5"""
6
7__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",
8           "open", "compress", "decompress"]
9
10__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"
11
12from builtins import open as _builtin_open
13import io
14import os
15import _compression
16from threading import RLock
17
18from _bz2 import BZ2Compressor, BZ2Decompressor
19
20
21_MODE_CLOSED   = 0
22_MODE_READ     = 1
23# Value 2 no longer used
24_MODE_WRITE    = 3
25
26
class BZ2File(_compression.BaseStream):

    """A file object providing transparent bzip2 (de)compression.

    A BZ2File can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that BZ2File provides a *binary* file interface - data read is
    returned as bytes, and data to be written should be given as bytes.
    """

    def __init__(self, filename, mode="r", *, compresslevel=9):
        """Open a bzip2-compressed file.

        If filename is a str, bytes, or PathLike object, it gives the
        name of the file to be opened. Otherwise, it should be a file
        object, which will be used to read or write the compressed data.

        mode can be 'r' for reading (default), 'w' for (over)writing,
        'x' for creating exclusively, or 'a' for appending. These can
        equivalently be given as 'rb', 'wb', 'xb', and 'ab'.

        If mode is 'w', 'x' or 'a', compresslevel can be a number between 1
        and 9 specifying the level of compression: 1 produces the least
        compression, and 9 (default) produces the most compression.

        If mode is 'r', the input file may be the concatenation of
        multiple compressed streams.
        """
        # This lock must be recursive, so that BufferedIOBase's
        # writelines() does not deadlock.
        self._lock = RLock()
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED

        if not (1 <= compresslevel <= 9):
            raise ValueError("compresslevel must be between 1 and 9")

        if mode in ("", "r", "rb"):
            mode = "rb"
            mode_code = _MODE_READ
        elif mode in ("w", "wb"):
            mode = "wb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("x", "xb"):
            mode = "xb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("a", "ab"):
            mode = "ab"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        else:
            raise ValueError("Invalid mode: %r" % (mode,))

        if isinstance(filename, (str, bytes, os.PathLike)):
            self._fp = _builtin_open(filename, mode)
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str, bytes, file or PathLike object")

        if self._mode == _MODE_READ:
            raw = _compression.DecompressReader(self._fp,
                BZ2Decompressor, trailing_error=OSError)
            self._buffer = io.BufferedReader(raw)
        else:
            self._pos = 0

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        with self._lock:
            if self._mode == _MODE_CLOSED:
                return
            try:
                if self._mode == _MODE_READ:
                    self._buffer.close()
                elif self._mode == _MODE_WRITE:
                    self._fp.write(self._compressor.flush())
                    self._compressor = None
            finally:
                try:
                    if self._closefp:
                        self._fp.close()
                finally:
                    self._fp = None
                    self._closefp = False
                    self._mode = _MODE_CLOSED
                    self._buffer = None

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._buffer.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode == _MODE_READ

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    def peek(self, n=0):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        with self._lock:
            self._check_can_read()
            # Relies on the undocumented fact that BufferedReader.peek()
            # always returns at least one byte (except at EOF), independent
            # of the value of n
            return self._buffer.peek(n)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b'' if the file is already at EOF.
        """
        with self._lock:
            self._check_can_read()
            return self._buffer.read(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b'' if the file is at EOF.
        """
        with self._lock:
            self._check_can_read()
            if size < 0:
                size = io.DEFAULT_BUFFER_SIZE
            return self._buffer.read1(size)

    def readinto(self, b):
        """Read bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        with self._lock:
            self._check_can_read()
            return self._buffer.readinto(b)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            self._check_can_read()
            return self._buffer.readline(size)

    def readlines(self, size=-1):
        """Read a list of lines of uncompressed bytes from the file.

        size can be specified to control the number of lines read: no
        further lines will be read once the total size of the lines read
        so far equals or exceeds size.
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            self._check_can_read()
            return self._buffer.readlines(size)

    def write(self, data):
        """Write a byte string to the file.

        Returns the number of uncompressed bytes written, which is
        always len(data). Note that due to buffering, the file on disk
        may not reflect the data written until close() is called.
        """
        with self._lock:
            self._check_can_write()
            compressed = self._compressor.compress(data)
            self._fp.write(compressed)
            self._pos += len(data)
            return len(data)

    def writelines(self, seq):
        """Write a sequence of byte strings to the file.

        Returns the number of uncompressed bytes written.
        seq can be any iterable yielding byte strings.

        Line separators are not added between the written byte strings.
        """
        with self._lock:
            return _compression.BaseStream.writelines(self, seq)

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Values for whence are:

            0: start of stream (default); offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        with self._lock:
            self._check_can_seek()
            return self._buffer.seek(offset, whence)

    def tell(self):
        """Return the current file position."""
        with self._lock:
            self._check_not_closed()
            if self._mode == _MODE_READ:
                return self._buffer.tell()
            return self._pos

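# A minimal usage sketch for BZ2File, assuming a writable path; the filename
# "example.bz2" and the payload are purely illustrative. It writes two lines,
# then reopens the file to read them back, including the emulated seek()
# described above (which may be slow on large files).
#
#     with BZ2File("example.bz2", "w") as f:
#         f.write(b"hello\n")
#         f.write(b"world\n")
#     with BZ2File("example.bz2", "r") as f:
#         first = f.readline()   # b"hello\n"
#         f.seek(0)              # emulated: rewinds and re-decompresses
#         data = f.read()        # b"hello\nworld\n"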

def open(filename, mode="rb", compresslevel=9,
         encoding=None, errors=None, newline=None):
    """Open a bzip2-compressed file in binary or text mode.

    The filename argument can be an actual filename (a str, bytes, or
    PathLike object), or an existing file object to read from or write
    to.

    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or
    "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode.
    The default mode is "rb", and the default compresslevel is 9.

    For binary mode, this function is equivalent to the BZ2File
    constructor: BZ2File(filename, mode, compresslevel). In this case,
    the encoding, errors and newline arguments must not be provided.

    For text mode, a BZ2File object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error
    handling behavior, and line ending(s).

    """
    if "t" in mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    bz_mode = mode.replace("t", "")
    binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel)

    if "t" in mode:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    else:
        return binary_file

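# A minimal sketch of text-mode usage of open(); the filename and encoding are
# illustrative. The io.TextIOWrapper accepts and returns str, while the
# wrapped BZ2File handles the bzip2-compressed bytes underneath.
#
#     with open("notes.txt.bz2", "wt", encoding="utf-8") as f:
#         f.write("first line\n")
#     with open("notes.txt.bz2", "rt", encoding="utf-8") as f:
#         text = f.read()        # "first line\n"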

def compress(data, compresslevel=9):
    """Compress a block of data.

    compresslevel, if given, must be a number between 1 and 9.

    For incremental compression, use a BZ2Compressor object instead.
    """
    comp = BZ2Compressor(compresslevel)
    return comp.compress(data) + comp.flush()

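# A minimal one-shot sketch; the payload is illustrative. compress() produces
# a complete bzip2 stream that decompress() (below) turns back into the
# original bytes.
#
#     payload = b"sample data " * 100
#     blob = compress(payload, compresslevel=9)
#     assert decompress(blob) == payload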

def decompress(data):
    """Decompress a block of data.

    For incremental decompression, use a BZ2Decompressor object instead.
    """
    results = []
    while data:
        decomp = BZ2Decompressor()
        try:
            res = decomp.decompress(data)
        except OSError:
            if results:
                break  # Leftover data is not a valid bzip2 stream; ignore it.
            else:
                raise  # Error on the first iteration; bail out.
        results.append(res)
        if not decomp.eof:
            raise ValueError("Compressed data ended before the "
                             "end-of-stream marker was reached")
        data = decomp.unused_data
    return b"".join(results)

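# A minimal sketch of incremental (de)compression with BZ2Compressor and
# BZ2Decompressor; the chunking is illustrative. Data is fed piecewise,
# flush() finalizes the stream, and the decompressor's eof flag confirms the
# end-of-stream marker was seen.
#
#     comp = BZ2Compressor()
#     stream = b"".join(comp.compress(chunk) for chunk in (b"spam ", b"eggs"))
#     stream += comp.flush()
#
#     decomp = BZ2Decompressor()
#     out = decomp.decompress(stream)   # b"spam eggs"; decomp.eof is True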