• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* _bz2 - Low-level Python interface to libbzip2. */
2 
3 #define PY_SSIZE_T_CLEAN
4 
5 #include "Python.h"
6 #include "structmember.h"         // PyMemberDef
7 
8 #include <bzlib.h>
9 #include <stdio.h>
10 
11 // Blocks output buffer wrappers
12 #include "pycore_blocks_output_buffer.h"
13 
/* The wrappers below narrow block sizes to uint32_t, so refuse to build if
   a single output block could be larger than that. */
#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > UINT32_MAX
    #error "The maximum block size accepted by libbzip2 is UINT32_MAX."
#endif
17 
18 /* On success, return value >= 0
19    On failure, return -1 */
20 static inline Py_ssize_t
OutputBuffer_InitAndGrow(_BlocksOutputBuffer * buffer,Py_ssize_t max_length,char ** next_out,uint32_t * avail_out)21 OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
22                          char **next_out, uint32_t *avail_out)
23 {
24     Py_ssize_t allocated;
25 
26     allocated = _BlocksOutputBuffer_InitAndGrow(
27                     buffer, max_length, (void**) next_out);
28     *avail_out = (uint32_t) allocated;
29     return allocated;
30 }
31 
32 /* On success, return value >= 0
33    On failure, return -1 */
34 static inline Py_ssize_t
OutputBuffer_Grow(_BlocksOutputBuffer * buffer,char ** next_out,uint32_t * avail_out)35 OutputBuffer_Grow(_BlocksOutputBuffer *buffer,
36                   char **next_out, uint32_t *avail_out)
37 {
38     Py_ssize_t allocated;
39 
40     allocated = _BlocksOutputBuffer_Grow(
41                     buffer, (void**) next_out, (Py_ssize_t) *avail_out);
42     *avail_out = (uint32_t) allocated;
43     return allocated;
44 }
45 
46 static inline Py_ssize_t
OutputBuffer_GetDataSize(_BlocksOutputBuffer * buffer,uint32_t avail_out)47 OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, uint32_t avail_out)
48 {
49     return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
50 }
51 
52 static inline PyObject *
OutputBuffer_Finish(_BlocksOutputBuffer * buffer,uint32_t avail_out)53 OutputBuffer_Finish(_BlocksOutputBuffer *buffer, uint32_t avail_out)
54 {
55     return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
56 }
57 
58 static inline void
OutputBuffer_OnError(_BlocksOutputBuffer * buffer)59 OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
60 {
61     _BlocksOutputBuffer_OnError(buffer);
62 }
63 
64 
/* When <bzlib.h> does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed
   names used in this file onto the unprefixed ones (presumably for old
   libbzip2 releases that lacked the prefix). */
#ifndef BZ_CONFIG_ERROR
#define BZ2_bzCompress          bzCompress
#define BZ2_bzCompressInit      bzCompressInit
#define BZ2_bzCompressEnd       bzCompressEnd
#define BZ2_bzDecompress        bzDecompress
#define BZ2_bzDecompressInit    bzDecompressInit
#define BZ2_bzDecompressEnd     bzDecompressEnd
#endif  /* ! BZ_CONFIG_ERROR */
73 
74 
/* Take obj->lock.  A non-blocking attempt is made first; only if that fails
   is a blocking acquire performed, wrapped in
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj)                                   \
    do {                                                    \
        if (!PyThread_acquire_lock((obj)->lock, 0)) {       \
            Py_BEGIN_ALLOW_THREADS                          \
            PyThread_acquire_lock((obj)->lock, 1);          \
            Py_END_ALLOW_THREADS                            \
        }                                                   \
    } while (0)

/* Release the lock taken by ACQUIRE_LOCK(). */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
82 
83 
84 typedef struct {
85     PyTypeObject *bz2_compressor_type;
86     PyTypeObject *bz2_decompressor_type;
87 } _bz2_state;
88 
89 static inline _bz2_state*
get_bz2_state(PyObject * module)90 get_bz2_state(PyObject *module)
91 {
92     void *state = PyModule_GetState(module);
93     assert(state != NULL);
94     return (_bz2_state *)state;
95 }
96 
97 typedef struct {
98     PyObject_HEAD
99     bz_stream bzs;
100     int flushed;
101     PyThread_type_lock lock;
102 } BZ2Compressor;
103 
104 typedef struct {
105     PyObject_HEAD
106     bz_stream bzs;
107     char eof;           /* T_BOOL expects a char */
108     PyObject *unused_data;
109     char needs_input;
110     char *input_buffer;
111     size_t input_buffer_size;
112 
113     /* bzs->avail_in is only 32 bit, so we store the true length
114        separately. Conversion and looping is encapsulated in
115        decompress_buf() */
116     size_t bzs_avail_in_real;
117     PyThread_type_lock lock;
118 } BZ2Decompressor;
119 
120 /* Helper functions. */
121 
122 static int
catch_bz2_error(int bzerror)123 catch_bz2_error(int bzerror)
124 {
125     switch(bzerror) {
126         case BZ_OK:
127         case BZ_RUN_OK:
128         case BZ_FLUSH_OK:
129         case BZ_FINISH_OK:
130         case BZ_STREAM_END:
131             return 0;
132 
133 #ifdef BZ_CONFIG_ERROR
134         case BZ_CONFIG_ERROR:
135             PyErr_SetString(PyExc_SystemError,
136                             "libbzip2 was not compiled correctly");
137             return 1;
138 #endif
139         case BZ_PARAM_ERROR:
140             PyErr_SetString(PyExc_ValueError,
141                             "Internal error - "
142                             "invalid parameters passed to libbzip2");
143             return 1;
144         case BZ_MEM_ERROR:
145             PyErr_NoMemory();
146             return 1;
147         case BZ_DATA_ERROR:
148         case BZ_DATA_ERROR_MAGIC:
149             PyErr_SetString(PyExc_OSError, "Invalid data stream");
150             return 1;
151         case BZ_IO_ERROR:
152             PyErr_SetString(PyExc_OSError, "Unknown I/O error");
153             return 1;
154         case BZ_UNEXPECTED_EOF:
155             PyErr_SetString(PyExc_EOFError,
156                             "Compressed file ended before the logical "
157                             "end-of-stream was detected");
158             return 1;
159         case BZ_SEQUENCE_ERROR:
160             PyErr_SetString(PyExc_RuntimeError,
161                             "Internal error - "
162                             "Invalid sequence of commands sent to libbzip2");
163             return 1;
164         default:
165             PyErr_Format(PyExc_OSError,
166                          "Unrecognized error from libbzip2: %d", bzerror);
167             return 1;
168     }
169 }
170 
171 
172 /* BZ2Compressor class. */
173 
174 static PyObject *
compress(BZ2Compressor * c,char * data,size_t len,int action)175 compress(BZ2Compressor *c, char *data, size_t len, int action)
176 {
177     PyObject *result;
178     _BlocksOutputBuffer buffer = {.list = NULL};
179 
180     if (OutputBuffer_InitAndGrow(&buffer, -1, &c->bzs.next_out, &c->bzs.avail_out) < 0) {
181         goto error;
182     }
183     c->bzs.next_in = data;
184     c->bzs.avail_in = 0;
185 
186     for (;;) {
187         int bzerror;
188 
189         /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
190            Do compression in chunks of no more than UINT_MAX bytes each. */
191         if (c->bzs.avail_in == 0 && len > 0) {
192             c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
193             len -= c->bzs.avail_in;
194         }
195 
196         /* In regular compression mode, stop when input data is exhausted. */
197         if (action == BZ_RUN && c->bzs.avail_in == 0)
198             break;
199 
200         if (c->bzs.avail_out == 0) {
201             if (OutputBuffer_Grow(&buffer, &c->bzs.next_out, &c->bzs.avail_out) < 0) {
202                 goto error;
203             }
204         }
205 
206         Py_BEGIN_ALLOW_THREADS
207         bzerror = BZ2_bzCompress(&c->bzs, action);
208         Py_END_ALLOW_THREADS
209 
210         if (catch_bz2_error(bzerror))
211             goto error;
212 
213         /* In flushing mode, stop when all buffered data has been flushed. */
214         if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
215             break;
216     }
217 
218     result = OutputBuffer_Finish(&buffer, c->bzs.avail_out);
219     if (result != NULL) {
220         return result;
221     }
222 
223 error:
224     OutputBuffer_OnError(&buffer);
225     return NULL;
226 }
227 
228 /*[clinic input]
229 module _bz2
230 class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
231 class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
232 [clinic start generated code]*/
233 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/
234 
235 #include "clinic/_bz2module.c.h"
236 
237 /*[clinic input]
238 _bz2.BZ2Compressor.compress
239 
240     data: Py_buffer
241     /
242 
243 Provide data to the compressor object.
244 
245 Returns a chunk of compressed data if possible, or b'' otherwise.
246 
247 When you have finished providing data to the compressor, call the
248 flush() method to finish the compression process.
249 [clinic start generated code]*/
250 
251 static PyObject *
_bz2_BZ2Compressor_compress_impl(BZ2Compressor * self,Py_buffer * data)252 _bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
253 /*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/
254 {
255     PyObject *result = NULL;
256 
257     ACQUIRE_LOCK(self);
258     if (self->flushed)
259         PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
260     else
261         result = compress(self, data->buf, data->len, BZ_RUN);
262     RELEASE_LOCK(self);
263     return result;
264 }
265 
266 /*[clinic input]
267 _bz2.BZ2Compressor.flush
268 
269 Finish the compression process.
270 
271 Returns the compressed data left in internal buffers.
272 
273 The compressor object may not be used after this method is called.
274 [clinic start generated code]*/
275 
276 static PyObject *
_bz2_BZ2Compressor_flush_impl(BZ2Compressor * self)277 _bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
278 /*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/
279 {
280     PyObject *result = NULL;
281 
282     ACQUIRE_LOCK(self);
283     if (self->flushed)
284         PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
285     else {
286         self->flushed = 1;
287         result = compress(self, NULL, 0, BZ_FINISH);
288     }
289     RELEASE_LOCK(self);
290     return result;
291 }
292 
293 static void*
BZ2_Malloc(void * ctx,int items,int size)294 BZ2_Malloc(void* ctx, int items, int size)
295 {
296     if (items < 0 || size < 0)
297         return NULL;
298     if (size != 0 && (size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size)
299         return NULL;
300     /* PyMem_Malloc() cannot be used: compress() and decompress()
301        release the GIL */
302     return PyMem_RawMalloc((size_t)items * (size_t)size);
303 }
304 
/* Counterpart of BZ2_Malloc(), installed as bz_stream.bzfree.  `ctx` is
   unused. */
static void
BZ2_Free(void* ctx, void *ptr)
{
    PyMem_RawFree(ptr);
}
310 
311 
312 /* Argument Clinic is not used since the Argument Clinic always want to
313    check the type which would be wrong here */
314 static int
_bz2_BZ2Compressor___init___impl(BZ2Compressor * self,int compresslevel)315 _bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
316 {
317     int bzerror;
318 
319     if (!(1 <= compresslevel && compresslevel <= 9)) {
320         PyErr_SetString(PyExc_ValueError,
321                         "compresslevel must be between 1 and 9");
322         return -1;
323     }
324 
325     self->lock = PyThread_allocate_lock();
326     if (self->lock == NULL) {
327         PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
328         return -1;
329     }
330 
331     self->bzs.opaque = NULL;
332     self->bzs.bzalloc = BZ2_Malloc;
333     self->bzs.bzfree = BZ2_Free;
334     bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
335     if (catch_bz2_error(bzerror))
336         goto error;
337 
338     return 0;
339 
340 error:
341     PyThread_free_lock(self->lock);
342     self->lock = NULL;
343     return -1;
344 }
345 
346 PyDoc_STRVAR(_bz2_BZ2Compressor___init____doc__,
347 "BZ2Compressor(compresslevel=9, /)\n"
348 "--\n"
349 "\n"
350 "Create a compressor object for compressing data incrementally.\n"
351 "\n"
352 "  compresslevel\n"
353 "    Compression level, as a number between 1 and 9.\n"
354 "\n"
355 "For one-shot compression, use the compress() function instead.");
356 
357 static int
_bz2_BZ2Compressor___init__(PyObject * self,PyObject * args,PyObject * kwargs)358 _bz2_BZ2Compressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
359 {
360     int return_value = -1;
361     int compresslevel = 9;
362 
363     if (!_PyArg_NoKeywords("BZ2Compressor", kwargs)) {
364         goto exit;
365     }
366     if (!_PyArg_CheckPositional("BZ2Compressor", PyTuple_GET_SIZE(args), 0, 1)) {
367         goto exit;
368     }
369     if (PyTuple_GET_SIZE(args) < 1) {
370         goto skip_optional;
371     }
372     compresslevel = _PyLong_AsInt(PyTuple_GET_ITEM(args, 0));
373     if (compresslevel == -1 && PyErr_Occurred()) {
374         goto exit;
375     }
376 skip_optional:
377     return_value = _bz2_BZ2Compressor___init___impl((BZ2Compressor *)self, compresslevel);
378 
379 exit:
380     return return_value;
381 }
382 
383 static void
BZ2Compressor_dealloc(BZ2Compressor * self)384 BZ2Compressor_dealloc(BZ2Compressor *self)
385 {
386     BZ2_bzCompressEnd(&self->bzs);
387     if (self->lock != NULL) {
388         PyThread_free_lock(self->lock);
389     }
390     PyTypeObject *tp = Py_TYPE(self);
391     tp->tp_free((PyObject *)self);
392     Py_DECREF(tp);
393 }
394 
395 static int
BZ2Compressor_traverse(BZ2Compressor * self,visitproc visit,void * arg)396 BZ2Compressor_traverse(BZ2Compressor *self, visitproc visit, void *arg)
397 {
398     Py_VISIT(Py_TYPE(self));
399     return 0;
400 }
401 
402 static PyMethodDef BZ2Compressor_methods[] = {
403     _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
404     _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
405     {NULL}
406 };
407 
408 static PyType_Slot bz2_compressor_type_slots[] = {
409     {Py_tp_dealloc, BZ2Compressor_dealloc},
410     {Py_tp_methods, BZ2Compressor_methods},
411     {Py_tp_init, _bz2_BZ2Compressor___init__},
412     {Py_tp_new, PyType_GenericNew},
413     {Py_tp_doc, (char *)_bz2_BZ2Compressor___init____doc__},
414     {Py_tp_traverse, BZ2Compressor_traverse},
415     {0, 0}
416 };
417 
418 static PyType_Spec bz2_compressor_type_spec = {
419     .name = "_bz2.BZ2Compressor",
420     .basicsize = sizeof(BZ2Compressor),
421     // Calling PyType_GetModuleState() on a subclass is not safe.
422     // bz2_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
423     // which prevents to create a subclass.
424     // So calling PyType_GetModuleState() in this file is always safe.
425     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
426     .slots = bz2_compressor_type_slots,
427 };
428 
429 /* BZ2Decompressor class. */
430 
431 /* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in.  The output
432    buffer is allocated dynamically and returned.  At most max_length bytes are
433    returned, so some of the input may not be consumed. d->bzs.next_in and
434    d->bzs_avail_in_real are updated to reflect the consumed input. */
435 static PyObject*
decompress_buf(BZ2Decompressor * d,Py_ssize_t max_length)436 decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
437 {
438     /* data_size is strictly positive, but because we repeatedly have to
439        compare against max_length and PyBytes_GET_SIZE we declare it as
440        signed */
441     PyObject *result;
442     _BlocksOutputBuffer buffer = {.list = NULL};
443     bz_stream *bzs = &d->bzs;
444 
445     if (OutputBuffer_InitAndGrow(&buffer, max_length, &bzs->next_out, &bzs->avail_out) < 0) {
446         goto error;
447     }
448 
449     for (;;) {
450         int bzret;
451         /* On a 64-bit system, buffer length might not fit in avail_out, so we
452            do decompression in chunks of no more than UINT_MAX bytes
453            each. Note that the expression for `avail` is guaranteed to be
454            positive, so the cast is safe. */
455         bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
456         d->bzs_avail_in_real -= bzs->avail_in;
457 
458         Py_BEGIN_ALLOW_THREADS
459         bzret = BZ2_bzDecompress(bzs);
460         Py_END_ALLOW_THREADS
461 
462         d->bzs_avail_in_real += bzs->avail_in;
463 
464         if (catch_bz2_error(bzret))
465             goto error;
466         if (bzret == BZ_STREAM_END) {
467             d->eof = 1;
468             break;
469         } else if (d->bzs_avail_in_real == 0) {
470             break;
471         } else if (bzs->avail_out == 0) {
472             if (OutputBuffer_GetDataSize(&buffer, bzs->avail_out) == max_length) {
473                 break;
474             }
475             if (OutputBuffer_Grow(&buffer, &bzs->next_out, &bzs->avail_out) < 0) {
476                 goto error;
477             }
478         }
479     }
480 
481     result = OutputBuffer_Finish(&buffer, bzs->avail_out);
482     if (result != NULL) {
483         return result;
484     }
485 
486 error:
487     OutputBuffer_OnError(&buffer);
488     return NULL;
489 }
490 
491 
492 static PyObject *
decompress(BZ2Decompressor * d,char * data,size_t len,Py_ssize_t max_length)493 decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
494 {
495     char input_buffer_in_use;
496     PyObject *result;
497     bz_stream *bzs = &d->bzs;
498 
499     /* Prepend unconsumed input if necessary */
500     if (bzs->next_in != NULL) {
501         size_t avail_now, avail_total;
502 
503         /* Number of bytes we can append to input buffer */
504         avail_now = (d->input_buffer + d->input_buffer_size)
505             - (bzs->next_in + d->bzs_avail_in_real);
506 
507         /* Number of bytes we can append if we move existing
508            contents to beginning of buffer (overwriting
509            consumed input) */
510         avail_total = d->input_buffer_size - d->bzs_avail_in_real;
511 
512         if (avail_total < len) {
513             size_t offset = bzs->next_in - d->input_buffer;
514             char *tmp;
515             size_t new_size = d->input_buffer_size + len - avail_now;
516 
517             /* Assign to temporary variable first, so we don't
518                lose address of allocated buffer if realloc fails */
519             tmp = PyMem_Realloc(d->input_buffer, new_size);
520             if (tmp == NULL) {
521                 PyErr_SetNone(PyExc_MemoryError);
522                 return NULL;
523             }
524             d->input_buffer = tmp;
525             d->input_buffer_size = new_size;
526 
527             bzs->next_in = d->input_buffer + offset;
528         }
529         else if (avail_now < len) {
530             memmove(d->input_buffer, bzs->next_in,
531                     d->bzs_avail_in_real);
532             bzs->next_in = d->input_buffer;
533         }
534         memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
535         d->bzs_avail_in_real += len;
536         input_buffer_in_use = 1;
537     }
538     else {
539         bzs->next_in = data;
540         d->bzs_avail_in_real = len;
541         input_buffer_in_use = 0;
542     }
543 
544     result = decompress_buf(d, max_length);
545     if(result == NULL) {
546         bzs->next_in = NULL;
547         return NULL;
548     }
549 
550     if (d->eof) {
551         d->needs_input = 0;
552         if (d->bzs_avail_in_real > 0) {
553             Py_XSETREF(d->unused_data,
554                       PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real));
555             if (d->unused_data == NULL)
556                 goto error;
557         }
558     }
559     else if (d->bzs_avail_in_real == 0) {
560         bzs->next_in = NULL;
561         d->needs_input = 1;
562     }
563     else {
564         d->needs_input = 0;
565 
566         /* If we did not use the input buffer, we now have
567            to copy the tail from the caller's buffer into the
568            input buffer */
569         if (!input_buffer_in_use) {
570 
571             /* Discard buffer if it's too small
572                (resizing it may needlessly copy the current contents) */
573             if (d->input_buffer != NULL &&
574                 d->input_buffer_size < d->bzs_avail_in_real) {
575                 PyMem_Free(d->input_buffer);
576                 d->input_buffer = NULL;
577             }
578 
579             /* Allocate if necessary */
580             if (d->input_buffer == NULL) {
581                 d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
582                 if (d->input_buffer == NULL) {
583                     PyErr_SetNone(PyExc_MemoryError);
584                     goto error;
585                 }
586                 d->input_buffer_size = d->bzs_avail_in_real;
587             }
588 
589             /* Copy tail */
590             memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
591             bzs->next_in = d->input_buffer;
592         }
593     }
594 
595     return result;
596 
597 error:
598     Py_XDECREF(result);
599     return NULL;
600 }
601 
602 /*[clinic input]
603 _bz2.BZ2Decompressor.decompress
604 
605     data: Py_buffer
606     max_length: Py_ssize_t=-1
607 
608 Decompress *data*, returning uncompressed data as bytes.
609 
610 If *max_length* is nonnegative, returns at most *max_length* bytes of
611 decompressed data. If this limit is reached and further output can be
612 produced, *self.needs_input* will be set to ``False``. In this case, the next
613 call to *decompress()* may provide *data* as b'' to obtain more of the output.
614 
615 If all of the input data was decompressed and returned (either because this
616 was less than *max_length* bytes, or because *max_length* was negative),
617 *self.needs_input* will be set to True.
618 
619 Attempting to decompress data after the end of stream is reached raises an
620 EOFError.  Any data found after the end of the stream is ignored and saved in
621 the unused_data attribute.
622 [clinic start generated code]*/
623 
624 static PyObject *
_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor * self,Py_buffer * data,Py_ssize_t max_length)625 _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
626                                      Py_ssize_t max_length)
627 /*[clinic end generated code: output=23e41045deb240a3 input=52e1ffc66a8ea624]*/
628 {
629     PyObject *result = NULL;
630 
631     ACQUIRE_LOCK(self);
632     if (self->eof)
633         PyErr_SetString(PyExc_EOFError, "End of stream already reached");
634     else
635         result = decompress(self, data->buf, data->len, max_length);
636     RELEASE_LOCK(self);
637     return result;
638 }
639 
640 /* Argument Clinic is not used since the Argument Clinic always want to
641    check the type which would be wrong here */
642 static int
_bz2_BZ2Decompressor___init___impl(BZ2Decompressor * self)643 _bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
644 {
645     int bzerror;
646 
647     PyThread_type_lock lock = PyThread_allocate_lock();
648     if (lock == NULL) {
649         PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
650         return -1;
651     }
652     if (self->lock != NULL) {
653         PyThread_free_lock(self->lock);
654     }
655     self->lock = lock;
656 
657     self->needs_input = 1;
658     self->bzs_avail_in_real = 0;
659     self->input_buffer = NULL;
660     self->input_buffer_size = 0;
661     Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
662     if (self->unused_data == NULL)
663         goto error;
664 
665     bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
666     if (catch_bz2_error(bzerror))
667         goto error;
668 
669     return 0;
670 
671 error:
672     Py_CLEAR(self->unused_data);
673     PyThread_free_lock(self->lock);
674     self->lock = NULL;
675     return -1;
676 }
677 
678 static int
_bz2_BZ2Decompressor___init__(PyObject * self,PyObject * args,PyObject * kwargs)679 _bz2_BZ2Decompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
680 {
681     int return_value = -1;
682 
683     if (!_PyArg_NoPositional("BZ2Decompressor", args)) {
684         goto exit;
685     }
686     if (!_PyArg_NoKeywords("BZ2Decompressor", kwargs)) {
687         goto exit;
688     }
689     return_value = _bz2_BZ2Decompressor___init___impl((BZ2Decompressor *)self);
690 
691 exit:
692     return return_value;
693 }
694 
695 PyDoc_STRVAR(_bz2_BZ2Decompressor___init____doc__,
696 "BZ2Decompressor()\n"
697 "--\n"
698 "\n"
699 "Create a decompressor object for decompressing data incrementally.\n"
700 "\n"
701 "For one-shot decompression, use the decompress() function instead.");
702 
703 static void
BZ2Decompressor_dealloc(BZ2Decompressor * self)704 BZ2Decompressor_dealloc(BZ2Decompressor *self)
705 {
706     if(self->input_buffer != NULL) {
707         PyMem_Free(self->input_buffer);
708     }
709     BZ2_bzDecompressEnd(&self->bzs);
710     Py_CLEAR(self->unused_data);
711     if (self->lock != NULL) {
712         PyThread_free_lock(self->lock);
713     }
714 
715     PyTypeObject *tp = Py_TYPE(self);
716     tp->tp_free((PyObject *)self);
717     Py_DECREF(tp);
718 }
719 
720 static int
BZ2Decompressor_traverse(BZ2Decompressor * self,visitproc visit,void * arg)721 BZ2Decompressor_traverse(BZ2Decompressor *self, visitproc visit, void *arg)
722 {
723     Py_VISIT(Py_TYPE(self));
724     return 0;
725 }
726 
727 static PyMethodDef BZ2Decompressor_methods[] = {
728     _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
729     {NULL}
730 };
731 
732 PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
733 "True if the end-of-stream marker has been reached.");
734 
735 PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
736 "Data found after the end of the compressed stream.");
737 
738 PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
739 "True if more input is needed before more decompressed data can be produced.");
740 
741 static PyMemberDef BZ2Decompressor_members[] = {
742     {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
743      READONLY, BZ2Decompressor_eof__doc__},
744     {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
745      READONLY, BZ2Decompressor_unused_data__doc__},
746     {"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
747      BZ2Decompressor_needs_input_doc},
748     {NULL}
749 };
750 
751 static PyType_Slot bz2_decompressor_type_slots[] = {
752     {Py_tp_dealloc, BZ2Decompressor_dealloc},
753     {Py_tp_methods, BZ2Decompressor_methods},
754     {Py_tp_init, _bz2_BZ2Decompressor___init__},
755     {Py_tp_doc, (char *)_bz2_BZ2Decompressor___init____doc__},
756     {Py_tp_members, BZ2Decompressor_members},
757     {Py_tp_new, PyType_GenericNew},
758     {Py_tp_traverse, BZ2Decompressor_traverse},
759     {0, 0}
760 };
761 
762 static PyType_Spec bz2_decompressor_type_spec = {
763     .name = "_bz2.BZ2Decompressor",
764     .basicsize = sizeof(BZ2Decompressor),
765     // Calling PyType_GetModuleState() on a subclass is not safe.
766     // bz2_decompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
767     // which prevents to create a subclass.
768     // So calling PyType_GetModuleState() in this file is always safe.
769     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
770     .slots = bz2_decompressor_type_slots,
771 };
772 
773 /* Module initialization. */
774 
775 static int
_bz2_exec(PyObject * module)776 _bz2_exec(PyObject *module)
777 {
778     _bz2_state *state = get_bz2_state(module);
779     state->bz2_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
780                                                             &bz2_compressor_type_spec, NULL);
781     if (state->bz2_compressor_type == NULL) {
782         return -1;
783     }
784 
785     if (PyModule_AddType(module, state->bz2_compressor_type) < 0) {
786         return -1;
787     }
788 
789     state->bz2_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
790                                                          &bz2_decompressor_type_spec, NULL);
791     if (state->bz2_decompressor_type == NULL) {
792         return -1;
793     }
794 
795     if (PyModule_AddType(module, state->bz2_decompressor_type) < 0) {
796         return -1;
797     }
798 
799     return 0;
800 }
801 
802 static int
_bz2_traverse(PyObject * module,visitproc visit,void * arg)803 _bz2_traverse(PyObject *module, visitproc visit, void *arg)
804 {
805     _bz2_state *state = get_bz2_state(module);
806     Py_VISIT(state->bz2_compressor_type);
807     Py_VISIT(state->bz2_decompressor_type);
808     return 0;
809 }
810 
811 static int
_bz2_clear(PyObject * module)812 _bz2_clear(PyObject *module)
813 {
814     _bz2_state *state = get_bz2_state(module);
815     Py_CLEAR(state->bz2_compressor_type);
816     Py_CLEAR(state->bz2_decompressor_type);
817     return 0;
818 }
819 
820 static void
_bz2_free(void * module)821 _bz2_free(void *module)
822 {
823     _bz2_clear((PyObject *)module);
824 }
825 
826 static struct PyModuleDef_Slot _bz2_slots[] = {
827     {Py_mod_exec, _bz2_exec},
828     {0, NULL}
829 };
830 
831 static struct PyModuleDef _bz2module = {
832     PyModuleDef_HEAD_INIT,
833     .m_name = "_bz2",
834     .m_size = sizeof(_bz2_state),
835     .m_slots = _bz2_slots,
836     .m_traverse = _bz2_traverse,
837     .m_clear = _bz2_clear,
838     .m_free = _bz2_free,
839 };
840 
841 PyMODINIT_FUNC
PyInit__bz2(void)842 PyInit__bz2(void)
843 {
844     return PyModuleDef_Init(&_bz2module);
845 }
846