• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* _bz2 - Low-level Python interface to libbzip2. */
2 
3 #ifndef Py_BUILD_CORE_BUILTIN
4 #  define Py_BUILD_CORE_MODULE 1
5 #endif
6 
7 #include "Python.h"
8 
9 #include <bzlib.h>
10 #include <stdio.h>
11 #include <stddef.h>               // offsetof()
12 
13 // Blocks output buffer wrappers
14 #include "pycore_blocks_output_buffer.h"
15 
16 #if OUTPUT_BUFFER_MAX_BLOCK_SIZE > UINT32_MAX
17     #error "The maximum block size accepted by libbzip2 is UINT32_MAX."
18 #endif
19 
20 typedef struct {
21     PyTypeObject *bz2_compressor_type;
22     PyTypeObject *bz2_decompressor_type;
23 } _bz2_state;
24 
25 static inline _bz2_state *
get_module_state(PyObject * module)26 get_module_state(PyObject *module)
27 {
28     void *state = PyModule_GetState(module);
29     assert(state != NULL);
30     return (_bz2_state *)state;
31 }
32 
33 static struct PyModuleDef _bz2module;
34 
35 static inline _bz2_state *
find_module_state_by_def(PyTypeObject * type)36 find_module_state_by_def(PyTypeObject *type)
37 {
38     PyObject *module = PyType_GetModuleByDef(type, &_bz2module);
39     assert(module != NULL);
40     return get_module_state(module);
41 }
42 
43 /* On success, return value >= 0
44    On failure, return -1 */
45 static inline Py_ssize_t
OutputBuffer_InitAndGrow(_BlocksOutputBuffer * buffer,Py_ssize_t max_length,char ** next_out,uint32_t * avail_out)46 OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
47                          char **next_out, uint32_t *avail_out)
48 {
49     Py_ssize_t allocated;
50 
51     allocated = _BlocksOutputBuffer_InitAndGrow(
52                     buffer, max_length, (void**) next_out);
53     *avail_out = (uint32_t) allocated;
54     return allocated;
55 }
56 
57 /* On success, return value >= 0
58    On failure, return -1 */
59 static inline Py_ssize_t
OutputBuffer_Grow(_BlocksOutputBuffer * buffer,char ** next_out,uint32_t * avail_out)60 OutputBuffer_Grow(_BlocksOutputBuffer *buffer,
61                   char **next_out, uint32_t *avail_out)
62 {
63     Py_ssize_t allocated;
64 
65     allocated = _BlocksOutputBuffer_Grow(
66                     buffer, (void**) next_out, (Py_ssize_t) *avail_out);
67     *avail_out = (uint32_t) allocated;
68     return allocated;
69 }
70 
71 static inline Py_ssize_t
OutputBuffer_GetDataSize(_BlocksOutputBuffer * buffer,uint32_t avail_out)72 OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, uint32_t avail_out)
73 {
74     return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
75 }
76 
77 static inline PyObject *
OutputBuffer_Finish(_BlocksOutputBuffer * buffer,uint32_t avail_out)78 OutputBuffer_Finish(_BlocksOutputBuffer *buffer, uint32_t avail_out)
79 {
80     return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
81 }
82 
83 static inline void
OutputBuffer_OnError(_BlocksOutputBuffer * buffer)84 OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
85 {
86     _BlocksOutputBuffer_OnError(buffer);
87 }
88 
89 
/* If <bzlib.h> does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed names
   used in this file onto the unprefixed bz* names. */
#ifndef BZ_CONFIG_ERROR
#  define BZ2_bzCompress        bzCompress
#  define BZ2_bzCompressInit    bzCompressInit
#  define BZ2_bzCompressEnd     bzCompressEnd
#  define BZ2_bzDecompress      bzDecompress
#  define BZ2_bzDecompressInit  bzDecompressInit
#  define BZ2_bzDecompressEnd   bzDecompressEnd
#endif  /* ! BZ_CONFIG_ERROR */
98 
99 
/* Acquire obj->lock.  A non-blocking attempt is made first; only when that
   fails is a blocking acquire performed, wrapped in
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj) \
    do { \
        if (!PyThread_acquire_lock((obj)->lock, 0)) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)

/* Release obj->lock. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
107 
108 
109 typedef struct {
110     PyObject_HEAD
111     bz_stream bzs;
112     int flushed;
113     PyThread_type_lock lock;
114 } BZ2Compressor;
115 
116 typedef struct {
117     PyObject_HEAD
118     bz_stream bzs;
119     char eof;           /* Py_T_BOOL expects a char */
120     PyObject *unused_data;
121     char needs_input;
122     char *input_buffer;
123     size_t input_buffer_size;
124 
125     /* bzs->avail_in is only 32 bit, so we store the true length
126        separately. Conversion and looping is encapsulated in
127        decompress_buf() */
128     size_t bzs_avail_in_real;
129     PyThread_type_lock lock;
130 } BZ2Decompressor;
131 
132 /* Helper functions. */
133 
134 static int
catch_bz2_error(int bzerror)135 catch_bz2_error(int bzerror)
136 {
137     switch(bzerror) {
138         case BZ_OK:
139         case BZ_RUN_OK:
140         case BZ_FLUSH_OK:
141         case BZ_FINISH_OK:
142         case BZ_STREAM_END:
143             return 0;
144 
145 #ifdef BZ_CONFIG_ERROR
146         case BZ_CONFIG_ERROR:
147             PyErr_SetString(PyExc_SystemError,
148                             "libbzip2 was not compiled correctly");
149             return 1;
150 #endif
151         case BZ_PARAM_ERROR:
152             PyErr_SetString(PyExc_ValueError,
153                             "Internal error - "
154                             "invalid parameters passed to libbzip2");
155             return 1;
156         case BZ_MEM_ERROR:
157             PyErr_NoMemory();
158             return 1;
159         case BZ_DATA_ERROR:
160         case BZ_DATA_ERROR_MAGIC:
161             PyErr_SetString(PyExc_OSError, "Invalid data stream");
162             return 1;
163         case BZ_IO_ERROR:
164             PyErr_SetString(PyExc_OSError, "Unknown I/O error");
165             return 1;
166         case BZ_UNEXPECTED_EOF:
167             PyErr_SetString(PyExc_EOFError,
168                             "Compressed file ended before the logical "
169                             "end-of-stream was detected");
170             return 1;
171         case BZ_SEQUENCE_ERROR:
172             PyErr_SetString(PyExc_RuntimeError,
173                             "Internal error - "
174                             "Invalid sequence of commands sent to libbzip2");
175             return 1;
176         default:
177             PyErr_Format(PyExc_OSError,
178                          "Unrecognized error from libbzip2: %d", bzerror);
179             return 1;
180     }
181 }
182 
183 
184 /* BZ2Compressor class. */
185 
186 static PyObject *
compress(BZ2Compressor * c,char * data,size_t len,int action)187 compress(BZ2Compressor *c, char *data, size_t len, int action)
188 {
189     PyObject *result;
190     _BlocksOutputBuffer buffer = {.list = NULL};
191 
192     if (OutputBuffer_InitAndGrow(&buffer, -1, &c->bzs.next_out, &c->bzs.avail_out) < 0) {
193         goto error;
194     }
195     c->bzs.next_in = data;
196     c->bzs.avail_in = 0;
197 
198     for (;;) {
199         int bzerror;
200 
201         /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
202            Do compression in chunks of no more than UINT_MAX bytes each. */
203         if (c->bzs.avail_in == 0 && len > 0) {
204             c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
205             len -= c->bzs.avail_in;
206         }
207 
208         /* In regular compression mode, stop when input data is exhausted. */
209         if (action == BZ_RUN && c->bzs.avail_in == 0)
210             break;
211 
212         if (c->bzs.avail_out == 0) {
213             if (OutputBuffer_Grow(&buffer, &c->bzs.next_out, &c->bzs.avail_out) < 0) {
214                 goto error;
215             }
216         }
217 
218         Py_BEGIN_ALLOW_THREADS
219         bzerror = BZ2_bzCompress(&c->bzs, action);
220         Py_END_ALLOW_THREADS
221 
222         if (catch_bz2_error(bzerror))
223             goto error;
224 
225         /* In flushing mode, stop when all buffered data has been flushed. */
226         if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
227             break;
228     }
229 
230     result = OutputBuffer_Finish(&buffer, c->bzs.avail_out);
231     if (result != NULL) {
232         return result;
233     }
234 
235 error:
236     OutputBuffer_OnError(&buffer);
237     return NULL;
238 }
239 
240 /*[clinic input]
241 module _bz2
242 class _bz2.BZ2Compressor "BZ2Compressor *" "clinic_state()->bz2_compressor_type"
243 class _bz2.BZ2Decompressor "BZ2Decompressor *" "clinic_state()->bz2_decompressor_type"
244 [clinic start generated code]*/
245 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=92348121632b94c4]*/
246 
247 #define clinic_state() (find_module_state_by_def(type))
248 #include "clinic/_bz2module.c.h"
249 #undef clinic_state
250 
251 /*[clinic input]
252 _bz2.BZ2Compressor.compress
253 
254     data: Py_buffer
255     /
256 
257 Provide data to the compressor object.
258 
259 Returns a chunk of compressed data if possible, or b'' otherwise.
260 
261 When you have finished providing data to the compressor, call the
262 flush() method to finish the compression process.
263 [clinic start generated code]*/
264 
265 static PyObject *
_bz2_BZ2Compressor_compress_impl(BZ2Compressor * self,Py_buffer * data)266 _bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
267 /*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/
268 {
269     PyObject *result = NULL;
270 
271     ACQUIRE_LOCK(self);
272     if (self->flushed)
273         PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
274     else
275         result = compress(self, data->buf, data->len, BZ_RUN);
276     RELEASE_LOCK(self);
277     return result;
278 }
279 
280 /*[clinic input]
281 _bz2.BZ2Compressor.flush
282 
283 Finish the compression process.
284 
285 Returns the compressed data left in internal buffers.
286 
287 The compressor object may not be used after this method is called.
288 [clinic start generated code]*/
289 
290 static PyObject *
_bz2_BZ2Compressor_flush_impl(BZ2Compressor * self)291 _bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
292 /*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/
293 {
294     PyObject *result = NULL;
295 
296     ACQUIRE_LOCK(self);
297     if (self->flushed)
298         PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
299     else {
300         self->flushed = 1;
301         result = compress(self, NULL, 0, BZ_FINISH);
302     }
303     RELEASE_LOCK(self);
304     return result;
305 }
306 
307 static void*
BZ2_Malloc(void * ctx,int items,int size)308 BZ2_Malloc(void* ctx, int items, int size)
309 {
310     if (items < 0 || size < 0)
311         return NULL;
312     if (size != 0 && (size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size)
313         return NULL;
314     /* PyMem_Malloc() cannot be used: compress() and decompress()
315        release the GIL */
316     return PyMem_RawMalloc((size_t)items * (size_t)size);
317 }
318 
/* Deallocator installed as bz_stream.bzfree; pairs with BZ2_Malloc().
   `ctx` is unused. */
static void
BZ2_Free(void* ctx, void *ptr)
{
    (void)ctx;
    PyMem_RawFree(ptr);
}
324 
325 /*[clinic input]
326 @classmethod
327 _bz2.BZ2Compressor.__new__
328 
329     compresslevel: int = 9
330         Compression level, as a number between 1 and 9.
331     /
332 
333 Create a compressor object for compressing data incrementally.
334 
335 For one-shot compression, use the compress() function instead.
336 [clinic start generated code]*/
337 
338 static PyObject *
_bz2_BZ2Compressor_impl(PyTypeObject * type,int compresslevel)339 _bz2_BZ2Compressor_impl(PyTypeObject *type, int compresslevel)
340 /*[clinic end generated code: output=83346c96beaacad7 input=d4500d2a52c8b263]*/
341 {
342     int bzerror;
343     BZ2Compressor *self;
344 
345     if (!(1 <= compresslevel && compresslevel <= 9)) {
346         PyErr_SetString(PyExc_ValueError,
347                         "compresslevel must be between 1 and 9");
348         return NULL;
349     }
350 
351     assert(type != NULL && type->tp_alloc != NULL);
352     self = (BZ2Compressor *)type->tp_alloc(type, 0);
353     if (self == NULL) {
354         return NULL;
355     }
356 
357     self->lock = PyThread_allocate_lock();
358     if (self->lock == NULL) {
359         Py_DECREF(self);
360         PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
361         return NULL;
362     }
363 
364     self->bzs.opaque = NULL;
365     self->bzs.bzalloc = BZ2_Malloc;
366     self->bzs.bzfree = BZ2_Free;
367     bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
368     if (catch_bz2_error(bzerror))
369         goto error;
370 
371     return (PyObject *)self;
372 
373 error:
374     Py_DECREF(self);
375     return NULL;
376 }
377 
378 static void
BZ2Compressor_dealloc(BZ2Compressor * self)379 BZ2Compressor_dealloc(BZ2Compressor *self)
380 {
381     BZ2_bzCompressEnd(&self->bzs);
382     if (self->lock != NULL) {
383         PyThread_free_lock(self->lock);
384     }
385     PyTypeObject *tp = Py_TYPE(self);
386     tp->tp_free((PyObject *)self);
387     Py_DECREF(tp);
388 }
389 
390 static int
BZ2Compressor_traverse(BZ2Compressor * self,visitproc visit,void * arg)391 BZ2Compressor_traverse(BZ2Compressor *self, visitproc visit, void *arg)
392 {
393     Py_VISIT(Py_TYPE(self));
394     return 0;
395 }
396 
397 static PyMethodDef BZ2Compressor_methods[] = {
398     _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
399     _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
400     {NULL}
401 };
402 
403 static PyType_Slot bz2_compressor_type_slots[] = {
404     {Py_tp_dealloc, BZ2Compressor_dealloc},
405     {Py_tp_methods, BZ2Compressor_methods},
406     {Py_tp_new, _bz2_BZ2Compressor},
407     {Py_tp_doc, (char *)_bz2_BZ2Compressor__doc__},
408     {Py_tp_traverse, BZ2Compressor_traverse},
409     {0, 0}
410 };
411 
412 static PyType_Spec bz2_compressor_type_spec = {
413     .name = "_bz2.BZ2Compressor",
414     .basicsize = sizeof(BZ2Compressor),
415     // Calling PyType_GetModuleState() on a subclass is not safe.
416     // bz2_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
417     // which prevents to create a subclass.
418     // So calling PyType_GetModuleState() in this file is always safe.
419     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
420     .slots = bz2_compressor_type_slots,
421 };
422 
423 /* BZ2Decompressor class. */
424 
425 /* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in.  The output
426    buffer is allocated dynamically and returned.  At most max_length bytes are
427    returned, so some of the input may not be consumed. d->bzs.next_in and
428    d->bzs_avail_in_real are updated to reflect the consumed input. */
429 static PyObject*
decompress_buf(BZ2Decompressor * d,Py_ssize_t max_length)430 decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
431 {
432     /* data_size is strictly positive, but because we repeatedly have to
433        compare against max_length and PyBytes_GET_SIZE we declare it as
434        signed */
435     PyObject *result;
436     _BlocksOutputBuffer buffer = {.list = NULL};
437     bz_stream *bzs = &d->bzs;
438 
439     if (OutputBuffer_InitAndGrow(&buffer, max_length, &bzs->next_out, &bzs->avail_out) < 0) {
440         goto error;
441     }
442 
443     for (;;) {
444         int bzret;
445         /* On a 64-bit system, buffer length might not fit in avail_out, so we
446            do decompression in chunks of no more than UINT_MAX bytes
447            each. Note that the expression for `avail` is guaranteed to be
448            positive, so the cast is safe. */
449         bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
450         d->bzs_avail_in_real -= bzs->avail_in;
451 
452         Py_BEGIN_ALLOW_THREADS
453         bzret = BZ2_bzDecompress(bzs);
454         Py_END_ALLOW_THREADS
455 
456         d->bzs_avail_in_real += bzs->avail_in;
457 
458         if (catch_bz2_error(bzret))
459             goto error;
460         if (bzret == BZ_STREAM_END) {
461             d->eof = 1;
462             break;
463         } else if (d->bzs_avail_in_real == 0) {
464             break;
465         } else if (bzs->avail_out == 0) {
466             if (OutputBuffer_GetDataSize(&buffer, bzs->avail_out) == max_length) {
467                 break;
468             }
469             if (OutputBuffer_Grow(&buffer, &bzs->next_out, &bzs->avail_out) < 0) {
470                 goto error;
471             }
472         }
473     }
474 
475     result = OutputBuffer_Finish(&buffer, bzs->avail_out);
476     if (result != NULL) {
477         return result;
478     }
479 
480 error:
481     OutputBuffer_OnError(&buffer);
482     return NULL;
483 }
484 
485 
486 static PyObject *
decompress(BZ2Decompressor * d,char * data,size_t len,Py_ssize_t max_length)487 decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
488 {
489     char input_buffer_in_use;
490     PyObject *result;
491     bz_stream *bzs = &d->bzs;
492 
493     /* Prepend unconsumed input if necessary */
494     if (bzs->next_in != NULL) {
495         size_t avail_now, avail_total;
496 
497         /* Number of bytes we can append to input buffer */
498         avail_now = (d->input_buffer + d->input_buffer_size)
499             - (bzs->next_in + d->bzs_avail_in_real);
500 
501         /* Number of bytes we can append if we move existing
502            contents to beginning of buffer (overwriting
503            consumed input) */
504         avail_total = d->input_buffer_size - d->bzs_avail_in_real;
505 
506         if (avail_total < len) {
507             size_t offset = bzs->next_in - d->input_buffer;
508             char *tmp;
509             size_t new_size = d->input_buffer_size + len - avail_now;
510 
511             /* Assign to temporary variable first, so we don't
512                lose address of allocated buffer if realloc fails */
513             tmp = PyMem_Realloc(d->input_buffer, new_size);
514             if (tmp == NULL) {
515                 PyErr_SetNone(PyExc_MemoryError);
516                 return NULL;
517             }
518             d->input_buffer = tmp;
519             d->input_buffer_size = new_size;
520 
521             bzs->next_in = d->input_buffer + offset;
522         }
523         else if (avail_now < len) {
524             memmove(d->input_buffer, bzs->next_in,
525                     d->bzs_avail_in_real);
526             bzs->next_in = d->input_buffer;
527         }
528         memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
529         d->bzs_avail_in_real += len;
530         input_buffer_in_use = 1;
531     }
532     else {
533         bzs->next_in = data;
534         d->bzs_avail_in_real = len;
535         input_buffer_in_use = 0;
536     }
537 
538     result = decompress_buf(d, max_length);
539     if(result == NULL) {
540         bzs->next_in = NULL;
541         return NULL;
542     }
543 
544     if (d->eof) {
545         d->needs_input = 0;
546         if (d->bzs_avail_in_real > 0) {
547             Py_XSETREF(d->unused_data,
548                       PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real));
549             if (d->unused_data == NULL)
550                 goto error;
551         }
552     }
553     else if (d->bzs_avail_in_real == 0) {
554         bzs->next_in = NULL;
555         d->needs_input = 1;
556     }
557     else {
558         d->needs_input = 0;
559 
560         /* If we did not use the input buffer, we now have
561            to copy the tail from the caller's buffer into the
562            input buffer */
563         if (!input_buffer_in_use) {
564 
565             /* Discard buffer if it's too small
566                (resizing it may needlessly copy the current contents) */
567             if (d->input_buffer != NULL &&
568                 d->input_buffer_size < d->bzs_avail_in_real) {
569                 PyMem_Free(d->input_buffer);
570                 d->input_buffer = NULL;
571             }
572 
573             /* Allocate if necessary */
574             if (d->input_buffer == NULL) {
575                 d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
576                 if (d->input_buffer == NULL) {
577                     PyErr_SetNone(PyExc_MemoryError);
578                     goto error;
579                 }
580                 d->input_buffer_size = d->bzs_avail_in_real;
581             }
582 
583             /* Copy tail */
584             memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
585             bzs->next_in = d->input_buffer;
586         }
587     }
588 
589     return result;
590 
591 error:
592     Py_XDECREF(result);
593     return NULL;
594 }
595 
596 /*[clinic input]
597 _bz2.BZ2Decompressor.decompress
598 
599     data: Py_buffer
600     max_length: Py_ssize_t=-1
601 
602 Decompress *data*, returning uncompressed data as bytes.
603 
604 If *max_length* is nonnegative, returns at most *max_length* bytes of
605 decompressed data. If this limit is reached and further output can be
606 produced, *self.needs_input* will be set to ``False``. In this case, the next
607 call to *decompress()* may provide *data* as b'' to obtain more of the output.
608 
609 If all of the input data was decompressed and returned (either because this
610 was less than *max_length* bytes, or because *max_length* was negative),
611 *self.needs_input* will be set to True.
612 
613 Attempting to decompress data after the end of stream is reached raises an
614 EOFError.  Any data found after the end of the stream is ignored and saved in
615 the unused_data attribute.
616 [clinic start generated code]*/
617 
618 static PyObject *
_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor * self,Py_buffer * data,Py_ssize_t max_length)619 _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
620                                      Py_ssize_t max_length)
621 /*[clinic end generated code: output=23e41045deb240a3 input=52e1ffc66a8ea624]*/
622 {
623     PyObject *result = NULL;
624 
625     ACQUIRE_LOCK(self);
626     if (self->eof)
627         PyErr_SetString(PyExc_EOFError, "End of stream already reached");
628     else
629         result = decompress(self, data->buf, data->len, max_length);
630     RELEASE_LOCK(self);
631     return result;
632 }
633 
634 /*[clinic input]
635 @classmethod
636 _bz2.BZ2Decompressor.__new__
637 
638 Create a decompressor object for decompressing data incrementally.
639 
640 For one-shot decompression, use the decompress() function instead.
641 [clinic start generated code]*/
642 
643 static PyObject *
_bz2_BZ2Decompressor_impl(PyTypeObject * type)644 _bz2_BZ2Decompressor_impl(PyTypeObject *type)
645 /*[clinic end generated code: output=5150d51ccaab220e input=b87413ce51853528]*/
646 {
647     BZ2Decompressor *self;
648     int bzerror;
649 
650     assert(type != NULL && type->tp_alloc != NULL);
651     self = (BZ2Decompressor *)type->tp_alloc(type, 0);
652     if (self == NULL) {
653         return NULL;
654     }
655 
656     self->lock = PyThread_allocate_lock();
657     if (self->lock == NULL) {
658         Py_DECREF(self);
659         PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
660         return NULL;
661     }
662 
663     self->needs_input = 1;
664     self->bzs_avail_in_real = 0;
665     self->input_buffer = NULL;
666     self->input_buffer_size = 0;
667     self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
668     if (self->unused_data == NULL)
669         goto error;
670 
671     bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
672     if (catch_bz2_error(bzerror))
673         goto error;
674 
675     return (PyObject *)self;
676 
677 error:
678     Py_DECREF(self);
679     return NULL;
680 }
681 
682 static void
BZ2Decompressor_dealloc(BZ2Decompressor * self)683 BZ2Decompressor_dealloc(BZ2Decompressor *self)
684 {
685     if(self->input_buffer != NULL) {
686         PyMem_Free(self->input_buffer);
687     }
688     BZ2_bzDecompressEnd(&self->bzs);
689     Py_CLEAR(self->unused_data);
690     if (self->lock != NULL) {
691         PyThread_free_lock(self->lock);
692     }
693 
694     PyTypeObject *tp = Py_TYPE(self);
695     tp->tp_free((PyObject *)self);
696     Py_DECREF(tp);
697 }
698 
699 static int
BZ2Decompressor_traverse(BZ2Decompressor * self,visitproc visit,void * arg)700 BZ2Decompressor_traverse(BZ2Decompressor *self, visitproc visit, void *arg)
701 {
702     Py_VISIT(Py_TYPE(self));
703     return 0;
704 }
705 
706 static PyMethodDef BZ2Decompressor_methods[] = {
707     _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
708     {NULL}
709 };
710 
711 PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
712 "True if the end-of-stream marker has been reached.");
713 
714 PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
715 "Data found after the end of the compressed stream.");
716 
717 PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
718 "True if more input is needed before more decompressed data can be produced.");
719 
720 static PyMemberDef BZ2Decompressor_members[] = {
721     {"eof", Py_T_BOOL, offsetof(BZ2Decompressor, eof),
722      Py_READONLY, BZ2Decompressor_eof__doc__},
723     {"unused_data", Py_T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
724      Py_READONLY, BZ2Decompressor_unused_data__doc__},
725     {"needs_input", Py_T_BOOL, offsetof(BZ2Decompressor, needs_input), Py_READONLY,
726      BZ2Decompressor_needs_input_doc},
727     {NULL}
728 };
729 
730 static PyType_Slot bz2_decompressor_type_slots[] = {
731     {Py_tp_dealloc, BZ2Decompressor_dealloc},
732     {Py_tp_methods, BZ2Decompressor_methods},
733     {Py_tp_doc, (char *)_bz2_BZ2Decompressor__doc__},
734     {Py_tp_members, BZ2Decompressor_members},
735     {Py_tp_new, _bz2_BZ2Decompressor},
736     {Py_tp_traverse, BZ2Decompressor_traverse},
737     {0, 0}
738 };
739 
740 static PyType_Spec bz2_decompressor_type_spec = {
741     .name = "_bz2.BZ2Decompressor",
742     .basicsize = sizeof(BZ2Decompressor),
743     // Calling PyType_GetModuleState() on a subclass is not safe.
744     // bz2_decompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
745     // which prevents to create a subclass.
746     // So calling PyType_GetModuleState() in this file is always safe.
747     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
748     .slots = bz2_decompressor_type_slots,
749 };
750 
751 /* Module initialization. */
752 
753 static int
_bz2_exec(PyObject * module)754 _bz2_exec(PyObject *module)
755 {
756     _bz2_state *state = get_module_state(module);
757     state->bz2_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
758                                                             &bz2_compressor_type_spec, NULL);
759     if (state->bz2_compressor_type == NULL) {
760         return -1;
761     }
762     if (PyModule_AddType(module, state->bz2_compressor_type) < 0) {
763         return -1;
764     }
765 
766     state->bz2_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
767                                                          &bz2_decompressor_type_spec, NULL);
768     if (state->bz2_decompressor_type == NULL) {
769         return -1;
770     }
771     if (PyModule_AddType(module, state->bz2_decompressor_type) < 0) {
772         return -1;
773     }
774 
775     return 0;
776 }
777 
778 static int
_bz2_traverse(PyObject * module,visitproc visit,void * arg)779 _bz2_traverse(PyObject *module, visitproc visit, void *arg)
780 {
781     _bz2_state *state = get_module_state(module);
782     Py_VISIT(state->bz2_compressor_type);
783     Py_VISIT(state->bz2_decompressor_type);
784     return 0;
785 }
786 
787 static int
_bz2_clear(PyObject * module)788 _bz2_clear(PyObject *module)
789 {
790     _bz2_state *state = get_module_state(module);
791     Py_CLEAR(state->bz2_compressor_type);
792     Py_CLEAR(state->bz2_decompressor_type);
793     return 0;
794 }
795 
796 static void
_bz2_free(void * module)797 _bz2_free(void *module)
798 {
799     (void)_bz2_clear((PyObject *)module);
800 }
801 
802 static struct PyModuleDef_Slot _bz2_slots[] = {
803     {Py_mod_exec, _bz2_exec},
804     {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
805     {Py_mod_gil, Py_MOD_GIL_NOT_USED},
806     {0, NULL}
807 };
808 
809 static struct PyModuleDef _bz2module = {
810     .m_base = PyModuleDef_HEAD_INIT,
811     .m_name = "_bz2",
812     .m_size = sizeof(_bz2_state),
813     .m_traverse = _bz2_traverse,
814     .m_clear = _bz2_clear,
815     .m_free = _bz2_free,
816     .m_slots = _bz2_slots,
817 };
818 
819 PyMODINIT_FUNC
PyInit__bz2(void)820 PyInit__bz2(void)
821 {
822     return PyModuleDef_Init(&_bz2module);
823 }
824