• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* _lzma - Low-level Python interface to liblzma.
2 
3    Initial implementation by Per Øyvind Karlsen.
4    Rewritten by Nadeem Vawda.
5 
6 */
7 
8 #ifndef Py_BUILD_CORE_BUILTIN
9 #  define Py_BUILD_CORE_MODULE 1
10 #endif
11 
12 #include "Python.h"
13 
14 
15 #include <stdlib.h>               // free()
16 #include <string.h>
17 
18 #include <lzma.h>
19 
20 // Blocks output buffer wrappers
21 #include "pycore_blocks_output_buffer.h"
22 
23 #if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX
24     #error "The maximum block size accepted by liblzma is SIZE_MAX."
25 #endif
26 
27 /* On success, return value >= 0
28    On failure, return -1 */
29 static inline Py_ssize_t
OutputBuffer_InitAndGrow(_BlocksOutputBuffer * buffer,Py_ssize_t max_length,uint8_t ** next_out,size_t * avail_out)30 OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
31                          uint8_t **next_out, size_t *avail_out)
32 {
33     Py_ssize_t allocated;
34 
35     allocated = _BlocksOutputBuffer_InitAndGrow(
36                     buffer, max_length, (void**) next_out);
37     *avail_out = (size_t) allocated;
38     return allocated;
39 }
40 
41 /* On success, return value >= 0
42    On failure, return -1 */
43 static inline Py_ssize_t
OutputBuffer_Grow(_BlocksOutputBuffer * buffer,uint8_t ** next_out,size_t * avail_out)44 OutputBuffer_Grow(_BlocksOutputBuffer *buffer,
45                   uint8_t **next_out, size_t *avail_out)
46 {
47     Py_ssize_t allocated;
48 
49     allocated = _BlocksOutputBuffer_Grow(
50                     buffer, (void**) next_out, (Py_ssize_t) *avail_out);
51     *avail_out = (size_t) allocated;
52     return allocated;
53 }
54 
55 static inline Py_ssize_t
OutputBuffer_GetDataSize(_BlocksOutputBuffer * buffer,size_t avail_out)56 OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, size_t avail_out)
57 {
58     return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
59 }
60 
61 static inline PyObject *
OutputBuffer_Finish(_BlocksOutputBuffer * buffer,size_t avail_out)62 OutputBuffer_Finish(_BlocksOutputBuffer *buffer, size_t avail_out)
63 {
64     return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
65 }
66 
67 static inline void
OutputBuffer_OnError(_BlocksOutputBuffer * buffer)68 OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
69 {
70     _BlocksOutputBuffer_OnError(buffer);
71 }
72 
73 
/* Take obj->lock.  A non-blocking attempt (second argument 0) is made
   first; only when that fails is the blocking acquire (second argument 1)
   performed, bracketed by Py_BEGIN/END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj) \
    do { \
        if (!PyThread_acquire_lock((obj)->lock, 0)) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)

/* Release obj->lock. */
#define RELEASE_LOCK(obj) \
    PyThread_release_lock((obj)->lock)
81 
82 typedef struct {
83     PyTypeObject *lzma_compressor_type;
84     PyTypeObject *lzma_decompressor_type;
85     PyObject *error;
86     PyObject *empty_tuple;
87 } _lzma_state;
88 
89 static inline _lzma_state*
get_lzma_state(PyObject * module)90 get_lzma_state(PyObject *module)
91 {
92     void *state = PyModule_GetState(module);
93     assert(state != NULL);
94     return (_lzma_state *)state;
95 }
96 
/* Container format identifiers. */
enum {
    FORMAT_AUTO = 0,
    FORMAT_XZ = 1,
    FORMAT_ALONE = 2,
    FORMAT_RAW = 3
};

/* One past liblzma's largest check ID; by its name, a sentinel for
   "integrity check not known". */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
106 
107 
108 typedef struct {
109     PyObject_HEAD
110     lzma_allocator alloc;
111     lzma_stream lzs;
112     int flushed;
113     PyThread_type_lock lock;
114 } Compressor;
115 
116 typedef struct {
117     PyObject_HEAD
118     lzma_allocator alloc;
119     lzma_stream lzs;
120     int check;
121     char eof;
122     PyObject *unused_data;
123     char needs_input;
124     uint8_t *input_buffer;
125     size_t input_buffer_size;
126     PyThread_type_lock lock;
127 } Decompressor;
128 
129 /* Helper functions. */
130 
131 static int
catch_lzma_error(_lzma_state * state,lzma_ret lzret)132 catch_lzma_error(_lzma_state *state, lzma_ret lzret)
133 {
134     switch (lzret) {
135         case LZMA_OK:
136         case LZMA_GET_CHECK:
137         case LZMA_NO_CHECK:
138         case LZMA_STREAM_END:
139             return 0;
140         case LZMA_UNSUPPORTED_CHECK:
141             PyErr_SetString(state->error, "Unsupported integrity check");
142             return 1;
143         case LZMA_MEM_ERROR:
144             PyErr_NoMemory();
145             return 1;
146         case LZMA_MEMLIMIT_ERROR:
147             PyErr_SetString(state->error, "Memory usage limit exceeded");
148             return 1;
149         case LZMA_FORMAT_ERROR:
150             PyErr_SetString(state->error, "Input format not supported by decoder");
151             return 1;
152         case LZMA_OPTIONS_ERROR:
153             PyErr_SetString(state->error, "Invalid or unsupported options");
154             return 1;
155         case LZMA_DATA_ERROR:
156             PyErr_SetString(state->error, "Corrupt input data");
157             return 1;
158         case LZMA_BUF_ERROR:
159             PyErr_SetString(state->error, "Insufficient buffer space");
160             return 1;
161         case LZMA_PROG_ERROR:
162             PyErr_SetString(state->error, "Internal error");
163             return 1;
164         default:
165             PyErr_Format(state->error, "Unrecognized error from liblzma: %d", lzret);
166             return 1;
167     }
168 }
169 
170 static void*
PyLzma_Malloc(void * opaque,size_t items,size_t size)171 PyLzma_Malloc(void *opaque, size_t items, size_t size)
172 {
173     if (size != 0 && items > (size_t)PY_SSIZE_T_MAX / size) {
174         return NULL;
175     }
176     /* PyMem_Malloc() cannot be used:
177        the GIL is not held when lzma_code() is called */
178     return PyMem_RawMalloc(items * size);
179 }
180 
/* Deallocation hook installed in lzma_allocator.free; the counterpart of
   PyLzma_Malloc().  `opaque` is not used. */
static void
PyLzma_Free(void *opaque, void *ptr)
{
    (void)opaque;
    PyMem_RawFree(ptr);
}
186 
187 
188 /* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
189    since the predefined conversion specifiers do not suit our needs:
190 
191       uint32_t - the "I" (unsigned int) specifier is the right size, but
192       silently ignores overflows on conversion.
193 
194       lzma_vli - the "K" (unsigned long long) specifier is the right
195       size, but like "I" it silently ignores overflows on conversion.
196 
197       lzma_mode and lzma_match_finder - these are enumeration types, and
198       so the size of each is implementation-defined. Worse, different
199       enum types can be of different sizes within the same program, so
200       to be strictly correct, we need to define two separate converters.
201  */
202 
203 #define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
204     static int \
205     FUNCNAME(PyObject *obj, void *ptr) \
206     { \
207         unsigned long long val; \
208         \
209         val = PyLong_AsUnsignedLongLong(obj); \
210         if (PyErr_Occurred()) \
211             return 0; \
212         if ((unsigned long long)(TYPE)val != val) { \
213             PyErr_SetString(PyExc_OverflowError, \
214                             "Value too large for " #TYPE " type"); \
215             return 0; \
216         } \
217         *(TYPE *)ptr = (TYPE)val; \
218         return 1; \
219     }
220 
INT_TYPE_CONVERTER_FUNC(uint32_t,uint32_converter)221 INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
222 INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
223 INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
224 INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
225 
226 #undef INT_TYPE_CONVERTER_FUNC
227 
228 
229 /* Filter specifier parsing.
230 
231    This code handles converting filter specifiers (Python dicts) into
232    the C lzma_filter structs expected by liblzma. */
233 
234 static void *
235 parse_filter_spec_lzma(_lzma_state *state, PyObject *spec)
236 {
237     static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
238                                "pb", "mode", "nice_len", "mf", "depth", NULL};
239     PyObject *id;
240     PyObject *preset_obj;
241     uint32_t preset = LZMA_PRESET_DEFAULT;
242     lzma_options_lzma *options;
243 
244     /* First, fill in default values for all the options using a preset.
245        Then, override the defaults with any values given by the caller. */
246 
247     if (PyMapping_GetOptionalItemString(spec, "preset", &preset_obj) < 0) {
248         return NULL;
249     }
250     if (preset_obj != NULL) {
251         int ok = uint32_converter(preset_obj, &preset);
252         Py_DECREF(preset_obj);
253         if (!ok) {
254             return NULL;
255         }
256     }
257 
258     options = (lzma_options_lzma *)PyMem_Calloc(1, sizeof *options);
259     if (options == NULL) {
260         return PyErr_NoMemory();
261     }
262 
263     if (lzma_lzma_preset(options, preset)) {
264         PyMem_Free(options);
265         PyErr_Format(state->error, "Invalid compression preset: %u", preset);
266         return NULL;
267     }
268 
269     if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec,
270                                      "|OOO&O&O&O&O&O&O&O&", optnames,
271                                      &id, &preset_obj,
272                                      uint32_converter, &options->dict_size,
273                                      uint32_converter, &options->lc,
274                                      uint32_converter, &options->lp,
275                                      uint32_converter, &options->pb,
276                                      lzma_mode_converter, &options->mode,
277                                      uint32_converter, &options->nice_len,
278                                      lzma_mf_converter, &options->mf,
279                                      uint32_converter, &options->depth)) {
280         PyErr_SetString(PyExc_ValueError,
281                         "Invalid filter specifier for LZMA filter");
282         PyMem_Free(options);
283         return NULL;
284     }
285 
286     return options;
287 }
288 
289 static void *
parse_filter_spec_delta(_lzma_state * state,PyObject * spec)290 parse_filter_spec_delta(_lzma_state *state, PyObject *spec)
291 {
292     static char *optnames[] = {"id", "dist", NULL};
293     PyObject *id;
294     uint32_t dist = 1;
295     lzma_options_delta *options;
296 
297     if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
298                                      &id, uint32_converter, &dist)) {
299         PyErr_SetString(PyExc_ValueError,
300                         "Invalid filter specifier for delta filter");
301         return NULL;
302     }
303 
304     options = (lzma_options_delta *)PyMem_Calloc(1, sizeof *options);
305     if (options == NULL) {
306         return PyErr_NoMemory();
307     }
308     options->type = LZMA_DELTA_TYPE_BYTE;
309     options->dist = dist;
310     return options;
311 }
312 
313 static void *
parse_filter_spec_bcj(_lzma_state * state,PyObject * spec)314 parse_filter_spec_bcj(_lzma_state *state, PyObject *spec)
315 {
316     static char *optnames[] = {"id", "start_offset", NULL};
317     PyObject *id;
318     uint32_t start_offset = 0;
319     lzma_options_bcj *options;
320 
321     if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
322                                      &id, uint32_converter, &start_offset)) {
323         PyErr_SetString(PyExc_ValueError,
324                         "Invalid filter specifier for BCJ filter");
325         return NULL;
326     }
327 
328     options = (lzma_options_bcj *)PyMem_Calloc(1, sizeof *options);
329     if (options == NULL) {
330         return PyErr_NoMemory();
331     }
332     options->start_offset = start_offset;
333     return options;
334 }
335 
336 static int
lzma_filter_converter(_lzma_state * state,PyObject * spec,void * ptr)337 lzma_filter_converter(_lzma_state *state, PyObject *spec, void *ptr)
338 {
339     lzma_filter *f = (lzma_filter *)ptr;
340     PyObject *id_obj;
341 
342     if (!PyMapping_Check(spec)) {
343         PyErr_SetString(PyExc_TypeError,
344                         "Filter specifier must be a dict or dict-like object");
345         return 0;
346     }
347     if (PyMapping_GetOptionalItemString(spec, "id", &id_obj) < 0) {
348         return 0;
349     }
350     if (id_obj == NULL) {
351         PyErr_SetString(PyExc_ValueError,
352                         "Filter specifier must have an \"id\" entry");
353         return 0;
354     }
355     f->id = PyLong_AsUnsignedLongLong(id_obj);
356     Py_DECREF(id_obj);
357     if (PyErr_Occurred()) {
358         return 0;
359     }
360 
361     switch (f->id) {
362         case LZMA_FILTER_LZMA1:
363         case LZMA_FILTER_LZMA2:
364             f->options = parse_filter_spec_lzma(state, spec);
365             return f->options != NULL;
366         case LZMA_FILTER_DELTA:
367             f->options = parse_filter_spec_delta(state, spec);
368             return f->options != NULL;
369         case LZMA_FILTER_X86:
370         case LZMA_FILTER_POWERPC:
371         case LZMA_FILTER_IA64:
372         case LZMA_FILTER_ARM:
373         case LZMA_FILTER_ARMTHUMB:
374         case LZMA_FILTER_SPARC:
375             f->options = parse_filter_spec_bcj(state, spec);
376             return f->options != NULL;
377         default:
378             PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
379             return 0;
380     }
381 }
382 
383 static void
free_filter_chain(lzma_filter filters[])384 free_filter_chain(lzma_filter filters[])
385 {
386     for (int i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++) {
387         PyMem_Free(filters[i].options);
388     }
389 }
390 
391 static int
parse_filter_chain_spec(_lzma_state * state,lzma_filter filters[],PyObject * filterspecs)392 parse_filter_chain_spec(_lzma_state *state, lzma_filter filters[], PyObject *filterspecs)
393 {
394     Py_ssize_t i, num_filters;
395 
396     num_filters = PySequence_Length(filterspecs);
397     if (num_filters == -1) {
398         return -1;
399     }
400     if (num_filters > LZMA_FILTERS_MAX) {
401         PyErr_Format(PyExc_ValueError,
402                      "Too many filters - liblzma supports a maximum of %d",
403                      LZMA_FILTERS_MAX);
404         return -1;
405     }
406 
407     for (i = 0; i < num_filters; i++) {
408         int ok = 1;
409         PyObject *spec = PySequence_GetItem(filterspecs, i);
410         if (spec == NULL || !lzma_filter_converter(state, spec, &filters[i])) {
411             ok = 0;
412         }
413         Py_XDECREF(spec);
414         if (!ok) {
415             filters[i].id = LZMA_VLI_UNKNOWN;
416             free_filter_chain(filters);
417             return -1;
418         }
419     }
420     filters[num_filters].id = LZMA_VLI_UNKNOWN;
421     return 0;
422 }
423 
424 
425 /* Filter specifier construction.
426 
427    This code handles converting C lzma_filter structs into
428    Python-level filter specifiers (represented as dicts). */
429 
430 static int
spec_add_field(PyObject * spec,const char * key,unsigned long long value)431 spec_add_field(PyObject *spec, const char *key, unsigned long long value)
432 {
433     PyObject *value_object = PyLong_FromUnsignedLongLong(value);
434     if (value_object == NULL) {
435         return -1;
436     }
437     PyObject *key_object = PyUnicode_InternFromString(key);
438     if (key_object == NULL) {
439         Py_DECREF(value_object);
440         return -1;
441     }
442     int status = PyDict_SetItem(spec, key_object, value_object);
443     Py_DECREF(key_object);
444     Py_DECREF(value_object);
445     return status;
446 }
447 
448 static PyObject *
build_filter_spec(const lzma_filter * f)449 build_filter_spec(const lzma_filter *f)
450 {
451     PyObject *spec;
452 
453     spec = PyDict_New();
454     if (spec == NULL) {
455         return NULL;
456     }
457 
458 #define ADD_FIELD(SOURCE, FIELD) \
459     do { \
460         if (spec_add_field(spec, #FIELD, SOURCE->FIELD) == -1) \
461             goto error;\
462     } while (0)
463 
464     ADD_FIELD(f, id);
465 
466     switch (f->id) {
467         /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the
468            lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
469            dict_size field is used. */
470         case LZMA_FILTER_LZMA1: {
471             lzma_options_lzma *options = f->options;
472             ADD_FIELD(options, lc);
473             ADD_FIELD(options, lp);
474             ADD_FIELD(options, pb);
475             ADD_FIELD(options, dict_size);
476             break;
477         }
478         case LZMA_FILTER_LZMA2: {
479             lzma_options_lzma *options = f->options;
480             ADD_FIELD(options, dict_size);
481             break;
482         }
483         case LZMA_FILTER_DELTA: {
484             lzma_options_delta *options = f->options;
485             ADD_FIELD(options, dist);
486             break;
487         }
488         case LZMA_FILTER_X86:
489         case LZMA_FILTER_POWERPC:
490         case LZMA_FILTER_IA64:
491         case LZMA_FILTER_ARM:
492         case LZMA_FILTER_ARMTHUMB:
493         case LZMA_FILTER_SPARC: {
494             lzma_options_bcj *options = f->options;
495             if (options) {
496                 ADD_FIELD(options, start_offset);
497             }
498             break;
499         }
500         default:
501             PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
502             goto error;
503     }
504 
505 #undef ADD_FIELD
506 
507     return spec;
508 
509 error:
510     Py_DECREF(spec);
511     return NULL;
512 }
513 
514 
515 /*[clinic input]
516 module _lzma
517 class _lzma.LZMACompressor "Compressor *" "&Compressor_type"
518 class _lzma.LZMADecompressor "Decompressor *" "&Decompressor_type"
519 [clinic start generated code]*/
520 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2c14bbe05ff0c147]*/
521 
522 #include "clinic/_lzmamodule.c.h"
523 
524 /*[python input]
525 
526 class lzma_vli_converter(CConverter):
527     type = 'lzma_vli'
528     converter = 'lzma_vli_converter'
529 
530 class lzma_filter_converter(CConverter):
531     type = 'lzma_filter'
532     converter = 'lzma_filter_converter'
533     c_default = c_ignored_default = "{LZMA_VLI_UNKNOWN, NULL}"
534 
535     def cleanup(self):
536         name = ensure_legal_c_identifier(self.name)
537         return ('if (%(name)s.id != LZMA_VLI_UNKNOWN)\n'
538                 '   PyMem_Free(%(name)s.options);\n') % {'name': name}
539 
540 [python start generated code]*/
541 /*[python end generated code: output=da39a3ee5e6b4b0d input=74fe7631ce377a94]*/
542 
543 
544 /* LZMACompressor class. */
545 
546 static PyObject *
compress(Compressor * c,uint8_t * data,size_t len,lzma_action action)547 compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
548 {
549     PyObject *result;
550     _BlocksOutputBuffer buffer = {.list = NULL};
551     _lzma_state *state = PyType_GetModuleState(Py_TYPE(c));
552     assert(state != NULL);
553 
554     if (OutputBuffer_InitAndGrow(&buffer, -1, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
555         goto error;
556     }
557     c->lzs.next_in = data;
558     c->lzs.avail_in = len;
559 
560     for (;;) {
561         lzma_ret lzret;
562 
563         Py_BEGIN_ALLOW_THREADS
564         lzret = lzma_code(&c->lzs, action);
565         Py_END_ALLOW_THREADS
566 
567         if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0) {
568             lzret = LZMA_OK; /* That wasn't a real error */
569         }
570         if (catch_lzma_error(state, lzret)) {
571             goto error;
572         }
573         if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
574             (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
575             break;
576         } else if (c->lzs.avail_out == 0) {
577             if (OutputBuffer_Grow(&buffer, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
578                 goto error;
579             }
580         }
581     }
582 
583     result = OutputBuffer_Finish(&buffer, c->lzs.avail_out);
584     if (result != NULL) {
585         return result;
586     }
587 
588 error:
589     OutputBuffer_OnError(&buffer);
590     return NULL;
591 }
592 
593 /*[clinic input]
594 _lzma.LZMACompressor.compress
595 
596     data: Py_buffer
597     /
598 
599 Provide data to the compressor object.
600 
601 Returns a chunk of compressed data if possible, or b'' otherwise.
602 
603 When you have finished providing data to the compressor, call the
604 flush() method to finish the compression process.
605 [clinic start generated code]*/
606 
607 static PyObject *
_lzma_LZMACompressor_compress_impl(Compressor * self,Py_buffer * data)608 _lzma_LZMACompressor_compress_impl(Compressor *self, Py_buffer *data)
609 /*[clinic end generated code: output=31f615136963e00f input=64019eac7f2cc8d0]*/
610 {
611     PyObject *result = NULL;
612 
613     ACQUIRE_LOCK(self);
614     if (self->flushed) {
615         PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
616     }
617     else {
618         result = compress(self, data->buf, data->len, LZMA_RUN);
619     }
620     RELEASE_LOCK(self);
621     return result;
622 }
623 
624 /*[clinic input]
625 _lzma.LZMACompressor.flush
626 
627 Finish the compression process.
628 
629 Returns the compressed data left in internal buffers.
630 
631 The compressor object may not be used after this method is called.
632 [clinic start generated code]*/
633 
634 static PyObject *
_lzma_LZMACompressor_flush_impl(Compressor * self)635 _lzma_LZMACompressor_flush_impl(Compressor *self)
636 /*[clinic end generated code: output=fec21f3e22504f50 input=6b369303f67ad0a8]*/
637 {
638     PyObject *result = NULL;
639 
640     ACQUIRE_LOCK(self);
641     if (self->flushed) {
642         PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
643     } else {
644         self->flushed = 1;
645         result = compress(self, NULL, 0, LZMA_FINISH);
646     }
647     RELEASE_LOCK(self);
648     return result;
649 }
650 
651 static int
Compressor_init_xz(_lzma_state * state,lzma_stream * lzs,int check,uint32_t preset,PyObject * filterspecs)652 Compressor_init_xz(_lzma_state *state, lzma_stream *lzs,
653                    int check, uint32_t preset, PyObject *filterspecs)
654 {
655     lzma_ret lzret;
656 
657     if (filterspecs == Py_None) {
658         lzret = lzma_easy_encoder(lzs, preset, check);
659     } else {
660         lzma_filter filters[LZMA_FILTERS_MAX + 1];
661 
662         if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
663             return -1;
664         lzret = lzma_stream_encoder(lzs, filters, check);
665         free_filter_chain(filters);
666     }
667     if (catch_lzma_error(state, lzret)) {
668         return -1;
669     }
670     else {
671         return 0;
672     }
673 }
674 
675 static int
Compressor_init_alone(_lzma_state * state,lzma_stream * lzs,uint32_t preset,PyObject * filterspecs)676 Compressor_init_alone(_lzma_state *state, lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
677 {
678     lzma_ret lzret;
679 
680     if (filterspecs == Py_None) {
681         lzma_options_lzma options;
682 
683         if (lzma_lzma_preset(&options, preset)) {
684             PyErr_Format(state->error, "Invalid compression preset: %u", preset);
685             return -1;
686         }
687         lzret = lzma_alone_encoder(lzs, &options);
688     } else {
689         lzma_filter filters[LZMA_FILTERS_MAX + 1];
690 
691         if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
692             return -1;
693         if (filters[0].id == LZMA_FILTER_LZMA1 &&
694             filters[1].id == LZMA_VLI_UNKNOWN) {
695             lzret = lzma_alone_encoder(lzs, filters[0].options);
696         } else {
697             PyErr_SetString(PyExc_ValueError,
698                             "Invalid filter chain for FORMAT_ALONE - "
699                             "must be a single LZMA1 filter");
700             lzret = LZMA_PROG_ERROR;
701         }
702         free_filter_chain(filters);
703     }
704     if (PyErr_Occurred() || catch_lzma_error(state, lzret)) {
705         return -1;
706     }
707     else {
708         return 0;
709     }
710 }
711 
712 static int
Compressor_init_raw(_lzma_state * state,lzma_stream * lzs,PyObject * filterspecs)713 Compressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
714 {
715     lzma_filter filters[LZMA_FILTERS_MAX + 1];
716     lzma_ret lzret;
717 
718     if (filterspecs == Py_None) {
719         PyErr_SetString(PyExc_ValueError,
720                         "Must specify filters for FORMAT_RAW");
721         return -1;
722     }
723     if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
724         return -1;
725     }
726     lzret = lzma_raw_encoder(lzs, filters);
727     free_filter_chain(filters);
728     if (catch_lzma_error(state, lzret)) {
729         return -1;
730     }
731     else {
732         return 0;
733     }
734 }
735 
736 /*[-clinic input]
737 @classmethod
738 _lzma.LZMACompressor.__new__
739 
740     format: int(c_default="FORMAT_XZ") = FORMAT_XZ
741         The container format to use for the output.  This can
742         be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.
743 
744     check: int(c_default="-1") = unspecified
745         The integrity check to use.  For FORMAT_XZ, the default
746         is CHECK_CRC64.  FORMAT_ALONE and FORMAT_RAW do not support integrity
747         checks; for these formats, check must be omitted, or be CHECK_NONE.
748 
749     preset: object = None
750         If provided should be an integer in the range 0-9, optionally
751         OR-ed with the constant PRESET_EXTREME.
752 
753     filters: object = None
754         If provided should be a sequence of dicts.  Each dict should
755         have an entry for "id" indicating the ID of the filter, plus
756         additional entries for options to the filter.
757 
758 Create a compressor object for compressing data incrementally.
759 
760 The settings used by the compressor can be specified either as a
761 preset compression level (with the 'preset' argument), or in detail
762 as a custom filter chain (with the 'filters' argument).  For FORMAT_XZ
763 and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
764 level.  For FORMAT_RAW, the caller must always specify a filter chain;
765 the raw compressor does not support preset compression levels.
766 
767 For one-shot compression, use the compress() function instead.
768 [-clinic start generated code]*/
769 static PyObject *
Compressor_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)770 Compressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
771 {
772     static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
773     int format = FORMAT_XZ;
774     int check = -1;
775     uint32_t preset = LZMA_PRESET_DEFAULT;
776     PyObject *preset_obj = Py_None;
777     PyObject *filterspecs = Py_None;
778     Compressor *self;
779 
780     _lzma_state *state = PyType_GetModuleState(type);
781     assert(state != NULL);
782     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
783                                      "|iiOO:LZMACompressor", arg_names,
784                                      &format, &check, &preset_obj,
785                                      &filterspecs)) {
786         return NULL;
787     }
788 
789     if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
790         PyErr_SetString(PyExc_ValueError,
791                         "Integrity checks are only supported by FORMAT_XZ");
792         return NULL;
793     }
794 
795     if (preset_obj != Py_None && filterspecs != Py_None) {
796         PyErr_SetString(PyExc_ValueError,
797                         "Cannot specify both preset and filter chain");
798         return NULL;
799     }
800 
801     if (preset_obj != Py_None && !uint32_converter(preset_obj, &preset)) {
802         return NULL;
803     }
804 
805     assert(type != NULL && type->tp_alloc != NULL);
806     self = (Compressor *)type->tp_alloc(type, 0);
807     if (self == NULL) {
808         return NULL;
809     }
810 
811     self->alloc.opaque = NULL;
812     self->alloc.alloc = PyLzma_Malloc;
813     self->alloc.free = PyLzma_Free;
814     self->lzs.allocator = &self->alloc;
815 
816     self->lock = PyThread_allocate_lock();
817     if (self->lock == NULL) {
818         Py_DECREF(self);
819         PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
820         return NULL;
821     }
822 
823     self->flushed = 0;
824     switch (format) {
825         case FORMAT_XZ:
826             if (check == -1) {
827                 check = LZMA_CHECK_CRC64;
828             }
829             if (Compressor_init_xz(state, &self->lzs, check, preset, filterspecs) != 0) {
830                 goto error;
831             }
832             break;
833 
834         case FORMAT_ALONE:
835             if (Compressor_init_alone(state, &self->lzs, preset, filterspecs) != 0) {
836                 goto error;
837             }
838             break;
839 
840         case FORMAT_RAW:
841             if (Compressor_init_raw(state, &self->lzs, filterspecs) != 0) {
842                 goto error;
843             }
844             break;
845 
846         default:
847             PyErr_Format(PyExc_ValueError,
848                          "Invalid container format: %d", format);
849             goto error;
850     }
851 
852     return (PyObject *)self;
853 
854 error:
855     Py_DECREF(self);
856     return NULL;
857 }
858 
859 static void
Compressor_dealloc(Compressor * self)860 Compressor_dealloc(Compressor *self)
861 {
862     lzma_end(&self->lzs);
863     if (self->lock != NULL) {
864         PyThread_free_lock(self->lock);
865     }
866     PyTypeObject *tp = Py_TYPE(self);
867     tp->tp_free((PyObject *)self);
868     Py_DECREF(tp);
869 }
870 
871 static PyMethodDef Compressor_methods[] = {
872     _LZMA_LZMACOMPRESSOR_COMPRESS_METHODDEF
873     _LZMA_LZMACOMPRESSOR_FLUSH_METHODDEF
874     {NULL}
875 };
876 
877 static int
Compressor_traverse(Compressor * self,visitproc visit,void * arg)878 Compressor_traverse(Compressor *self, visitproc visit, void *arg)
879 {
880     Py_VISIT(Py_TYPE(self));
881     return 0;
882 }
883 
884 PyDoc_STRVAR(Compressor_doc,
885 "LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
886 "\n"
887 "Create a compressor object for compressing data incrementally.\n"
888 "\n"
889 "format specifies the container format to use for the output. This can\n"
890 "be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
891 "\n"
892 "check specifies the integrity check to use. For FORMAT_XZ, the default\n"
893 "is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity\n"
894 "checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
895 "\n"
896 "The settings used by the compressor can be specified either as a\n"
897 "preset compression level (with the 'preset' argument), or in detail\n"
898 "as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
899 "and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
900 "level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
901 "the raw compressor does not support preset compression levels.\n"
902 "\n"
903 "preset (if provided) should be an integer in the range 0-9, optionally\n"
904 "OR-ed with the constant PRESET_EXTREME.\n"
905 "\n"
906 "filters (if provided) should be a sequence of dicts. Each dict should\n"
907 "have an entry for \"id\" indicating the ID of the filter, plus\n"
908 "additional entries for options to the filter.\n"
909 "\n"
910 "For one-shot compression, use the compress() function instead.\n");
911 
912 static PyType_Slot lzma_compressor_type_slots[] = {
913     {Py_tp_dealloc, Compressor_dealloc},
914     {Py_tp_methods, Compressor_methods},
915     {Py_tp_new, Compressor_new},
916     {Py_tp_doc, (char *)Compressor_doc},
917     {Py_tp_traverse, Compressor_traverse},
918     {0, 0}
919 };
920 
921 static PyType_Spec lzma_compressor_type_spec = {
922     .name = "_lzma.LZMACompressor",
923     .basicsize = sizeof(Compressor),
924     // Calling PyType_GetModuleState() on a subclass is not safe.
925     // lzma_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
926     // which prevents to create a subclass.
927     // So calling PyType_GetModuleState() in this file is always safe.
928     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
929     .slots = lzma_compressor_type_slots,
930 };
931 
932 /* LZMADecompressor class. */
933 
934 /* Decompress data of length d->lzs.avail_in in d->lzs.next_in.  The output
935    buffer is allocated dynamically and returned.  At most max_length bytes are
936    returned, so some of the input may not be consumed. d->lzs.next_in and
937    d->lzs.avail_in are updated to reflect the consumed input. */
938 static PyObject*
decompress_buf(Decompressor * d,Py_ssize_t max_length)939 decompress_buf(Decompressor *d, Py_ssize_t max_length)
940 {
941     PyObject *result;
942     lzma_stream *lzs = &d->lzs;
943     _BlocksOutputBuffer buffer = {.list = NULL};
944     _lzma_state *state = PyType_GetModuleState(Py_TYPE(d));
945     assert(state != NULL);
946 
947     if (OutputBuffer_InitAndGrow(&buffer, max_length, &lzs->next_out, &lzs->avail_out) < 0) {
948         goto error;
949     }
950 
951     for (;;) {
952         lzma_ret lzret;
953 
954         Py_BEGIN_ALLOW_THREADS
955         lzret = lzma_code(lzs, LZMA_RUN);
956         Py_END_ALLOW_THREADS
957 
958         if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0) {
959             lzret = LZMA_OK; /* That wasn't a real error */
960         }
961         if (catch_lzma_error(state, lzret)) {
962             goto error;
963         }
964         if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) {
965             d->check = lzma_get_check(&d->lzs);
966         }
967         if (lzret == LZMA_STREAM_END) {
968             d->eof = 1;
969             break;
970         } else if (lzs->avail_out == 0) {
971             /* Need to check lzs->avail_out before lzs->avail_in.
972                Maybe lzs's internal state still have a few bytes
973                can be output, grow the output buffer and continue
974                if max_lengh < 0. */
975             if (OutputBuffer_GetDataSize(&buffer, lzs->avail_out) == max_length) {
976                 break;
977             }
978             if (OutputBuffer_Grow(&buffer, &lzs->next_out, &lzs->avail_out) < 0) {
979                 goto error;
980             }
981         } else if (lzs->avail_in == 0) {
982             break;
983         }
984     }
985 
986     result = OutputBuffer_Finish(&buffer, lzs->avail_out);
987     if (result != NULL) {
988         return result;
989     }
990 
991 error:
992     OutputBuffer_OnError(&buffer);
993     return NULL;
994 }
995 
996 static PyObject *
decompress(Decompressor * d,uint8_t * data,size_t len,Py_ssize_t max_length)997 decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
998 {
999     char input_buffer_in_use;
1000     PyObject *result;
1001     lzma_stream *lzs = &d->lzs;
1002 
1003     /* Prepend unconsumed input if necessary */
1004     if (lzs->next_in != NULL) {
1005         size_t avail_now, avail_total;
1006 
1007         /* Number of bytes we can append to input buffer */
1008         avail_now = (d->input_buffer + d->input_buffer_size)
1009             - (lzs->next_in + lzs->avail_in);
1010 
1011         /* Number of bytes we can append if we move existing
1012            contents to beginning of buffer (overwriting
1013            consumed input) */
1014         avail_total = d->input_buffer_size - lzs->avail_in;
1015 
1016         if (avail_total < len) {
1017             size_t offset = lzs->next_in - d->input_buffer;
1018             uint8_t *tmp;
1019             size_t new_size = d->input_buffer_size + len - avail_now;
1020 
1021             /* Assign to temporary variable first, so we don't
1022                lose address of allocated buffer if realloc fails */
1023             tmp = PyMem_Realloc(d->input_buffer, new_size);
1024             if (tmp == NULL) {
1025                 PyErr_SetNone(PyExc_MemoryError);
1026                 return NULL;
1027             }
1028             d->input_buffer = tmp;
1029             d->input_buffer_size = new_size;
1030 
1031             lzs->next_in = d->input_buffer + offset;
1032         }
1033         else if (avail_now < len) {
1034             memmove(d->input_buffer, lzs->next_in,
1035                     lzs->avail_in);
1036             lzs->next_in = d->input_buffer;
1037         }
1038         memcpy((void*)(lzs->next_in + lzs->avail_in), data, len);
1039         lzs->avail_in += len;
1040         input_buffer_in_use = 1;
1041     }
1042     else {
1043         lzs->next_in = data;
1044         lzs->avail_in = len;
1045         input_buffer_in_use = 0;
1046     }
1047 
1048     result = decompress_buf(d, max_length);
1049     if (result == NULL) {
1050         lzs->next_in = NULL;
1051         return NULL;
1052     }
1053 
1054     if (d->eof) {
1055         d->needs_input = 0;
1056         if (lzs->avail_in > 0) {
1057             Py_XSETREF(d->unused_data,
1058                       PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
1059             if (d->unused_data == NULL) {
1060                 goto error;
1061             }
1062         }
1063     }
1064     else if (lzs->avail_in == 0) {
1065         lzs->next_in = NULL;
1066 
1067         if (lzs->avail_out == 0) {
1068             /* (avail_in==0 && avail_out==0)
1069                Maybe lzs's internal state still have a few bytes can
1070                be output, try to output them next time. */
1071             d->needs_input = 0;
1072 
1073             /* If max_length < 0, lzs->avail_out always > 0 */
1074             assert(max_length >= 0);
1075         } else {
1076             /* Input buffer exhausted, output buffer has space. */
1077             d->needs_input = 1;
1078         }
1079     }
1080     else {
1081         d->needs_input = 0;
1082 
1083         /* If we did not use the input buffer, we now have
1084            to copy the tail from the caller's buffer into the
1085            input buffer */
1086         if (!input_buffer_in_use) {
1087 
1088             /* Discard buffer if it's too small
1089                (resizing it may needlessly copy the current contents) */
1090             if (d->input_buffer != NULL &&
1091                 d->input_buffer_size < lzs->avail_in) {
1092                 PyMem_Free(d->input_buffer);
1093                 d->input_buffer = NULL;
1094             }
1095 
1096             /* Allocate if necessary */
1097             if (d->input_buffer == NULL) {
1098                 d->input_buffer = PyMem_Malloc(lzs->avail_in);
1099                 if (d->input_buffer == NULL) {
1100                     PyErr_SetNone(PyExc_MemoryError);
1101                     goto error;
1102                 }
1103                 d->input_buffer_size = lzs->avail_in;
1104             }
1105 
1106             /* Copy tail */
1107             memcpy(d->input_buffer, lzs->next_in, lzs->avail_in);
1108             lzs->next_in = d->input_buffer;
1109         }
1110     }
1111 
1112     return result;
1113 
1114 error:
1115     Py_XDECREF(result);
1116     return NULL;
1117 }
1118 
1119 /*[clinic input]
1120 _lzma.LZMADecompressor.decompress
1121 
1122     data: Py_buffer
1123     max_length: Py_ssize_t=-1
1124 
1125 Decompress *data*, returning uncompressed data as bytes.
1126 
1127 If *max_length* is nonnegative, returns at most *max_length* bytes of
1128 decompressed data. If this limit is reached and further output can be
1129 produced, *self.needs_input* will be set to ``False``. In this case, the next
1130 call to *decompress()* may provide *data* as b'' to obtain more of the output.
1131 
1132 If all of the input data was decompressed and returned (either because this
1133 was less than *max_length* bytes, or because *max_length* was negative),
1134 *self.needs_input* will be set to True.
1135 
1136 Attempting to decompress data after the end of stream is reached raises an
1137 EOFError.  Any data found after the end of the stream is ignored and saved in
1138 the unused_data attribute.
1139 [clinic start generated code]*/
1140 
1141 static PyObject *
_lzma_LZMADecompressor_decompress_impl(Decompressor * self,Py_buffer * data,Py_ssize_t max_length)1142 _lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data,
1143                                        Py_ssize_t max_length)
1144 /*[clinic end generated code: output=ef4e20ec7122241d input=60c1f135820e309d]*/
1145 {
1146     PyObject *result = NULL;
1147 
1148     ACQUIRE_LOCK(self);
1149     if (self->eof)
1150         PyErr_SetString(PyExc_EOFError, "Already at end of stream");
1151     else
1152         result = decompress(self, data->buf, data->len, max_length);
1153     RELEASE_LOCK(self);
1154     return result;
1155 }
1156 
1157 static int
Decompressor_init_raw(_lzma_state * state,lzma_stream * lzs,PyObject * filterspecs)1158 Decompressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
1159 {
1160     lzma_filter filters[LZMA_FILTERS_MAX + 1];
1161     lzma_ret lzret;
1162 
1163     if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
1164         return -1;
1165     }
1166     lzret = lzma_raw_decoder(lzs, filters);
1167     free_filter_chain(filters);
1168     if (catch_lzma_error(state, lzret)) {
1169         return -1;
1170     }
1171     else {
1172         return 0;
1173     }
1174 }
1175 
1176 /*[clinic input]
1177 @classmethod
1178 _lzma.LZMADecompressor.__new__
1179 
1180     format: int(c_default="FORMAT_AUTO") = FORMAT_AUTO
1181         Specifies the container format of the input stream.  If this is
1182         FORMAT_AUTO (the default), the decompressor will automatically detect
1183         whether the input is FORMAT_XZ or FORMAT_ALONE.  Streams created with
1184         FORMAT_RAW cannot be autodetected.
1185 
1186     memlimit: object = None
1187         Limit the amount of memory used by the decompressor.  This will cause
1188         decompression to fail if the input cannot be decompressed within the
1189         given limit.
1190 
1191     filters: object = None
1192         A custom filter chain.  This argument is required for FORMAT_RAW, and
1193         not accepted with any other format.  When provided, this should be a
1194         sequence of dicts, each indicating the ID and options for a single
1195         filter.
1196 
1197 Create a decompressor object for decompressing data incrementally.
1198 
1199 For one-shot decompression, use the decompress() function instead.
1200 [clinic start generated code]*/
1201 
1202 static PyObject *
_lzma_LZMADecompressor_impl(PyTypeObject * type,int format,PyObject * memlimit,PyObject * filters)1203 _lzma_LZMADecompressor_impl(PyTypeObject *type, int format,
1204                             PyObject *memlimit, PyObject *filters)
1205 /*[clinic end generated code: output=2d46d5e70f10bc7f input=ca40cd1cb1202b0d]*/
1206 {
1207     Decompressor *self;
1208     const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
1209     uint64_t memlimit_ = UINT64_MAX;
1210     lzma_ret lzret;
1211     _lzma_state *state = PyType_GetModuleState(type);
1212     assert(state != NULL);
1213 
1214     if (memlimit != Py_None) {
1215         if (format == FORMAT_RAW) {
1216             PyErr_SetString(PyExc_ValueError,
1217                             "Cannot specify memory limit with FORMAT_RAW");
1218             return NULL;
1219         }
1220         memlimit_ = PyLong_AsUnsignedLongLong(memlimit);
1221         if (PyErr_Occurred()) {
1222             return NULL;
1223         }
1224     }
1225 
1226     if (format == FORMAT_RAW && filters == Py_None) {
1227         PyErr_SetString(PyExc_ValueError,
1228                         "Must specify filters for FORMAT_RAW");
1229         return NULL;
1230     } else if (format != FORMAT_RAW && filters != Py_None) {
1231         PyErr_SetString(PyExc_ValueError,
1232                         "Cannot specify filters except with FORMAT_RAW");
1233         return NULL;
1234     }
1235 
1236     assert(type != NULL && type->tp_alloc != NULL);
1237     self = (Decompressor *)type->tp_alloc(type, 0);
1238     if (self == NULL) {
1239         return NULL;
1240     }
1241     self->alloc.opaque = NULL;
1242     self->alloc.alloc = PyLzma_Malloc;
1243     self->alloc.free = PyLzma_Free;
1244     self->lzs.allocator = &self->alloc;
1245     self->lzs.next_in = NULL;
1246 
1247     self->lock = PyThread_allocate_lock();
1248     if (self->lock == NULL) {
1249         Py_DECREF(self);
1250         PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
1251         return NULL;
1252     }
1253 
1254     self->check = LZMA_CHECK_UNKNOWN;
1255     self->needs_input = 1;
1256     self->input_buffer = NULL;
1257     self->input_buffer_size = 0;
1258     Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
1259     if (self->unused_data == NULL) {
1260         goto error;
1261     }
1262 
1263     switch (format) {
1264         case FORMAT_AUTO:
1265             lzret = lzma_auto_decoder(&self->lzs, memlimit_, decoder_flags);
1266             if (catch_lzma_error(state, lzret)) {
1267                 goto error;
1268             }
1269             break;
1270 
1271         case FORMAT_XZ:
1272             lzret = lzma_stream_decoder(&self->lzs, memlimit_, decoder_flags);
1273             if (catch_lzma_error(state, lzret)) {
1274                 goto error;
1275             }
1276             break;
1277 
1278         case FORMAT_ALONE:
1279             self->check = LZMA_CHECK_NONE;
1280             lzret = lzma_alone_decoder(&self->lzs, memlimit_);
1281             if (catch_lzma_error(state, lzret)) {
1282                 goto error;
1283             }
1284             break;
1285 
1286         case FORMAT_RAW:
1287             self->check = LZMA_CHECK_NONE;
1288             if (Decompressor_init_raw(state, &self->lzs, filters) == -1) {
1289                 goto error;
1290             }
1291             break;
1292 
1293         default:
1294             PyErr_Format(PyExc_ValueError,
1295                          "Invalid container format: %d", format);
1296             goto error;
1297     }
1298 
1299     return (PyObject *)self;
1300 
1301 error:
1302     Py_DECREF(self);
1303     return NULL;
1304 }
1305 
1306 static void
Decompressor_dealloc(Decompressor * self)1307 Decompressor_dealloc(Decompressor *self)
1308 {
1309     if(self->input_buffer != NULL)
1310         PyMem_Free(self->input_buffer);
1311 
1312     lzma_end(&self->lzs);
1313     Py_CLEAR(self->unused_data);
1314     if (self->lock != NULL) {
1315         PyThread_free_lock(self->lock);
1316     }
1317     PyTypeObject *tp = Py_TYPE(self);
1318     tp->tp_free((PyObject *)self);
1319     Py_DECREF(tp);
1320 }
1321 
1322 static int
Decompressor_traverse(Decompressor * self,visitproc visit,void * arg)1323 Decompressor_traverse(Decompressor *self, visitproc visit, void *arg)
1324 {
1325     Py_VISIT(Py_TYPE(self));
1326     return 0;
1327 }
1328 
1329 static PyMethodDef Decompressor_methods[] = {
1330     _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF
1331     {NULL}
1332 };
1333 
1334 PyDoc_STRVAR(Decompressor_check_doc,
1335 "ID of the integrity check used by the input stream.");
1336 
1337 PyDoc_STRVAR(Decompressor_eof_doc,
1338 "True if the end-of-stream marker has been reached.");
1339 
1340 PyDoc_STRVAR(Decompressor_needs_input_doc,
1341 "True if more input is needed before more decompressed data can be produced.");
1342 
1343 PyDoc_STRVAR(Decompressor_unused_data_doc,
1344 "Data found after the end of the compressed stream.");
1345 
1346 static PyMemberDef Decompressor_members[] = {
1347     {"check", Py_T_INT, offsetof(Decompressor, check), Py_READONLY,
1348      Decompressor_check_doc},
1349     {"eof", Py_T_BOOL, offsetof(Decompressor, eof), Py_READONLY,
1350      Decompressor_eof_doc},
1351     {"needs_input", Py_T_BOOL, offsetof(Decompressor, needs_input), Py_READONLY,
1352      Decompressor_needs_input_doc},
1353     {"unused_data", Py_T_OBJECT_EX, offsetof(Decompressor, unused_data), Py_READONLY,
1354      Decompressor_unused_data_doc},
1355     {NULL}
1356 };
1357 
1358 static PyType_Slot lzma_decompressor_type_slots[] = {
1359     {Py_tp_dealloc, Decompressor_dealloc},
1360     {Py_tp_methods, Decompressor_methods},
1361     {Py_tp_new, _lzma_LZMADecompressor},
1362     {Py_tp_doc, (char *)_lzma_LZMADecompressor__doc__},
1363     {Py_tp_traverse, Decompressor_traverse},
1364     {Py_tp_members, Decompressor_members},
1365     {0, 0}
1366 };
1367 
1368 static PyType_Spec lzma_decompressor_type_spec = {
1369     .name = "_lzma.LZMADecompressor",
1370     .basicsize = sizeof(Decompressor),
1371     // Calling PyType_GetModuleState() on a subclass is not safe.
1372     // lzma_decompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
1373     // which prevents to create a subclass.
1374     // So calling PyType_GetModuleState() in this file is always safe.
1375     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
1376     .slots = lzma_decompressor_type_slots,
1377 };
1378 
1379 
1380 /* Module-level functions. */
1381 
1382 /*[clinic input]
1383 _lzma.is_check_supported
1384     check_id: int
1385     /
1386 
1387 Test whether the given integrity check is supported.
1388 
1389 Always returns True for CHECK_NONE and CHECK_CRC32.
1390 [clinic start generated code]*/
1391 
1392 static PyObject *
_lzma_is_check_supported_impl(PyObject * module,int check_id)1393 _lzma_is_check_supported_impl(PyObject *module, int check_id)
1394 /*[clinic end generated code: output=e4f14ba3ce2ad0a5 input=5518297b97b2318f]*/
1395 {
1396     return PyBool_FromLong(lzma_check_is_supported(check_id));
1397 }
1398 
1399 PyDoc_STRVAR(_lzma__encode_filter_properties__doc__,
1400 "_encode_filter_properties($module, filter, /)\n"
1401 "--\n"
1402 "\n"
1403 "Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).\n"
1404 "\n"
1405 "The result does not include the filter ID itself, only the options.");
1406 
1407 #define _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF    \
1408     {"_encode_filter_properties", (PyCFunction)_lzma__encode_filter_properties, METH_O, _lzma__encode_filter_properties__doc__},
1409 
1410 static PyObject *
1411 _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter);
1412 
1413 static PyObject *
_lzma__encode_filter_properties(PyObject * module,PyObject * arg)1414 _lzma__encode_filter_properties(PyObject *module, PyObject *arg)
1415 {
1416     PyObject *return_value = NULL;
1417     lzma_filter filter = {LZMA_VLI_UNKNOWN, NULL};
1418     _lzma_state *state = get_lzma_state(module);
1419     assert(state != NULL);
1420     if (!lzma_filter_converter(state, arg, &filter)) {
1421         goto exit;
1422     }
1423     return_value = _lzma__encode_filter_properties_impl(module, filter);
1424 
1425 exit:
1426     /* Cleanup for filter */
1427     if (filter.id != LZMA_VLI_UNKNOWN) {
1428        PyMem_Free(filter.options);
1429     }
1430 
1431     return return_value;
1432 }
1433 
1434 static PyObject *
_lzma__encode_filter_properties_impl(PyObject * module,lzma_filter filter)1435 _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter)
1436 {
1437     lzma_ret lzret;
1438     uint32_t encoded_size;
1439     PyObject *result = NULL;
1440     _lzma_state *state = get_lzma_state(module);
1441     assert(state != NULL);
1442 
1443     lzret = lzma_properties_size(&encoded_size, &filter);
1444     if (catch_lzma_error(state, lzret))
1445         goto error;
1446 
1447     result = PyBytes_FromStringAndSize(NULL, encoded_size);
1448     if (result == NULL)
1449         goto error;
1450 
1451     lzret = lzma_properties_encode(
1452             &filter, (uint8_t *)PyBytes_AS_STRING(result));
1453     if (catch_lzma_error(state, lzret)) {
1454         goto error;
1455     }
1456 
1457     return result;
1458 
1459 error:
1460     Py_XDECREF(result);
1461     return NULL;
1462 }
1463 
1464 
1465 /*[clinic input]
1466 _lzma._decode_filter_properties
1467     filter_id: lzma_vli
1468     encoded_props: Py_buffer
1469     /
1470 
1471 Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).
1472 
1473 The result does not include the filter ID itself, only the options.
1474 [clinic start generated code]*/
1475 
1476 static PyObject *
_lzma__decode_filter_properties_impl(PyObject * module,lzma_vli filter_id,Py_buffer * encoded_props)1477 _lzma__decode_filter_properties_impl(PyObject *module, lzma_vli filter_id,
1478                                      Py_buffer *encoded_props)
1479 /*[clinic end generated code: output=714fd2ef565d5c60 input=246410800782160c]*/
1480 {
1481     lzma_filter filter;
1482     lzma_ret lzret;
1483     PyObject *result = NULL;
1484     filter.id = filter_id;
1485     _lzma_state *state = get_lzma_state(module);
1486     assert(state != NULL);
1487 
1488     lzret = lzma_properties_decode(
1489             &filter, NULL, encoded_props->buf, encoded_props->len);
1490     if (catch_lzma_error(state, lzret)) {
1491         return NULL;
1492     }
1493 
1494     result = build_filter_spec(&filter);
1495 
1496     /* We use vanilla free() here instead of PyMem_Free() - filter.options was
1497        allocated by lzma_properties_decode() using the default allocator. */
1498     free(filter.options);
1499     return result;
1500 }
1501 
1502 /* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1503    would not work correctly on platforms with 32-bit longs. */
1504 static int
module_add_int_constant(PyObject * m,const char * name,long long value)1505 module_add_int_constant(PyObject *m, const char *name, long long value)
1506 {
1507     return PyModule_Add(m, name, PyLong_FromLongLong(value));
1508 }
1509 
1510 static int
lzma_exec(PyObject * module)1511 lzma_exec(PyObject *module)
1512 {
1513 #define ADD_INT_PREFIX_MACRO(module, macro)                                 \
1514     do {                                                                    \
1515         if (module_add_int_constant(module, #macro, LZMA_ ## macro) < 0) {  \
1516             return -1;                                                      \
1517         }                                                                   \
1518     } while(0)
1519 
1520 #define ADD_INT_MACRO(module, macro)                                        \
1521     do {                                                                    \
1522         if (PyModule_AddIntMacro(module, macro) < 0) {                      \
1523             return -1;                                                      \
1524         }                                                                   \
1525     } while (0)
1526 
1527 
1528     _lzma_state *state = get_lzma_state(module);
1529 
1530     state->empty_tuple = PyTuple_New(0);
1531     if (state->empty_tuple == NULL) {
1532         return -1;
1533     }
1534 
1535     ADD_INT_MACRO(module, FORMAT_AUTO);
1536     ADD_INT_MACRO(module, FORMAT_XZ);
1537     ADD_INT_MACRO(module, FORMAT_ALONE);
1538     ADD_INT_MACRO(module, FORMAT_RAW);
1539     ADD_INT_PREFIX_MACRO(module, CHECK_NONE);
1540     ADD_INT_PREFIX_MACRO(module, CHECK_CRC32);
1541     ADD_INT_PREFIX_MACRO(module, CHECK_CRC64);
1542     ADD_INT_PREFIX_MACRO(module, CHECK_SHA256);
1543     ADD_INT_PREFIX_MACRO(module, CHECK_ID_MAX);
1544     ADD_INT_PREFIX_MACRO(module, CHECK_UNKNOWN);
1545     ADD_INT_PREFIX_MACRO(module, FILTER_LZMA1);
1546     ADD_INT_PREFIX_MACRO(module, FILTER_LZMA2);
1547     ADD_INT_PREFIX_MACRO(module, FILTER_DELTA);
1548     ADD_INT_PREFIX_MACRO(module, FILTER_X86);
1549     ADD_INT_PREFIX_MACRO(module, FILTER_IA64);
1550     ADD_INT_PREFIX_MACRO(module, FILTER_ARM);
1551     ADD_INT_PREFIX_MACRO(module, FILTER_ARMTHUMB);
1552     ADD_INT_PREFIX_MACRO(module, FILTER_SPARC);
1553     ADD_INT_PREFIX_MACRO(module, FILTER_POWERPC);
1554     ADD_INT_PREFIX_MACRO(module, MF_HC3);
1555     ADD_INT_PREFIX_MACRO(module, MF_HC4);
1556     ADD_INT_PREFIX_MACRO(module, MF_BT2);
1557     ADD_INT_PREFIX_MACRO(module, MF_BT3);
1558     ADD_INT_PREFIX_MACRO(module, MF_BT4);
1559     ADD_INT_PREFIX_MACRO(module, MODE_FAST);
1560     ADD_INT_PREFIX_MACRO(module, MODE_NORMAL);
1561     ADD_INT_PREFIX_MACRO(module, PRESET_DEFAULT);
1562     ADD_INT_PREFIX_MACRO(module, PRESET_EXTREME);
1563 
1564     state->error = PyErr_NewExceptionWithDoc("_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
1565     if (state->error == NULL) {
1566         return -1;
1567     }
1568 
1569     if (PyModule_AddType(module, (PyTypeObject *)state->error) < 0) {
1570         return -1;
1571     }
1572 
1573 
1574     state->lzma_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
1575                                                             &lzma_compressor_type_spec, NULL);
1576     if (state->lzma_compressor_type == NULL) {
1577         return -1;
1578     }
1579 
1580     if (PyModule_AddType(module, state->lzma_compressor_type) < 0) {
1581         return -1;
1582     }
1583 
1584     state->lzma_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
1585                                                          &lzma_decompressor_type_spec, NULL);
1586     if (state->lzma_decompressor_type == NULL) {
1587         return -1;
1588     }
1589 
1590     if (PyModule_AddType(module, state->lzma_decompressor_type) < 0) {
1591         return -1;
1592     }
1593 
1594     return 0;
1595 }
1596 
1597 static PyMethodDef lzma_methods[] = {
1598     _LZMA_IS_CHECK_SUPPORTED_METHODDEF
1599     _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
1600     _LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
1601     {NULL}
1602 };
1603 
1604 static PyModuleDef_Slot lzma_slots[] = {
1605     {Py_mod_exec, lzma_exec},
1606     {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1607     {Py_mod_gil, Py_MOD_GIL_NOT_USED},
1608     {0, NULL}
1609 };
1610 
1611 static int
lzma_traverse(PyObject * module,visitproc visit,void * arg)1612 lzma_traverse(PyObject *module, visitproc visit, void *arg)
1613 {
1614     _lzma_state *state = get_lzma_state(module);
1615     Py_VISIT(state->lzma_compressor_type);
1616     Py_VISIT(state->lzma_decompressor_type);
1617     Py_VISIT(state->error);
1618     Py_VISIT(state->empty_tuple);
1619     return 0;
1620 }
1621 
1622 static int
lzma_clear(PyObject * module)1623 lzma_clear(PyObject *module)
1624 {
1625     _lzma_state *state = get_lzma_state(module);
1626     Py_CLEAR(state->lzma_compressor_type);
1627     Py_CLEAR(state->lzma_decompressor_type);
1628     Py_CLEAR(state->error);
1629     Py_CLEAR(state->empty_tuple);
1630     return 0;
1631 }
1632 
1633 static void
lzma_free(void * module)1634 lzma_free(void *module)
1635 {
1636     lzma_clear((PyObject *)module);
1637 }
1638 
1639 static PyModuleDef _lzmamodule = {
1640     PyModuleDef_HEAD_INIT,
1641     .m_name = "_lzma",
1642     .m_size = sizeof(_lzma_state),
1643     .m_methods = lzma_methods,
1644     .m_slots = lzma_slots,
1645     .m_traverse = lzma_traverse,
1646     .m_clear = lzma_clear,
1647     .m_free = lzma_free,
1648 };
1649 
1650 PyMODINIT_FUNC
PyInit__lzma(void)1651 PyInit__lzma(void)
1652 {
1653     return PyModuleDef_Init(&_lzmamodule);
1654 }
1655