• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* _lzma - Low-level Python interface to liblzma.
2 
3    Initial implementation by Per Øyvind Karlsen.
4    Rewritten by Nadeem Vawda.
5 
6 */
7 
8 #define PY_SSIZE_T_CLEAN
9 
10 #include "Python.h"
11 #include "structmember.h"         // PyMemberDef
12 
13 #include <stdarg.h>
14 #include <string.h>
15 
16 #include <lzma.h>
17 
18 // Blocks output buffer wrappers
19 #include "pycore_blocks_output_buffer.h"
20 
21 #if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX
22     #error "The maximum block size accepted by liblzma is SIZE_MAX."
23 #endif
24 
25 /* On success, return value >= 0
26    On failure, return -1 */
27 static inline Py_ssize_t
OutputBuffer_InitAndGrow(_BlocksOutputBuffer * buffer,Py_ssize_t max_length,uint8_t ** next_out,size_t * avail_out)28 OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
29                          uint8_t **next_out, size_t *avail_out)
30 {
31     Py_ssize_t allocated;
32 
33     allocated = _BlocksOutputBuffer_InitAndGrow(
34                     buffer, max_length, (void**) next_out);
35     *avail_out = (size_t) allocated;
36     return allocated;
37 }
38 
39 /* On success, return value >= 0
40    On failure, return -1 */
41 static inline Py_ssize_t
OutputBuffer_Grow(_BlocksOutputBuffer * buffer,uint8_t ** next_out,size_t * avail_out)42 OutputBuffer_Grow(_BlocksOutputBuffer *buffer,
43                   uint8_t **next_out, size_t *avail_out)
44 {
45     Py_ssize_t allocated;
46 
47     allocated = _BlocksOutputBuffer_Grow(
48                     buffer, (void**) next_out, (Py_ssize_t) *avail_out);
49     *avail_out = (size_t) allocated;
50     return allocated;
51 }
52 
53 static inline Py_ssize_t
OutputBuffer_GetDataSize(_BlocksOutputBuffer * buffer,size_t avail_out)54 OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, size_t avail_out)
55 {
56     return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
57 }
58 
59 static inline PyObject *
OutputBuffer_Finish(_BlocksOutputBuffer * buffer,size_t avail_out)60 OutputBuffer_Finish(_BlocksOutputBuffer *buffer, size_t avail_out)
61 {
62     return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
63 }
64 
65 static inline void
OutputBuffer_OnError(_BlocksOutputBuffer * buffer)66 OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
67 {
68     _BlocksOutputBuffer_OnError(buffer);
69 }
70 
71 
/* Acquire obj->lock.  A non-waiting attempt (second argument 0) is made
   first; only if that fails is a waiting acquire (second argument 1)
   performed, bracketed by Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj) \
    do { \
        if (!PyThread_acquire_lock((obj)->lock, 0)) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)

/* Release obj->lock. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
79 
80 typedef struct {
81     PyTypeObject *lzma_compressor_type;
82     PyTypeObject *lzma_decompressor_type;
83     PyObject *error;
84     PyObject *empty_tuple;
85 } _lzma_state;
86 
87 static inline _lzma_state*
get_lzma_state(PyObject * module)88 get_lzma_state(PyObject *module)
89 {
90     void *state = PyModule_GetState(module);
91     assert(state != NULL);
92     return (_lzma_state *)state;
93 }
94 
/* Container formats.  The numeric values are spelled out; they are the
   same values the implicit enumeration produced. */
enum {
    FORMAT_AUTO  = 0,
    FORMAT_XZ    = 1,
    FORMAT_ALONE = 2,
    FORMAT_RAW   = 3,
};
102 
103 #define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
104 
105 
106 typedef struct {
107     PyObject_HEAD
108     lzma_allocator alloc;
109     lzma_stream lzs;
110     int flushed;
111     PyThread_type_lock lock;
112 } Compressor;
113 
114 typedef struct {
115     PyObject_HEAD
116     lzma_allocator alloc;
117     lzma_stream lzs;
118     int check;
119     char eof;
120     PyObject *unused_data;
121     char needs_input;
122     uint8_t *input_buffer;
123     size_t input_buffer_size;
124     PyThread_type_lock lock;
125 } Decompressor;
126 
127 /* Helper functions. */
128 
129 static int
catch_lzma_error(_lzma_state * state,lzma_ret lzret)130 catch_lzma_error(_lzma_state *state, lzma_ret lzret)
131 {
132     switch (lzret) {
133         case LZMA_OK:
134         case LZMA_GET_CHECK:
135         case LZMA_NO_CHECK:
136         case LZMA_STREAM_END:
137             return 0;
138         case LZMA_UNSUPPORTED_CHECK:
139             PyErr_SetString(state->error, "Unsupported integrity check");
140             return 1;
141         case LZMA_MEM_ERROR:
142             PyErr_NoMemory();
143             return 1;
144         case LZMA_MEMLIMIT_ERROR:
145             PyErr_SetString(state->error, "Memory usage limit exceeded");
146             return 1;
147         case LZMA_FORMAT_ERROR:
148             PyErr_SetString(state->error, "Input format not supported by decoder");
149             return 1;
150         case LZMA_OPTIONS_ERROR:
151             PyErr_SetString(state->error, "Invalid or unsupported options");
152             return 1;
153         case LZMA_DATA_ERROR:
154             PyErr_SetString(state->error, "Corrupt input data");
155             return 1;
156         case LZMA_BUF_ERROR:
157             PyErr_SetString(state->error, "Insufficient buffer space");
158             return 1;
159         case LZMA_PROG_ERROR:
160             PyErr_SetString(state->error, "Internal error");
161             return 1;
162         default:
163             PyErr_Format(state->error, "Unrecognized error from liblzma: %d", lzret);
164             return 1;
165     }
166 }
167 
168 static void*
PyLzma_Malloc(void * opaque,size_t items,size_t size)169 PyLzma_Malloc(void *opaque, size_t items, size_t size)
170 {
171     if (size != 0 && items > (size_t)PY_SSIZE_T_MAX / size) {
172         return NULL;
173     }
174     /* PyMem_Malloc() cannot be used:
175        the GIL is not held when lzma_code() is called */
176     return PyMem_RawMalloc(items * size);
177 }
178 
/* Deallocation callback installed in lzma_allocator.free; releases `ptr`
   with PyMem_RawFree(), matching PyLzma_Malloc.  `opaque` is unused. */
static void
PyLzma_Free(void *opaque, void *ptr)
{
    (void)opaque;
    PyMem_RawFree(ptr);
}
184 
185 
186 /* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
187    since the predefined conversion specifiers do not suit our needs:
188 
189       uint32_t - the "I" (unsigned int) specifier is the right size, but
190       silently ignores overflows on conversion.
191 
192       lzma_vli - the "K" (unsigned long long) specifier is the right
193       size, but like "I" it silently ignores overflows on conversion.
194 
195       lzma_mode and lzma_match_finder - these are enumeration types, and
196       so the size of each is implementation-defined. Worse, different
197       enum types can be of different sizes within the same program, so
198       to be strictly correct, we need to define two separate converters.
199  */
200 
201 #define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
202     static int \
203     FUNCNAME(PyObject *obj, void *ptr) \
204     { \
205         unsigned long long val; \
206         \
207         val = PyLong_AsUnsignedLongLong(obj); \
208         if (PyErr_Occurred()) \
209             return 0; \
210         if ((unsigned long long)(TYPE)val != val) { \
211             PyErr_SetString(PyExc_OverflowError, \
212                             "Value too large for " #TYPE " type"); \
213             return 0; \
214         } \
215         *(TYPE *)ptr = (TYPE)val; \
216         return 1; \
217     }
218 
INT_TYPE_CONVERTER_FUNC(uint32_t,uint32_converter)219 INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
220 INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
221 INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
222 INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
223 
224 #undef INT_TYPE_CONVERTER_FUNC
225 
226 
227 /* Filter specifier parsing.
228 
229    This code handles converting filter specifiers (Python dicts) into
230    the C lzma_filter structs expected by liblzma. */
231 
232 static void *
233 parse_filter_spec_lzma(_lzma_state *state, PyObject *spec)
234 {
235     static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
236                                "pb", "mode", "nice_len", "mf", "depth", NULL};
237     PyObject *id;
238     PyObject *preset_obj;
239     uint32_t preset = LZMA_PRESET_DEFAULT;
240     lzma_options_lzma *options;
241 
242     /* First, fill in default values for all the options using a preset.
243        Then, override the defaults with any values given by the caller. */
244 
245     preset_obj = PyMapping_GetItemString(spec, "preset");
246     if (preset_obj == NULL) {
247         if (PyErr_ExceptionMatches(PyExc_KeyError)) {
248             PyErr_Clear();
249         }
250         else {
251             return NULL;
252         }
253     } else {
254         int ok = uint32_converter(preset_obj, &preset);
255         Py_DECREF(preset_obj);
256         if (!ok) {
257             return NULL;
258         }
259     }
260 
261     options = (lzma_options_lzma *)PyMem_Calloc(1, sizeof *options);
262     if (options == NULL) {
263         return PyErr_NoMemory();
264     }
265 
266     if (lzma_lzma_preset(options, preset)) {
267         PyMem_Free(options);
268         PyErr_Format(state->error, "Invalid compression preset: %u", preset);
269         return NULL;
270     }
271 
272     if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec,
273                                      "|OOO&O&O&O&O&O&O&O&", optnames,
274                                      &id, &preset_obj,
275                                      uint32_converter, &options->dict_size,
276                                      uint32_converter, &options->lc,
277                                      uint32_converter, &options->lp,
278                                      uint32_converter, &options->pb,
279                                      lzma_mode_converter, &options->mode,
280                                      uint32_converter, &options->nice_len,
281                                      lzma_mf_converter, &options->mf,
282                                      uint32_converter, &options->depth)) {
283         PyErr_SetString(PyExc_ValueError,
284                         "Invalid filter specifier for LZMA filter");
285         PyMem_Free(options);
286         return NULL;
287     }
288 
289     return options;
290 }
291 
292 static void *
parse_filter_spec_delta(_lzma_state * state,PyObject * spec)293 parse_filter_spec_delta(_lzma_state *state, PyObject *spec)
294 {
295     static char *optnames[] = {"id", "dist", NULL};
296     PyObject *id;
297     uint32_t dist = 1;
298     lzma_options_delta *options;
299 
300     if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
301                                      &id, uint32_converter, &dist)) {
302         PyErr_SetString(PyExc_ValueError,
303                         "Invalid filter specifier for delta filter");
304         return NULL;
305     }
306 
307     options = (lzma_options_delta *)PyMem_Calloc(1, sizeof *options);
308     if (options == NULL) {
309         return PyErr_NoMemory();
310     }
311     options->type = LZMA_DELTA_TYPE_BYTE;
312     options->dist = dist;
313     return options;
314 }
315 
316 static void *
parse_filter_spec_bcj(_lzma_state * state,PyObject * spec)317 parse_filter_spec_bcj(_lzma_state *state, PyObject *spec)
318 {
319     static char *optnames[] = {"id", "start_offset", NULL};
320     PyObject *id;
321     uint32_t start_offset = 0;
322     lzma_options_bcj *options;
323 
324     if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
325                                      &id, uint32_converter, &start_offset)) {
326         PyErr_SetString(PyExc_ValueError,
327                         "Invalid filter specifier for BCJ filter");
328         return NULL;
329     }
330 
331     options = (lzma_options_bcj *)PyMem_Calloc(1, sizeof *options);
332     if (options == NULL) {
333         return PyErr_NoMemory();
334     }
335     options->start_offset = start_offset;
336     return options;
337 }
338 
339 static int
lzma_filter_converter(_lzma_state * state,PyObject * spec,void * ptr)340 lzma_filter_converter(_lzma_state *state, PyObject *spec, void *ptr)
341 {
342     lzma_filter *f = (lzma_filter *)ptr;
343     PyObject *id_obj;
344 
345     if (!PyMapping_Check(spec)) {
346         PyErr_SetString(PyExc_TypeError,
347                         "Filter specifier must be a dict or dict-like object");
348         return 0;
349     }
350     id_obj = PyMapping_GetItemString(spec, "id");
351     if (id_obj == NULL) {
352         if (PyErr_ExceptionMatches(PyExc_KeyError))
353             PyErr_SetString(PyExc_ValueError,
354                             "Filter specifier must have an \"id\" entry");
355         return 0;
356     }
357     f->id = PyLong_AsUnsignedLongLong(id_obj);
358     Py_DECREF(id_obj);
359     if (PyErr_Occurred()) {
360         return 0;
361     }
362 
363     switch (f->id) {
364         case LZMA_FILTER_LZMA1:
365         case LZMA_FILTER_LZMA2:
366             f->options = parse_filter_spec_lzma(state, spec);
367             return f->options != NULL;
368         case LZMA_FILTER_DELTA:
369             f->options = parse_filter_spec_delta(state, spec);
370             return f->options != NULL;
371         case LZMA_FILTER_X86:
372         case LZMA_FILTER_POWERPC:
373         case LZMA_FILTER_IA64:
374         case LZMA_FILTER_ARM:
375         case LZMA_FILTER_ARMTHUMB:
376         case LZMA_FILTER_SPARC:
377             f->options = parse_filter_spec_bcj(state, spec);
378             return f->options != NULL;
379         default:
380             PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
381             return 0;
382     }
383 }
384 
385 static void
free_filter_chain(lzma_filter filters[])386 free_filter_chain(lzma_filter filters[])
387 {
388     for (int i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++) {
389         PyMem_Free(filters[i].options);
390     }
391 }
392 
393 static int
parse_filter_chain_spec(_lzma_state * state,lzma_filter filters[],PyObject * filterspecs)394 parse_filter_chain_spec(_lzma_state *state, lzma_filter filters[], PyObject *filterspecs)
395 {
396     Py_ssize_t i, num_filters;
397 
398     num_filters = PySequence_Length(filterspecs);
399     if (num_filters == -1) {
400         return -1;
401     }
402     if (num_filters > LZMA_FILTERS_MAX) {
403         PyErr_Format(PyExc_ValueError,
404                      "Too many filters - liblzma supports a maximum of %d",
405                      LZMA_FILTERS_MAX);
406         return -1;
407     }
408 
409     for (i = 0; i < num_filters; i++) {
410         int ok = 1;
411         PyObject *spec = PySequence_GetItem(filterspecs, i);
412         if (spec == NULL || !lzma_filter_converter(state, spec, &filters[i])) {
413             ok = 0;
414         }
415         Py_XDECREF(spec);
416         if (!ok) {
417             filters[i].id = LZMA_VLI_UNKNOWN;
418             free_filter_chain(filters);
419             return -1;
420         }
421     }
422     filters[num_filters].id = LZMA_VLI_UNKNOWN;
423     return 0;
424 }
425 
426 
427 /* Filter specifier construction.
428 
429    This code handles converting C lzma_filter structs into
430    Python-level filter specifiers (represented as dicts). */
431 
432 static int
spec_add_field(PyObject * spec,_Py_Identifier * key,unsigned long long value)433 spec_add_field(PyObject *spec, _Py_Identifier *key, unsigned long long value)
434 {
435     int status;
436     PyObject *value_object;
437 
438     value_object = PyLong_FromUnsignedLongLong(value);
439     if (value_object == NULL) {
440         return -1;
441     }
442 
443     status = _PyDict_SetItemId(spec, key, value_object);
444     Py_DECREF(value_object);
445     return status;
446 }
447 
448 static PyObject *
build_filter_spec(const lzma_filter * f)449 build_filter_spec(const lzma_filter *f)
450 {
451     PyObject *spec;
452 
453     spec = PyDict_New();
454     if (spec == NULL) {
455         return NULL;
456     }
457 
458 #define ADD_FIELD(SOURCE, FIELD) \
459     do { \
460         _Py_IDENTIFIER(FIELD); \
461         if (spec_add_field(spec, &PyId_##FIELD, SOURCE->FIELD) == -1) \
462             goto error;\
463     } while (0)
464 
465     ADD_FIELD(f, id);
466 
467     switch (f->id) {
468         /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the
469            lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
470            dict_size field is used. */
471         case LZMA_FILTER_LZMA1: {
472             lzma_options_lzma *options = f->options;
473             ADD_FIELD(options, lc);
474             ADD_FIELD(options, lp);
475             ADD_FIELD(options, pb);
476             ADD_FIELD(options, dict_size);
477             break;
478         }
479         case LZMA_FILTER_LZMA2: {
480             lzma_options_lzma *options = f->options;
481             ADD_FIELD(options, dict_size);
482             break;
483         }
484         case LZMA_FILTER_DELTA: {
485             lzma_options_delta *options = f->options;
486             ADD_FIELD(options, dist);
487             break;
488         }
489         case LZMA_FILTER_X86:
490         case LZMA_FILTER_POWERPC:
491         case LZMA_FILTER_IA64:
492         case LZMA_FILTER_ARM:
493         case LZMA_FILTER_ARMTHUMB:
494         case LZMA_FILTER_SPARC: {
495             lzma_options_bcj *options = f->options;
496             ADD_FIELD(options, start_offset);
497             break;
498         }
499         default:
500             PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
501             goto error;
502     }
503 
504 #undef ADD_FIELD
505 
506     return spec;
507 
508 error:
509     Py_DECREF(spec);
510     return NULL;
511 }
512 
513 
514 /*[clinic input]
515 module _lzma
516 class _lzma.LZMACompressor "Compressor *" "&Compressor_type"
517 class _lzma.LZMADecompressor "Decompressor *" "&Decompressor_type"
518 [clinic start generated code]*/
519 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2c14bbe05ff0c147]*/
520 
521 #include "clinic/_lzmamodule.c.h"
522 
523 /*[python input]
524 
525 class lzma_vli_converter(CConverter):
526     type = 'lzma_vli'
527     converter = 'lzma_vli_converter'
528 
529 class lzma_filter_converter(CConverter):
530     type = 'lzma_filter'
531     converter = 'lzma_filter_converter'
532     c_default = c_ignored_default = "{LZMA_VLI_UNKNOWN, NULL}"
533 
534     def cleanup(self):
535         name = ensure_legal_c_identifier(self.name)
536         return ('if (%(name)s.id != LZMA_VLI_UNKNOWN)\n'
537                 '   PyMem_Free(%(name)s.options);\n') % {'name': name}
538 
539 [python start generated code]*/
540 /*[python end generated code: output=da39a3ee5e6b4b0d input=74fe7631ce377a94]*/
541 
542 
543 /* LZMACompressor class. */
544 
545 static PyObject *
compress(Compressor * c,uint8_t * data,size_t len,lzma_action action)546 compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
547 {
548     PyObject *result;
549     _BlocksOutputBuffer buffer = {.list = NULL};
550     _lzma_state *state = PyType_GetModuleState(Py_TYPE(c));
551     assert(state != NULL);
552 
553     if (OutputBuffer_InitAndGrow(&buffer, -1, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
554         goto error;
555     }
556     c->lzs.next_in = data;
557     c->lzs.avail_in = len;
558 
559     for (;;) {
560         lzma_ret lzret;
561 
562         Py_BEGIN_ALLOW_THREADS
563         lzret = lzma_code(&c->lzs, action);
564         Py_END_ALLOW_THREADS
565 
566         if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0) {
567             lzret = LZMA_OK; /* That wasn't a real error */
568         }
569         if (catch_lzma_error(state, lzret)) {
570             goto error;
571         }
572         if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
573             (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
574             break;
575         } else if (c->lzs.avail_out == 0) {
576             if (OutputBuffer_Grow(&buffer, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
577                 goto error;
578             }
579         }
580     }
581 
582     result = OutputBuffer_Finish(&buffer, c->lzs.avail_out);
583     if (result != NULL) {
584         return result;
585     }
586 
587 error:
588     OutputBuffer_OnError(&buffer);
589     return NULL;
590 }
591 
592 /*[clinic input]
593 _lzma.LZMACompressor.compress
594 
595     data: Py_buffer
596     /
597 
598 Provide data to the compressor object.
599 
600 Returns a chunk of compressed data if possible, or b'' otherwise.
601 
602 When you have finished providing data to the compressor, call the
603 flush() method to finish the compression process.
604 [clinic start generated code]*/
605 
606 static PyObject *
_lzma_LZMACompressor_compress_impl(Compressor * self,Py_buffer * data)607 _lzma_LZMACompressor_compress_impl(Compressor *self, Py_buffer *data)
608 /*[clinic end generated code: output=31f615136963e00f input=64019eac7f2cc8d0]*/
609 {
610     PyObject *result = NULL;
611 
612     ACQUIRE_LOCK(self);
613     if (self->flushed) {
614         PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
615     }
616     else {
617         result = compress(self, data->buf, data->len, LZMA_RUN);
618     }
619     RELEASE_LOCK(self);
620     return result;
621 }
622 
623 /*[clinic input]
624 _lzma.LZMACompressor.flush
625 
626 Finish the compression process.
627 
628 Returns the compressed data left in internal buffers.
629 
630 The compressor object may not be used after this method is called.
631 [clinic start generated code]*/
632 
633 static PyObject *
_lzma_LZMACompressor_flush_impl(Compressor * self)634 _lzma_LZMACompressor_flush_impl(Compressor *self)
635 /*[clinic end generated code: output=fec21f3e22504f50 input=6b369303f67ad0a8]*/
636 {
637     PyObject *result = NULL;
638 
639     ACQUIRE_LOCK(self);
640     if (self->flushed) {
641         PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
642     } else {
643         self->flushed = 1;
644         result = compress(self, NULL, 0, LZMA_FINISH);
645     }
646     RELEASE_LOCK(self);
647     return result;
648 }
649 
650 static int
Compressor_init_xz(_lzma_state * state,lzma_stream * lzs,int check,uint32_t preset,PyObject * filterspecs)651 Compressor_init_xz(_lzma_state *state, lzma_stream *lzs,
652                    int check, uint32_t preset, PyObject *filterspecs)
653 {
654     lzma_ret lzret;
655 
656     if (filterspecs == Py_None) {
657         lzret = lzma_easy_encoder(lzs, preset, check);
658     } else {
659         lzma_filter filters[LZMA_FILTERS_MAX + 1];
660 
661         if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
662             return -1;
663         lzret = lzma_stream_encoder(lzs, filters, check);
664         free_filter_chain(filters);
665     }
666     if (catch_lzma_error(state, lzret)) {
667         return -1;
668     }
669     else {
670         return 0;
671     }
672 }
673 
674 static int
Compressor_init_alone(_lzma_state * state,lzma_stream * lzs,uint32_t preset,PyObject * filterspecs)675 Compressor_init_alone(_lzma_state *state, lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
676 {
677     lzma_ret lzret;
678 
679     if (filterspecs == Py_None) {
680         lzma_options_lzma options;
681 
682         if (lzma_lzma_preset(&options, preset)) {
683             PyErr_Format(state->error, "Invalid compression preset: %u", preset);
684             return -1;
685         }
686         lzret = lzma_alone_encoder(lzs, &options);
687     } else {
688         lzma_filter filters[LZMA_FILTERS_MAX + 1];
689 
690         if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
691             return -1;
692         if (filters[0].id == LZMA_FILTER_LZMA1 &&
693             filters[1].id == LZMA_VLI_UNKNOWN) {
694             lzret = lzma_alone_encoder(lzs, filters[0].options);
695         } else {
696             PyErr_SetString(PyExc_ValueError,
697                             "Invalid filter chain for FORMAT_ALONE - "
698                             "must be a single LZMA1 filter");
699             lzret = LZMA_PROG_ERROR;
700         }
701         free_filter_chain(filters);
702     }
703     if (PyErr_Occurred() || catch_lzma_error(state, lzret)) {
704         return -1;
705     }
706     else {
707         return 0;
708     }
709 }
710 
711 static int
Compressor_init_raw(_lzma_state * state,lzma_stream * lzs,PyObject * filterspecs)712 Compressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
713 {
714     lzma_filter filters[LZMA_FILTERS_MAX + 1];
715     lzma_ret lzret;
716 
717     if (filterspecs == Py_None) {
718         PyErr_SetString(PyExc_ValueError,
719                         "Must specify filters for FORMAT_RAW");
720         return -1;
721     }
722     if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
723         return -1;
724     }
725     lzret = lzma_raw_encoder(lzs, filters);
726     free_filter_chain(filters);
727     if (catch_lzma_error(state, lzret)) {
728         return -1;
729     }
730     else {
731         return 0;
732     }
733 }
734 
735 /*[-clinic input]
736 _lzma.LZMACompressor.__init__
737 
738     format: int(c_default="FORMAT_XZ") = FORMAT_XZ
739         The container format to use for the output.  This can
740         be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.
741 
742     check: int(c_default="-1") = unspecified
743         The integrity check to use.  For FORMAT_XZ, the default
744         is CHECK_CRC64.  FORMAT_ALONE and FORMAT_RAW do not support integrity
745         checks; for these formats, check must be omitted, or be CHECK_NONE.
746 
747     preset: object = None
748         If provided should be an integer in the range 0-9, optionally
749         OR-ed with the constant PRESET_EXTREME.
750 
751     filters: object = None
752         If provided should be a sequence of dicts.  Each dict should
753         have an entry for "id" indicating the ID of the filter, plus
754         additional entries for options to the filter.
755 
756 Create a compressor object for compressing data incrementally.
757 
758 The settings used by the compressor can be specified either as a
759 preset compression level (with the 'preset' argument), or in detail
760 as a custom filter chain (with the 'filters' argument).  For FORMAT_XZ
761 and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
762 level.  For FORMAT_RAW, the caller must always specify a filter chain;
763 the raw compressor does not support preset compression levels.
764 
765 For one-shot compression, use the compress() function instead.
766 [-clinic start generated code]*/
767 static int
Compressor_init(Compressor * self,PyObject * args,PyObject * kwargs)768 Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
769 {
770     static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
771     int format = FORMAT_XZ;
772     int check = -1;
773     uint32_t preset = LZMA_PRESET_DEFAULT;
774     PyObject *preset_obj = Py_None;
775     PyObject *filterspecs = Py_None;
776     _lzma_state *state = PyType_GetModuleState(Py_TYPE(self));
777     assert(state != NULL);
778     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
779                                      "|iiOO:LZMACompressor", arg_names,
780                                      &format, &check, &preset_obj,
781                                      &filterspecs)) {
782         return -1;
783     }
784 
785     if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
786         PyErr_SetString(PyExc_ValueError,
787                         "Integrity checks are only supported by FORMAT_XZ");
788         return -1;
789     }
790 
791     if (preset_obj != Py_None && filterspecs != Py_None) {
792         PyErr_SetString(PyExc_ValueError,
793                         "Cannot specify both preset and filter chain");
794         return -1;
795     }
796 
797     if (preset_obj != Py_None) {
798         if (!uint32_converter(preset_obj, &preset)) {
799             return -1;
800         }
801     }
802 
803     self->alloc.opaque = NULL;
804     self->alloc.alloc = PyLzma_Malloc;
805     self->alloc.free = PyLzma_Free;
806     self->lzs.allocator = &self->alloc;
807 
808     self->lock = PyThread_allocate_lock();
809     if (self->lock == NULL) {
810         PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
811         return -1;
812     }
813 
814     self->flushed = 0;
815     switch (format) {
816         case FORMAT_XZ:
817             if (check == -1) {
818                 check = LZMA_CHECK_CRC64;
819             }
820             if (Compressor_init_xz(state, &self->lzs, check, preset, filterspecs) != 0) {
821                 break;
822             }
823             return 0;
824 
825         case FORMAT_ALONE:
826             if (Compressor_init_alone(state, &self->lzs, preset, filterspecs) != 0) {
827                 break;
828             }
829             return 0;
830 
831         case FORMAT_RAW:
832             if (Compressor_init_raw(state, &self->lzs, filterspecs) != 0) {
833                 break;
834             }
835             return 0;
836 
837         default:
838             PyErr_Format(PyExc_ValueError,
839                          "Invalid container format: %d", format);
840             break;
841     }
842 
843     PyThread_free_lock(self->lock);
844     self->lock = NULL;
845     return -1;
846 }
847 
848 static void
Compressor_dealloc(Compressor * self)849 Compressor_dealloc(Compressor *self)
850 {
851     lzma_end(&self->lzs);
852     if (self->lock != NULL) {
853         PyThread_free_lock(self->lock);
854     }
855     PyTypeObject *tp = Py_TYPE(self);
856     tp->tp_free((PyObject *)self);
857     Py_DECREF(tp);
858 }
859 
860 static PyMethodDef Compressor_methods[] = {
861     _LZMA_LZMACOMPRESSOR_COMPRESS_METHODDEF
862     _LZMA_LZMACOMPRESSOR_FLUSH_METHODDEF
863     {NULL}
864 };
865 
866 static int
Compressor_traverse(Compressor * self,visitproc visit,void * arg)867 Compressor_traverse(Compressor *self, visitproc visit, void *arg)
868 {
869     Py_VISIT(Py_TYPE(self));
870     return 0;
871 }
872 
873 PyDoc_STRVAR(Compressor_doc,
874 "LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
875 "\n"
876 "Create a compressor object for compressing data incrementally.\n"
877 "\n"
878 "format specifies the container format to use for the output. This can\n"
879 "be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
880 "\n"
881 "check specifies the integrity check to use. For FORMAT_XZ, the default\n"
882 "is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity\n"
883 "checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
884 "\n"
885 "The settings used by the compressor can be specified either as a\n"
886 "preset compression level (with the 'preset' argument), or in detail\n"
887 "as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
888 "and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
889 "level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
890 "the raw compressor does not support preset compression levels.\n"
891 "\n"
892 "preset (if provided) should be an integer in the range 0-9, optionally\n"
893 "OR-ed with the constant PRESET_EXTREME.\n"
894 "\n"
895 "filters (if provided) should be a sequence of dicts. Each dict should\n"
896 "have an entry for \"id\" indicating the ID of the filter, plus\n"
897 "additional entries for options to the filter.\n"
898 "\n"
899 "For one-shot compression, use the compress() function instead.\n");
900 
901 static PyType_Slot lzma_compressor_type_slots[] = {
902     {Py_tp_dealloc, Compressor_dealloc},
903     {Py_tp_methods, Compressor_methods},
904     {Py_tp_init, Compressor_init},
905     {Py_tp_new, PyType_GenericNew},
906     {Py_tp_doc, (char *)Compressor_doc},
907     {Py_tp_traverse, Compressor_traverse},
908     {0, 0}
909 };
910 
911 static PyType_Spec lzma_compressor_type_spec = {
912     .name = "_lzma.LZMACompressor",
913     .basicsize = sizeof(Compressor),
914     // Calling PyType_GetModuleState() on a subclass is not safe.
915     // lzma_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
916     // which prevents to create a subclass.
917     // So calling PyType_GetModuleState() in this file is always safe.
918     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
919     .slots = lzma_compressor_type_slots,
920 };
921 
922 /* LZMADecompressor class. */
923 
924 /* Decompress data of length d->lzs.avail_in in d->lzs.next_in.  The output
925    buffer is allocated dynamically and returned.  At most max_length bytes are
926    returned, so some of the input may not be consumed. d->lzs.next_in and
927    d->lzs.avail_in are updated to reflect the consumed input. */
928 static PyObject*
decompress_buf(Decompressor * d,Py_ssize_t max_length)929 decompress_buf(Decompressor *d, Py_ssize_t max_length)
930 {
931     PyObject *result;
932     lzma_stream *lzs = &d->lzs;
933     _BlocksOutputBuffer buffer = {.list = NULL};
934     _lzma_state *state = PyType_GetModuleState(Py_TYPE(d));
935     assert(state != NULL);
936 
937     if (OutputBuffer_InitAndGrow(&buffer, max_length, &lzs->next_out, &lzs->avail_out) < 0) {
938         goto error;
939     }
940 
941     for (;;) {
942         lzma_ret lzret;
943 
944         Py_BEGIN_ALLOW_THREADS
945         lzret = lzma_code(lzs, LZMA_RUN);
946         Py_END_ALLOW_THREADS
947 
948         if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0) {
949             lzret = LZMA_OK; /* That wasn't a real error */
950         }
951         if (catch_lzma_error(state, lzret)) {
952             goto error;
953         }
954         if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) {
955             d->check = lzma_get_check(&d->lzs);
956         }
957         if (lzret == LZMA_STREAM_END) {
958             d->eof = 1;
959             break;
960         } else if (lzs->avail_out == 0) {
961             /* Need to check lzs->avail_out before lzs->avail_in.
962                Maybe lzs's internal state still have a few bytes
963                can be output, grow the output buffer and continue
964                if max_lengh < 0. */
965             if (OutputBuffer_GetDataSize(&buffer, lzs->avail_out) == max_length) {
966                 break;
967             }
968             if (OutputBuffer_Grow(&buffer, &lzs->next_out, &lzs->avail_out) < 0) {
969                 goto error;
970             }
971         } else if (lzs->avail_in == 0) {
972             break;
973         }
974     }
975 
976     result = OutputBuffer_Finish(&buffer, lzs->avail_out);
977     if (result != NULL) {
978         return result;
979     }
980 
981 error:
982     OutputBuffer_OnError(&buffer);
983     return NULL;
984 }
985 
986 static PyObject *
decompress(Decompressor * d,uint8_t * data,size_t len,Py_ssize_t max_length)987 decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
988 {
989     char input_buffer_in_use;
990     PyObject *result;
991     lzma_stream *lzs = &d->lzs;
992 
993     /* Prepend unconsumed input if necessary */
994     if (lzs->next_in != NULL) {
995         size_t avail_now, avail_total;
996 
997         /* Number of bytes we can append to input buffer */
998         avail_now = (d->input_buffer + d->input_buffer_size)
999             - (lzs->next_in + lzs->avail_in);
1000 
1001         /* Number of bytes we can append if we move existing
1002            contents to beginning of buffer (overwriting
1003            consumed input) */
1004         avail_total = d->input_buffer_size - lzs->avail_in;
1005 
1006         if (avail_total < len) {
1007             size_t offset = lzs->next_in - d->input_buffer;
1008             uint8_t *tmp;
1009             size_t new_size = d->input_buffer_size + len - avail_now;
1010 
1011             /* Assign to temporary variable first, so we don't
1012                lose address of allocated buffer if realloc fails */
1013             tmp = PyMem_Realloc(d->input_buffer, new_size);
1014             if (tmp == NULL) {
1015                 PyErr_SetNone(PyExc_MemoryError);
1016                 return NULL;
1017             }
1018             d->input_buffer = tmp;
1019             d->input_buffer_size = new_size;
1020 
1021             lzs->next_in = d->input_buffer + offset;
1022         }
1023         else if (avail_now < len) {
1024             memmove(d->input_buffer, lzs->next_in,
1025                     lzs->avail_in);
1026             lzs->next_in = d->input_buffer;
1027         }
1028         memcpy((void*)(lzs->next_in + lzs->avail_in), data, len);
1029         lzs->avail_in += len;
1030         input_buffer_in_use = 1;
1031     }
1032     else {
1033         lzs->next_in = data;
1034         lzs->avail_in = len;
1035         input_buffer_in_use = 0;
1036     }
1037 
1038     result = decompress_buf(d, max_length);
1039     if (result == NULL) {
1040         lzs->next_in = NULL;
1041         return NULL;
1042     }
1043 
1044     if (d->eof) {
1045         d->needs_input = 0;
1046         if (lzs->avail_in > 0) {
1047             Py_XSETREF(d->unused_data,
1048                       PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
1049             if (d->unused_data == NULL) {
1050                 goto error;
1051             }
1052         }
1053     }
1054     else if (lzs->avail_in == 0) {
1055         lzs->next_in = NULL;
1056 
1057         if (lzs->avail_out == 0) {
1058             /* (avail_in==0 && avail_out==0)
1059                Maybe lzs's internal state still have a few bytes can
1060                be output, try to output them next time. */
1061             d->needs_input = 0;
1062 
1063             /* If max_length < 0, lzs->avail_out always > 0 */
1064             assert(max_length >= 0);
1065         } else {
1066             /* Input buffer exhausted, output buffer has space. */
1067             d->needs_input = 1;
1068         }
1069     }
1070     else {
1071         d->needs_input = 0;
1072 
1073         /* If we did not use the input buffer, we now have
1074            to copy the tail from the caller's buffer into the
1075            input buffer */
1076         if (!input_buffer_in_use) {
1077 
1078             /* Discard buffer if it's too small
1079                (resizing it may needlessly copy the current contents) */
1080             if (d->input_buffer != NULL &&
1081                 d->input_buffer_size < lzs->avail_in) {
1082                 PyMem_Free(d->input_buffer);
1083                 d->input_buffer = NULL;
1084             }
1085 
1086             /* Allocate if necessary */
1087             if (d->input_buffer == NULL) {
1088                 d->input_buffer = PyMem_Malloc(lzs->avail_in);
1089                 if (d->input_buffer == NULL) {
1090                     PyErr_SetNone(PyExc_MemoryError);
1091                     goto error;
1092                 }
1093                 d->input_buffer_size = lzs->avail_in;
1094             }
1095 
1096             /* Copy tail */
1097             memcpy(d->input_buffer, lzs->next_in, lzs->avail_in);
1098             lzs->next_in = d->input_buffer;
1099         }
1100     }
1101 
1102     return result;
1103 
1104 error:
1105     Py_XDECREF(result);
1106     return NULL;
1107 }
1108 
1109 /*[clinic input]
1110 _lzma.LZMADecompressor.decompress
1111 
1112     data: Py_buffer
1113     max_length: Py_ssize_t=-1
1114 
1115 Decompress *data*, returning uncompressed data as bytes.
1116 
1117 If *max_length* is nonnegative, returns at most *max_length* bytes of
1118 decompressed data. If this limit is reached and further output can be
1119 produced, *self.needs_input* will be set to ``False``. In this case, the next
1120 call to *decompress()* may provide *data* as b'' to obtain more of the output.
1121 
1122 If all of the input data was decompressed and returned (either because this
1123 was less than *max_length* bytes, or because *max_length* was negative),
1124 *self.needs_input* will be set to True.
1125 
1126 Attempting to decompress data after the end of stream is reached raises an
1127 EOFError.  Any data found after the end of the stream is ignored and saved in
1128 the unused_data attribute.
1129 [clinic start generated code]*/
1130 
1131 static PyObject *
_lzma_LZMADecompressor_decompress_impl(Decompressor * self,Py_buffer * data,Py_ssize_t max_length)1132 _lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data,
1133                                        Py_ssize_t max_length)
1134 /*[clinic end generated code: output=ef4e20ec7122241d input=60c1f135820e309d]*/
1135 {
1136     PyObject *result = NULL;
1137 
1138     ACQUIRE_LOCK(self);
1139     if (self->eof)
1140         PyErr_SetString(PyExc_EOFError, "Already at end of stream");
1141     else
1142         result = decompress(self, data->buf, data->len, max_length);
1143     RELEASE_LOCK(self);
1144     return result;
1145 }
1146 
1147 static int
Decompressor_init_raw(_lzma_state * state,lzma_stream * lzs,PyObject * filterspecs)1148 Decompressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
1149 {
1150     lzma_filter filters[LZMA_FILTERS_MAX + 1];
1151     lzma_ret lzret;
1152 
1153     if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
1154         return -1;
1155     }
1156     lzret = lzma_raw_decoder(lzs, filters);
1157     free_filter_chain(filters);
1158     if (catch_lzma_error(state, lzret)) {
1159         return -1;
1160     }
1161     else {
1162         return 0;
1163     }
1164 }
1165 
1166 /*[clinic input]
1167 _lzma.LZMADecompressor.__init__
1168 
1169     format: int(c_default="FORMAT_AUTO") = FORMAT_AUTO
1170         Specifies the container format of the input stream.  If this is
1171         FORMAT_AUTO (the default), the decompressor will automatically detect
1172         whether the input is FORMAT_XZ or FORMAT_ALONE.  Streams created with
1173         FORMAT_RAW cannot be autodetected.
1174 
1175     memlimit: object = None
1176         Limit the amount of memory used by the decompressor.  This will cause
1177         decompression to fail if the input cannot be decompressed within the
1178         given limit.
1179 
1180     filters: object = None
1181         A custom filter chain.  This argument is required for FORMAT_RAW, and
1182         not accepted with any other format.  When provided, this should be a
1183         sequence of dicts, each indicating the ID and options for a single
1184         filter.
1185 
1186 Create a decompressor object for decompressing data incrementally.
1187 
1188 For one-shot decompression, use the decompress() function instead.
1189 [clinic start generated code]*/
1190 
1191 static int
_lzma_LZMADecompressor___init___impl(Decompressor * self,int format,PyObject * memlimit,PyObject * filters)1192 _lzma_LZMADecompressor___init___impl(Decompressor *self, int format,
1193                                      PyObject *memlimit, PyObject *filters)
1194 /*[clinic end generated code: output=3e1821f8aa36564c input=81fe684a6c2f8a27]*/
1195 {
1196     const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
1197     uint64_t memlimit_ = UINT64_MAX;
1198     lzma_ret lzret;
1199     _lzma_state *state = PyType_GetModuleState(Py_TYPE(self));
1200     assert(state != NULL);
1201 
1202     if (memlimit != Py_None) {
1203         if (format == FORMAT_RAW) {
1204             PyErr_SetString(PyExc_ValueError,
1205                             "Cannot specify memory limit with FORMAT_RAW");
1206             return -1;
1207         }
1208         memlimit_ = PyLong_AsUnsignedLongLong(memlimit);
1209         if (PyErr_Occurred()) {
1210             return -1;
1211         }
1212     }
1213 
1214     if (format == FORMAT_RAW && filters == Py_None) {
1215         PyErr_SetString(PyExc_ValueError,
1216                         "Must specify filters for FORMAT_RAW");
1217         return -1;
1218     } else if (format != FORMAT_RAW && filters != Py_None) {
1219         PyErr_SetString(PyExc_ValueError,
1220                         "Cannot specify filters except with FORMAT_RAW");
1221         return -1;
1222     }
1223 
1224     self->alloc.opaque = NULL;
1225     self->alloc.alloc = PyLzma_Malloc;
1226     self->alloc.free = PyLzma_Free;
1227     self->lzs.allocator = &self->alloc;
1228     self->lzs.next_in = NULL;
1229 
1230     PyThread_type_lock lock = PyThread_allocate_lock();
1231     if (lock == NULL) {
1232         PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
1233         return -1;
1234     }
1235     if (self->lock != NULL) {
1236         PyThread_free_lock(self->lock);
1237     }
1238     self->lock = lock;
1239 
1240     self->check = LZMA_CHECK_UNKNOWN;
1241     self->needs_input = 1;
1242     self->input_buffer = NULL;
1243     self->input_buffer_size = 0;
1244     Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
1245     if (self->unused_data == NULL) {
1246         goto error;
1247     }
1248 
1249     switch (format) {
1250         case FORMAT_AUTO:
1251             lzret = lzma_auto_decoder(&self->lzs, memlimit_, decoder_flags);
1252             if (catch_lzma_error(state, lzret)) {
1253                 break;
1254             }
1255             return 0;
1256 
1257         case FORMAT_XZ:
1258             lzret = lzma_stream_decoder(&self->lzs, memlimit_, decoder_flags);
1259             if (catch_lzma_error(state, lzret)) {
1260                 break;
1261             }
1262             return 0;
1263 
1264         case FORMAT_ALONE:
1265             self->check = LZMA_CHECK_NONE;
1266             lzret = lzma_alone_decoder(&self->lzs, memlimit_);
1267             if (catch_lzma_error(state, lzret)) {
1268                 break;
1269             }
1270             return 0;
1271 
1272         case FORMAT_RAW:
1273             self->check = LZMA_CHECK_NONE;
1274             if (Decompressor_init_raw(state, &self->lzs, filters) == -1) {
1275                 break;
1276             }
1277             return 0;
1278 
1279         default:
1280             PyErr_Format(PyExc_ValueError,
1281                          "Invalid container format: %d", format);
1282             break;
1283     }
1284 
1285 error:
1286     Py_CLEAR(self->unused_data);
1287     PyThread_free_lock(self->lock);
1288     self->lock = NULL;
1289     return -1;
1290 }
1291 
1292 static void
Decompressor_dealloc(Decompressor * self)1293 Decompressor_dealloc(Decompressor *self)
1294 {
1295     if(self->input_buffer != NULL)
1296         PyMem_Free(self->input_buffer);
1297 
1298     lzma_end(&self->lzs);
1299     Py_CLEAR(self->unused_data);
1300     if (self->lock != NULL) {
1301         PyThread_free_lock(self->lock);
1302     }
1303     PyTypeObject *tp = Py_TYPE(self);
1304     tp->tp_free((PyObject *)self);
1305     Py_DECREF(tp);
1306 }
1307 
1308 static int
Decompressor_traverse(Decompressor * self,visitproc visit,void * arg)1309 Decompressor_traverse(Decompressor *self, visitproc visit, void *arg)
1310 {
1311     Py_VISIT(Py_TYPE(self));
1312     return 0;
1313 }
1314 
1315 static PyMethodDef Decompressor_methods[] = {
1316     _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF
1317     {NULL}
1318 };
1319 
1320 PyDoc_STRVAR(Decompressor_check_doc,
1321 "ID of the integrity check used by the input stream.");
1322 
1323 PyDoc_STRVAR(Decompressor_eof_doc,
1324 "True if the end-of-stream marker has been reached.");
1325 
1326 PyDoc_STRVAR(Decompressor_needs_input_doc,
1327 "True if more input is needed before more decompressed data can be produced.");
1328 
1329 PyDoc_STRVAR(Decompressor_unused_data_doc,
1330 "Data found after the end of the compressed stream.");
1331 
1332 static PyMemberDef Decompressor_members[] = {
1333     {"check", T_INT, offsetof(Decompressor, check), READONLY,
1334      Decompressor_check_doc},
1335     {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
1336      Decompressor_eof_doc},
1337     {"needs_input", T_BOOL, offsetof(Decompressor, needs_input), READONLY,
1338      Decompressor_needs_input_doc},
1339     {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
1340      Decompressor_unused_data_doc},
1341     {NULL}
1342 };
1343 
1344 static PyType_Slot lzma_decompressor_type_slots[] = {
1345     {Py_tp_dealloc, Decompressor_dealloc},
1346     {Py_tp_methods, Decompressor_methods},
1347     {Py_tp_init, _lzma_LZMADecompressor___init__},
1348     {Py_tp_new, PyType_GenericNew},
1349     {Py_tp_doc, (char *)_lzma_LZMADecompressor___init____doc__},
1350     {Py_tp_traverse, Decompressor_traverse},
1351     {Py_tp_members, Decompressor_members},
1352     {0, 0}
1353 };
1354 
1355 static PyType_Spec lzma_decompressor_type_spec = {
1356     .name = "_lzma.LZMADecompressor",
1357     .basicsize = sizeof(Decompressor),
1358     // Calling PyType_GetModuleState() on a subclass is not safe.
1359     // lzma_decompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
1360     // which prevents to create a subclass.
1361     // So calling PyType_GetModuleState() in this file is always safe.
1362     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
1363     .slots = lzma_decompressor_type_slots,
1364 };
1365 
1366 
1367 /* Module-level functions. */
1368 
1369 /*[clinic input]
1370 _lzma.is_check_supported
1371     check_id: int
1372     /
1373 
1374 Test whether the given integrity check is supported.
1375 
1376 Always returns True for CHECK_NONE and CHECK_CRC32.
1377 [clinic start generated code]*/
1378 
1379 static PyObject *
_lzma_is_check_supported_impl(PyObject * module,int check_id)1380 _lzma_is_check_supported_impl(PyObject *module, int check_id)
1381 /*[clinic end generated code: output=e4f14ba3ce2ad0a5 input=5518297b97b2318f]*/
1382 {
1383     return PyBool_FromLong(lzma_check_is_supported(check_id));
1384 }
1385 
1386 PyDoc_STRVAR(_lzma__encode_filter_properties__doc__,
1387 "_encode_filter_properties($module, filter, /)\n"
1388 "--\n"
1389 "\n"
1390 "Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).\n"
1391 "\n"
1392 "The result does not include the filter ID itself, only the options.");
1393 
1394 #define _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF    \
1395     {"_encode_filter_properties", (PyCFunction)_lzma__encode_filter_properties, METH_O, _lzma__encode_filter_properties__doc__},
1396 
1397 static PyObject *
1398 _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter);
1399 
1400 static PyObject *
_lzma__encode_filter_properties(PyObject * module,PyObject * arg)1401 _lzma__encode_filter_properties(PyObject *module, PyObject *arg)
1402 {
1403     PyObject *return_value = NULL;
1404     lzma_filter filter = {LZMA_VLI_UNKNOWN, NULL};
1405     _lzma_state *state = get_lzma_state(module);
1406     assert(state != NULL);
1407     if (!lzma_filter_converter(state, arg, &filter)) {
1408         goto exit;
1409     }
1410     return_value = _lzma__encode_filter_properties_impl(module, filter);
1411 
1412 exit:
1413     /* Cleanup for filter */
1414     if (filter.id != LZMA_VLI_UNKNOWN) {
1415        PyMem_Free(filter.options);
1416     }
1417 
1418     return return_value;
1419 }
1420 
1421 static PyObject *
_lzma__encode_filter_properties_impl(PyObject * module,lzma_filter filter)1422 _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter)
1423 {
1424     lzma_ret lzret;
1425     uint32_t encoded_size;
1426     PyObject *result = NULL;
1427     _lzma_state *state = get_lzma_state(module);
1428     assert(state != NULL);
1429 
1430     lzret = lzma_properties_size(&encoded_size, &filter);
1431     if (catch_lzma_error(state, lzret))
1432         goto error;
1433 
1434     result = PyBytes_FromStringAndSize(NULL, encoded_size);
1435     if (result == NULL)
1436         goto error;
1437 
1438     lzret = lzma_properties_encode(
1439             &filter, (uint8_t *)PyBytes_AS_STRING(result));
1440     if (catch_lzma_error(state, lzret)) {
1441         goto error;
1442     }
1443 
1444     return result;
1445 
1446 error:
1447     Py_XDECREF(result);
1448     return NULL;
1449 }
1450 
1451 
1452 /*[clinic input]
1453 _lzma._decode_filter_properties
1454     filter_id: lzma_vli
1455     encoded_props: Py_buffer
1456     /
1457 
1458 Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).
1459 
1460 The result does not include the filter ID itself, only the options.
1461 [clinic start generated code]*/
1462 
1463 static PyObject *
_lzma__decode_filter_properties_impl(PyObject * module,lzma_vli filter_id,Py_buffer * encoded_props)1464 _lzma__decode_filter_properties_impl(PyObject *module, lzma_vli filter_id,
1465                                      Py_buffer *encoded_props)
1466 /*[clinic end generated code: output=714fd2ef565d5c60 input=246410800782160c]*/
1467 {
1468     lzma_filter filter;
1469     lzma_ret lzret;
1470     PyObject *result = NULL;
1471     filter.id = filter_id;
1472     _lzma_state *state = get_lzma_state(module);
1473     assert(state != NULL);
1474 
1475     lzret = lzma_properties_decode(
1476             &filter, NULL, encoded_props->buf, encoded_props->len);
1477     if (catch_lzma_error(state, lzret)) {
1478         return NULL;
1479     }
1480 
1481     result = build_filter_spec(&filter);
1482 
1483     /* We use vanilla free() here instead of PyMem_Free() - filter.options was
1484        allocated by lzma_properties_decode() using the default allocator. */
1485     free(filter.options);
1486     return result;
1487 }
1488 
1489 /* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1490    would not work correctly on platforms with 32-bit longs. */
1491 static int
module_add_int_constant(PyObject * m,const char * name,long long value)1492 module_add_int_constant(PyObject *m, const char *name, long long value)
1493 {
1494     PyObject *o = PyLong_FromLongLong(value);
1495     if (o == NULL) {
1496         return -1;
1497     }
1498     if (PyModule_AddObject(m, name, o) == 0) {
1499         return 0;
1500     }
1501     Py_DECREF(o);
1502     return -1;
1503 }
1504 
1505 static int
lzma_exec(PyObject * module)1506 lzma_exec(PyObject *module)
1507 {
1508 #define ADD_INT_PREFIX_MACRO(module, macro)                                 \
1509     do {                                                                    \
1510         if (module_add_int_constant(module, #macro, LZMA_ ## macro) < 0) {  \
1511             return -1;                                                      \
1512         }                                                                   \
1513     } while(0)
1514 
1515 #define ADD_INT_MACRO(module, macro)                                        \
1516     do {                                                                    \
1517         if (PyModule_AddIntMacro(module, macro) < 0) {                      \
1518             return -1;                                                      \
1519         }                                                                   \
1520     } while (0)
1521 
1522 
1523     _lzma_state *state = get_lzma_state(module);
1524 
1525     state->empty_tuple = PyTuple_New(0);
1526     if (state->empty_tuple == NULL) {
1527         return -1;
1528     }
1529 
1530     ADD_INT_MACRO(module, FORMAT_AUTO);
1531     ADD_INT_MACRO(module, FORMAT_XZ);
1532     ADD_INT_MACRO(module, FORMAT_ALONE);
1533     ADD_INT_MACRO(module, FORMAT_RAW);
1534     ADD_INT_PREFIX_MACRO(module, CHECK_NONE);
1535     ADD_INT_PREFIX_MACRO(module, CHECK_CRC32);
1536     ADD_INT_PREFIX_MACRO(module, CHECK_CRC64);
1537     ADD_INT_PREFIX_MACRO(module, CHECK_SHA256);
1538     ADD_INT_PREFIX_MACRO(module, CHECK_ID_MAX);
1539     ADD_INT_PREFIX_MACRO(module, CHECK_UNKNOWN);
1540     ADD_INT_PREFIX_MACRO(module, FILTER_LZMA1);
1541     ADD_INT_PREFIX_MACRO(module, FILTER_LZMA2);
1542     ADD_INT_PREFIX_MACRO(module, FILTER_DELTA);
1543     ADD_INT_PREFIX_MACRO(module, FILTER_X86);
1544     ADD_INT_PREFIX_MACRO(module, FILTER_IA64);
1545     ADD_INT_PREFIX_MACRO(module, FILTER_ARM);
1546     ADD_INT_PREFIX_MACRO(module, FILTER_ARMTHUMB);
1547     ADD_INT_PREFIX_MACRO(module, FILTER_SPARC);
1548     ADD_INT_PREFIX_MACRO(module, FILTER_POWERPC);
1549     ADD_INT_PREFIX_MACRO(module, MF_HC3);
1550     ADD_INT_PREFIX_MACRO(module, MF_HC4);
1551     ADD_INT_PREFIX_MACRO(module, MF_BT2);
1552     ADD_INT_PREFIX_MACRO(module, MF_BT3);
1553     ADD_INT_PREFIX_MACRO(module, MF_BT4);
1554     ADD_INT_PREFIX_MACRO(module, MODE_FAST);
1555     ADD_INT_PREFIX_MACRO(module, MODE_NORMAL);
1556     ADD_INT_PREFIX_MACRO(module, PRESET_DEFAULT);
1557     ADD_INT_PREFIX_MACRO(module, PRESET_EXTREME);
1558 
1559     state->error = PyErr_NewExceptionWithDoc("_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
1560     if (state->error == NULL) {
1561         return -1;
1562     }
1563 
1564     if (PyModule_AddType(module, (PyTypeObject *)state->error) < 0) {
1565         return -1;
1566     }
1567 
1568 
1569     state->lzma_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
1570                                                             &lzma_compressor_type_spec, NULL);
1571     if (state->lzma_compressor_type == NULL) {
1572         return -1;
1573     }
1574 
1575     if (PyModule_AddType(module, state->lzma_compressor_type) < 0) {
1576         return -1;
1577     }
1578 
1579     state->lzma_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
1580                                                          &lzma_decompressor_type_spec, NULL);
1581     if (state->lzma_decompressor_type == NULL) {
1582         return -1;
1583     }
1584 
1585     if (PyModule_AddType(module, state->lzma_decompressor_type) < 0) {
1586         return -1;
1587     }
1588 
1589     return 0;
1590 }
1591 
1592 static PyMethodDef lzma_methods[] = {
1593     _LZMA_IS_CHECK_SUPPORTED_METHODDEF
1594     _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
1595     _LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
1596     {NULL}
1597 };
1598 
1599 static PyModuleDef_Slot lzma_slots[] = {
1600     {Py_mod_exec, lzma_exec},
1601     {0, NULL}
1602 };
1603 
1604 static int
lzma_traverse(PyObject * module,visitproc visit,void * arg)1605 lzma_traverse(PyObject *module, visitproc visit, void *arg)
1606 {
1607     _lzma_state *state = get_lzma_state(module);
1608     Py_VISIT(state->lzma_compressor_type);
1609     Py_VISIT(state->lzma_decompressor_type);
1610     Py_VISIT(state->error);
1611     Py_VISIT(state->empty_tuple);
1612     return 0;
1613 }
1614 
1615 static int
lzma_clear(PyObject * module)1616 lzma_clear(PyObject *module)
1617 {
1618     _lzma_state *state = get_lzma_state(module);
1619     Py_CLEAR(state->lzma_compressor_type);
1620     Py_CLEAR(state->lzma_decompressor_type);
1621     Py_CLEAR(state->error);
1622     Py_CLEAR(state->empty_tuple);
1623     return 0;
1624 }
1625 
1626 static void
lzma_free(void * module)1627 lzma_free(void *module)
1628 {
1629     lzma_clear((PyObject *)module);
1630 }
1631 
1632 static PyModuleDef _lzmamodule = {
1633     PyModuleDef_HEAD_INIT,
1634     .m_name = "_lzma",
1635     .m_size = sizeof(_lzma_state),
1636     .m_methods = lzma_methods,
1637     .m_slots = lzma_slots,
1638     .m_traverse = lzma_traverse,
1639     .m_clear = lzma_clear,
1640     .m_free = lzma_free,
1641 };
1642 
1643 PyMODINIT_FUNC
PyInit__lzma(void)1644 PyInit__lzma(void)
1645 {
1646     return PyModuleDef_Init(&_lzmamodule);
1647 }
1648